From b6b50a21625bbf59a89b807dd0fc1eb5412aeff3 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Fri, 9 Jan 2009 15:25:09 -0800
Subject: mac80211: more kernel-doc fixes

Fix (delete) more mac80211 kernel-doc:

Warning(linux-2.6.28-git13//include/net/mac80211.h:375): Excess struct/union/enum/typedef member 'retry_count' description in 'ieee80211_tx_info'
Warning(linux-2.6.28-git13//net/mac80211/sta_info.h:308): Excess struct/union/enum/typedef member 'last_txrate' description in 'sta_info'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/sta_info.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index dc2606d0ae7..e49a5b99cf1 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -195,7 +195,6 @@ struct sta_ampdu_mlme {
  * @tx_packets: number of RX/TX MSDUs
  * @tx_bytes: number of bytes transmitted to this STA
  * @tx_fragments: number of transmitted MPDUs
- * @last_txrate: description of the last used transmit rate
  * @tid_seq: per-TID sequence numbers for sending to this STA
  * @ampdu_mlme: A-MPDU state machine state
  * @timer_to_tid: identity mapping to ID timers
-- 
cgit v1.2.3


From a92a3ce72483d7f0902dff8a3be8cdcee215a37c Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 7 Jan 2009 17:43:33 -0800
Subject: cfg80211: make handle_band() and handle_channel() wiphy specific

This allows us to make more wiphy specific judgements when
handling the channels later on.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 4f877535e66..af805b0e157 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -778,13 +778,22 @@ static int freq_reg_info(u32 center_freq, u32 *bandwidth,
 	return !max_bandwidth;
 }
 
-static void handle_channel(struct ieee80211_channel *chan)
+static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
+			   unsigned int chan_idx)
 {
 	int r;
-	u32 flags = chan->orig_flags;
+	u32 flags;
 	u32 max_bandwidth = 0;
 	const struct ieee80211_reg_rule *reg_rule = NULL;
 	const struct ieee80211_power_rule *power_rule = NULL;
+	struct ieee80211_supported_band *sband;
+	struct ieee80211_channel *chan;
+
+	sband = wiphy->bands[band];
+	BUG_ON(chan_idx >= sband->n_channels);
+	chan = &sband->channels[chan_idx];
+
+	flags = chan->orig_flags;
 
 	r = freq_reg_info(MHZ_TO_KHZ(chan->center_freq),
 		&max_bandwidth, &reg_rule);
@@ -808,12 +817,16 @@ static void handle_channel(struct ieee80211_channel *chan)
 		chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp);
 }
 
-static void handle_band(struct ieee80211_supported_band *sband)
+static void handle_band(struct wiphy *wiphy, enum ieee80211_band band)
 {
-	int i;
+	unsigned int i;
+	struct ieee80211_supported_band *sband;
+
+	BUG_ON(!wiphy->bands[band]);
+	sband = wiphy->bands[band];
 
 	for (i = 0; i < sband->n_channels; i++)
-		handle_channel(&sband->channels[i]);
+		handle_channel(wiphy, band, i);
 }
 
 static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby)
@@ -840,7 +853,7 @@ void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby)
 	enum ieee80211_band band;
 	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
 		if (wiphy->bands[band])
-			handle_band(wiphy->bands[band]);
+			handle_band(wiphy, band);
 		if (wiphy->reg_notifier)
 			wiphy->reg_notifier(wiphy, setby);
 	}
-- 
cgit v1.2.3


From 0c7dc45d21de6ae212b5ccb7cdff5beff795ccf0 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 7 Jan 2009 17:43:36 -0800
Subject: cfg80211: Fix regression with 11d on bands

This fixes a regression on disallowing bands introduced with the new
802.11d support. The issue is that IEEE-802.11 allows APs to send
a subset of what a country regulatory domain defines. This was clarified
in this document:

http://tinyurl.com/11d-clarification

As such it is possible, and this is what is done in practice, that a
single band 2.4 GHz AP will only send 2.4 GHz band regulatory information
through the 802.11 country information element and then the current
intersection with what CRDA provided yields a regulatory domain with
no 5 GHz information -- even though that country may actually allow
5 GHz operation. We correct this by only applying the intersection rules
on a channel if the the intersection yields a regulatory rule on the
same band the channel is on.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 79 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index af805b0e157..5f6d20d98ee 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -421,6 +421,31 @@ static u32 freq_max_bandwidth(const struct ieee80211_freq_range *freq_range,
 	return 0;
 }
 
+/**
+ * freq_in_rule_band - tells us if a frequency is in a frequency band
+ * @freq_range: frequency rule we want to query
+ * @freq_khz: frequency we are inquiring about
+ *
+ * This lets us know if a specific frequency rule is or is not relevant to
+ * a specific frequency's band. Bands are device specific and artificial
+ * definitions (the "2.4 GHz band" and the "5 GHz band"), however it is
+ * safe for now to assume that a frequency rule should not be part of a
+ * frequency's band if the start freq or end freq are off by more than 2 GHz.
+ * This resolution can be lowered and should be considered as we add
+ * regulatory rule support for other "bands".
+ **/
+static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range,
+	u32 freq_khz)
+{
+#define ONE_GHZ_IN_KHZ	1000000
+	if (abs(freq_khz - freq_range->start_freq_khz) <= (2 * ONE_GHZ_IN_KHZ))
+		return true;
+	if (abs(freq_khz - freq_range->end_freq_khz) <= (2 * ONE_GHZ_IN_KHZ))
+		return true;
+	return false;
+#undef ONE_GHZ_IN_KHZ
+}
+
 /* Converts a country IE to a regulatory domain. A regulatory domain
  * structure has a lot of information which the IE doesn't yet have,
  * so for the other values we use upper max values as we will intersect
@@ -748,12 +773,23 @@ static u32 map_regdom_flags(u32 rd_flags)
  * 	this value to the maximum allowed bandwidth.
  * @reg_rule: the regulatory rule which we have for this frequency
  *
- * Use this function to get the regulatory rule for a specific frequency.
+ * Use this function to get the regulatory rule for a specific frequency on
+ * a given wireless device. If the device has a specific regulatory domain
+ * it wants to follow we respect that unless a country IE has been received
+ * and processed already.
+ *
+ * Returns 0 if it was able to find a valid regulatory rule which does
+ * apply to the given center_freq otherwise it returns non-zero. It will
+ * also return -ERANGE if we determine the given center_freq does not even have
+ * a regulatory rule for a frequency range in the center_freq's band. See
+ * freq_in_rule_band() for our current definition of a band -- this is purely
+ * subjective and right now its 802.11 specific.
  */
 static int freq_reg_info(u32 center_freq, u32 *bandwidth,
 			 const struct ieee80211_reg_rule **reg_rule)
 {
 	int i;
+	bool band_rule_found = false;
 	u32 max_bandwidth = 0;
 
 	if (!cfg80211_regdomain)
@@ -767,7 +803,15 @@ static int freq_reg_info(u32 center_freq, u32 *bandwidth,
 		rr = &cfg80211_regdomain->reg_rules[i];
 		fr = &rr->freq_range;
 		pr = &rr->power_rule;
+
+		/* We only need to know if one frequency rule was
+		 * was in center_freq's band, that's enough, so lets
+		 * not overwrite it once found */
+		if (!band_rule_found)
+			band_rule_found = freq_in_rule_band(fr, center_freq);
+
 		max_bandwidth = freq_max_bandwidth(fr, center_freq);
+
 		if (max_bandwidth && *bandwidth <= max_bandwidth) {
 			*reg_rule = rr;
 			*bandwidth = max_bandwidth;
@@ -775,6 +819,9 @@ static int freq_reg_info(u32 center_freq, u32 *bandwidth,
 		}
 	}
 
+	if (!band_rule_found)
+		return -ERANGE;
+
 	return !max_bandwidth;
 }
 
@@ -799,8 +846,37 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
 		&max_bandwidth, &reg_rule);
 
 	if (r) {
-		flags |= IEEE80211_CHAN_DISABLED;
-		chan->flags = flags;
+		/* This means no regulatory rule was found in the country IE
+		 * with a frequency range on the center_freq's band, since
+		 * IEEE-802.11 allows for a country IE to have a subset of the
+		 * regulatory information provided in a country we ignore
+		 * disabling the channel unless at least one reg rule was
+		 * found on the center_freq's band. For details see this
+		 * clarification:
+		 *
+		 * http://tinyurl.com/11d-clarification
+		 */
+		if (r == -ERANGE &&
+		    last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) {
+#ifdef CONFIG_CFG80211_REG_DEBUG
+			printk(KERN_DEBUG "cfg80211: Leaving channel %d MHz "
+				"intact on %s - no rule found in band on "
+				"Country IE\n",
+				chan->center_freq, wiphy_name(wiphy));
+#endif
+		} else {
+		/* In this case we know the country IE has at least one reg rule
+		 * for the band so we respect its band definitions */
+#ifdef CONFIG_CFG80211_REG_DEBUG
+			if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE)
+				printk(KERN_DEBUG "cfg80211: Disabling "
+					"channel %d MHz on %s due to "
+					"Country IE\n",
+					chan->center_freq, wiphy_name(wiphy));
+#endif
+			flags |= IEEE80211_CHAN_DISABLED;
+			chan->flags = flags;
+		}
 		return;
 	}
 
-- 
cgit v1.2.3


From 02e68a3da0fbdb178cdec54b7db48edeefd1691d Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Wed, 7 Jan 2009 17:43:37 -0800
Subject: cfg80211: Fix parsed country IE info for 5 GHz

The country IE number of channels on 5 GHz specifies the number
of 5 GHz channels, not the number of sequential channel numbers.
For example, if in a country IEs if the first channel given is 36
and the number of channels passed is 4 then the individual channel
numbers defined for the 5 GHz PHY by these parameters

are: 36, 40, 44, 48
not: 36, 37, 38, 39

See: http://tinyurl.com/11d-clarification

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 5f6d20d98ee..bc494cef210 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -563,6 +563,7 @@ static struct ieee80211_regdomain *country_ie_2_rd(
 
 	/* This time around we fill in the rd */
 	while (country_ie_len >= 3) {
+		int end_channel = 0;
 		struct ieee80211_country_ie_triplet *triplet =
 			(struct ieee80211_country_ie_triplet *) country_ie;
 		struct ieee80211_reg_rule *reg_rule = NULL;
@@ -584,6 +585,23 @@ static struct ieee80211_regdomain *country_ie_2_rd(
 
 		reg_rule->flags = flags;
 
+		/* 2 GHz */
+		if (triplet->chans.first_channel <= 14)
+			end_channel = triplet->chans.first_channel +
+				triplet->chans.num_channels;
+		else
+			/*
+			 * 5 GHz -- For example in country IEs if the first
+			 * channel given is 36 and the number of channels is 4
+			 * then the individual channel numbers defined for the
+			 * 5 GHz PHY by these parameters are: 36, 40, 44, and 48
+			 * and not 36, 37, 38, 39.
+			 *
+			 * See: http://tinyurl.com/11d-clarification
+			 */
+			end_channel =  triplet->chans.first_channel +
+				(4 * (triplet->chans.num_channels - 1));
+
 		/* The +10 is since the regulatory domain expects
 		 * the actual band edge, not the center of freq for
 		 * its start and end freqs, assuming 20 MHz bandwidth on
@@ -593,8 +611,7 @@ static struct ieee80211_regdomain *country_ie_2_rd(
 				triplet->chans.first_channel) - 10);
 		freq_range->end_freq_khz =
 			MHZ_TO_KHZ(ieee80211_channel_to_frequency(
-				triplet->chans.first_channel +
-					triplet->chans.num_channels) + 10);
+				end_channel) + 10);
 
 		/* Large arbitrary values, we intersect later */
 		/* Increment this if we ever support >= 40 MHz channels
-- 
cgit v1.2.3


From 24e94de41e76134fad05552588fe01af2cab1494 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Sun, 18 Jan 2009 21:32:11 -0800
Subject: net/9p: fid->fid is used uninitialized

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/9p/client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index 821f1ec0b2c..1eb580c38fb 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -618,7 +618,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt)
 		return ERR_PTR(-ENOMEM);
 
 	ret = p9_idpool_get(clnt->fidpool);
-	if (fid->fid < 0) {
+	if (ret < 0) {
 		ret = -ENOSPC;
 		goto error;
 	}
-- 
cgit v1.2.3


From 67fd1a731ff1a990d4da7689909317756e50cb4d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 19 Jan 2009 16:26:44 -0800
Subject: net: Add debug info to track down GSO checksum bug

I'm trying to track down why people're hitting the checksum warning
in skb_gso_segment.  As the problem seems to be hitting lots of
people and I can't reproduce it or locate the bug, here is a patch
to print out more details which hopefully should help us to track
this down.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 8d675975d85..6e44c327710 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1534,7 +1534,19 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
 
-	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
+	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+		struct net_device *dev = skb->dev;
+		struct ethtool_drvinfo info = {};
+
+		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
+			dev->ethtool_ops->get_drvinfo(dev, &info);
+
+		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
+			"ip_summed=%d",
+		     info.driver, dev ? dev->features : 0L,
+		     skb->sk ? skb->sk->sk_route_caps : 0L,
+		     skb->len, skb->data_len, skb->ip_summed);
+
 		if (skb_header_cloned(skb) &&
 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 			return ERR_PTR(err);
-- 
cgit v1.2.3


From 8b9d3728977760f6bd1317c4420890f73695354e Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 19 Jan 2009 17:03:56 -0800
Subject: net: Fix data corruption when splicing from sockets.

The trick in socket splicing where we try to convert the skb->data
into a page based reference using virt_to_page() does not work so
well.

The idea is to pass the virt_to_page() reference via the pipe
buffer, and refcount the buffer using a SKB reference.

But if we are splicing from a socket to a socket (via sendpage)
this doesn't work.

The from side processing will grab the page (and SKB) references.
The sendpage() calls will grab page references only, return, and
then the from side processing completes and drops the SKB ref.

The page based reference to skb->data is not enough to keep the
kmalloc() buffer backing it from being reused.  Yet, that is
all that the socket send side has at this point.

This leads to data corruption if the skb->data buffer is reused
by SLAB before the send side socket actually gets the TX packet
out to the device.

The fix employed here is to simply allocate a page and copy the
skb->data bytes into that page.

This will hurt performance, but there is no clear way to fix this
properly without a copy at the present time, and it is important
to get rid of the data corruption.

With fixes from Herbert Xu.

Tested-by: Willy Tarreau <w@1wt.eu>
Foreseen-by: Changli Gao <xiaosuo@gmail.com>
Diagnosed-by: Willy Tarreau <w@1wt.eu>
Reported-by: Willy Tarreau <w@1wt.eu>
Fixed-by: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 61 ++++++++++++++++++++++++++-----------------------------
 1 file changed, 29 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 65eac773903..56272ac6dfd 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -73,17 +73,13 @@ static struct kmem_cache *skbuff_fclone_cache __read_mostly;
 static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
 				  struct pipe_buffer *buf)
 {
-	struct sk_buff *skb = (struct sk_buff *) buf->private;
-
-	kfree_skb(skb);
+	put_page(buf->page);
 }
 
 static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
 				struct pipe_buffer *buf)
 {
-	struct sk_buff *skb = (struct sk_buff *) buf->private;
-
-	skb_get(skb);
+	get_page(buf->page);
 }
 
 static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
@@ -1334,9 +1330,19 @@ fault:
  */
 static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
 {
-	struct sk_buff *skb = (struct sk_buff *) spd->partial[i].private;
+	put_page(spd->pages[i]);
+}
 
-	kfree_skb(skb);
+static inline struct page *linear_to_page(struct page *page, unsigned int len,
+					  unsigned int offset)
+{
+	struct page *p = alloc_pages(GFP_KERNEL, 0);
+
+	if (!p)
+		return NULL;
+	memcpy(page_address(p) + offset, page_address(page) + offset, len);
+
+	return p;
 }
 
 /*
@@ -1344,16 +1350,23 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
  */
 static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
 				unsigned int len, unsigned int offset,
-				struct sk_buff *skb)
+				struct sk_buff *skb, int linear)
 {
 	if (unlikely(spd->nr_pages == PIPE_BUFFERS))
 		return 1;
 
+	if (linear) {
+		page = linear_to_page(page, len, offset);
+		if (!page)
+			return 1;
+	} else
+		get_page(page);
+
 	spd->pages[spd->nr_pages] = page;
 	spd->partial[spd->nr_pages].len = len;
 	spd->partial[spd->nr_pages].offset = offset;
-	spd->partial[spd->nr_pages].private = (unsigned long) skb_get(skb);
 	spd->nr_pages++;
+
 	return 0;
 }
 
@@ -1369,7 +1382,7 @@ static inline void __segment_seek(struct page **page, unsigned int *poff,
 static inline int __splice_segment(struct page *page, unsigned int poff,
 				   unsigned int plen, unsigned int *off,
 				   unsigned int *len, struct sk_buff *skb,
-				   struct splice_pipe_desc *spd)
+				   struct splice_pipe_desc *spd, int linear)
 {
 	if (!*len)
 		return 1;
@@ -1392,7 +1405,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
 		/* the linear region may spread across several pages  */
 		flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
 
-		if (spd_fill_page(spd, page, flen, poff, skb))
+		if (spd_fill_page(spd, page, flen, poff, skb, linear))
 			return 1;
 
 		__segment_seek(&page, &poff, &plen, flen);
@@ -1419,7 +1432,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 	if (__splice_segment(virt_to_page(skb->data),
 			     (unsigned long) skb->data & (PAGE_SIZE - 1),
 			     skb_headlen(skb),
-			     offset, len, skb, spd))
+			     offset, len, skb, spd, 1))
 		return 1;
 
 	/*
@@ -1429,7 +1442,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
 
 		if (__splice_segment(f->page, f->page_offset, f->size,
-				     offset, len, skb, spd))
+				     offset, len, skb, spd, 0))
 			return 1;
 	}
 
@@ -1442,7 +1455,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
  * the frag list, if such a thing exists. We'd probably need to recurse to
  * handle that cleanly.
  */
-int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
+int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 		    struct pipe_inode_info *pipe, unsigned int tlen,
 		    unsigned int flags)
 {
@@ -1455,16 +1468,6 @@ int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
 		.ops = &sock_pipe_buf_ops,
 		.spd_release = sock_spd_release,
 	};
-	struct sk_buff *skb;
-
-	/*
-	 * I'd love to avoid the clone here, but tcp_read_sock()
-	 * ignores reference counts and unconditonally kills the sk_buff
-	 * on return from the actor.
-	 */
-	skb = skb_clone(__skb, GFP_KERNEL);
-	if (unlikely(!skb))
-		return -ENOMEM;
 
 	/*
 	 * __skb_splice_bits() only fails if the output has no room left,
@@ -1488,15 +1491,9 @@ int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
 	}
 
 done:
-	/*
-	 * drop our reference to the clone, the pipe consumption will
-	 * drop the rest.
-	 */
-	kfree_skb(skb);
-
 	if (spd.nr_pages) {
+		struct sock *sk = skb->sk;
 		int ret;
-		struct sock *sk = __skb->sk;
 
 		/*
 		 * Drop the socket lock, otherwise we have reverse
-- 
cgit v1.2.3


From 66f9a2590aa87dc77cddaeaf46177de76edd2339 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 20 Jan 2009 09:49:51 -0800
Subject: Revert "xfrm: For 32/64 compatability wrt. xfrm_usersa_info"

This reverts commit fc8c7dc1b29560c016a67a34ccff32a712b5aa86.

As indicated by Jiri Klimes, this won't work.  These numbers are
not only used the size validation, they are also used to locate
attributes sitting after the message.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_user.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 7877e7975da..b95a2d64eb5 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1914,17 +1914,10 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
 }
 #endif
 
-/* For the xfrm_usersa_info cases we have to work around some 32-bit vs.
- * 64-bit compatability issues.  On 32-bit the structure is 220 bytes, but
- * for 64-bit it gets padded out to 224 bytes.  Those bytes are just
- * padding and don't have any content we care about.  Therefore as long
- * as we have enough bytes for the content we can make both cases work.
- */
-
 #define XMSGSIZE(type) sizeof(struct type)
 
 static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
-	[XFRM_MSG_NEWSA       - XFRM_MSG_BASE] = 220, /* see above */
+	[XFRM_MSG_NEWSA       - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info),
 	[XFRM_MSG_DELSA       - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id),
 	[XFRM_MSG_GETSA       - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id),
 	[XFRM_MSG_NEWPOLICY   - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_info),
@@ -1934,7 +1927,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
 	[XFRM_MSG_ACQUIRE     - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_acquire),
 	[XFRM_MSG_EXPIRE      - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_expire),
 	[XFRM_MSG_UPDPOLICY   - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_info),
-	[XFRM_MSG_UPDSA       - XFRM_MSG_BASE] = 220, /* see above */
+	[XFRM_MSG_UPDSA       - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info),
 	[XFRM_MSG_POLEXPIRE   - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_polexpire),
 	[XFRM_MSG_FLUSHSA     - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_flush),
 	[XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = 0,
-- 
cgit v1.2.3


From 357f5b0b91054ae23385ea4b0634bb8b43736e83 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Sat, 17 Jan 2009 06:47:12 +0000
Subject: NET: net_namespace, fix lock imbalance

register_pernet_gen_subsys omits mutex_unlock in one fail path.
Fix it.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 55cffad2f32..55151faaf90 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -341,8 +341,8 @@ again:
 	rv = register_pernet_operations(first_device, ops);
 	if (rv < 0)
 		ida_remove(&net_generic_ids, *id);
-	mutex_unlock(&net_mutex);
 out:
+	mutex_unlock(&net_mutex);
 	return rv;
 }
 EXPORT_SYMBOL_GPL(register_pernet_gen_subsys);
-- 
cgit v1.2.3


From ebad18e93fbc6bc63ee734edbc0eb38ac6b919c0 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 17 Jan 2009 19:46:16 +0000
Subject: gro: Fix handling of complete checksums in IPv6

We need to perform skb_postpull_rcsum after pulling the IPv6
header in order to maintain the correctness of the complete
checksum.

This patch also adds a missing iph reload after pulling.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 94f74f5b0cb..c802bc1658a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -797,6 +797,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
 	unsigned int nlen;
 	int flush = 1;
 	int proto;
+	__wsum csum;
 
 	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
 		goto out;
@@ -808,6 +809,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
 
 	rcu_read_lock();
 	proto = ipv6_gso_pull_exthdrs(skb, iph->nexthdr);
+	iph = ipv6_hdr(skb);
 	IPV6_GRO_CB(skb)->proto = proto;
 	ops = rcu_dereference(inet6_protos[proto]);
 	if (!ops || !ops->gro_receive)
@@ -839,8 +841,13 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
 
 	NAPI_GRO_CB(skb)->flush |= flush;
 
+	csum = skb->csum;
+	skb_postpull_rcsum(skb, iph, skb_network_header_len(skb));
+
 	pp = ops->gro_receive(head, skb);
 
+	skb->csum = csum;
+
 out_unlock:
 	rcu_read_unlock();
 
-- 
cgit v1.2.3


From 9a8e47ffd95608f0768e1a8a0225c822aa53aa9b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 17 Jan 2009 19:47:18 +0000
Subject: gro: Fix error handling on extremely short frags

When a frag is shorter than an Ethernet header, we'd return a
zeroed packet instead of aborting.  This patch fixes that.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 6e44c327710..5379b0c1190 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2536,6 +2536,7 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
 
 	if (!pskb_may_pull(skb, ETH_HLEN)) {
 		napi_reuse_skb(napi, skb);
+		skb = NULL;
 		goto out;
 	}
 
-- 
cgit v1.2.3


From 37fe4732b978eb02e5433387a40f2b61706cebe3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 17 Jan 2009 19:48:13 +0000
Subject: gro: Fix merging of paged packets

The previous fix to paged packets broke the merging because it
reset the skb->len before we added it to the merged packet.  This
wasn't detected because it simply resulted in the truncation of
the packet while the missing bit is subsequently retransmitted.

The fix is to store skb->len before we clobber it.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 56272ac6dfd..2e5f2ca3bdc 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2585,8 +2585,9 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	struct sk_buff *nskb;
 	unsigned int headroom;
 	unsigned int hlen = p->data - skb_mac_header(p);
+	unsigned int len = skb->len;
 
-	if (hlen + p->len + skb->len >= 65536)
+	if (hlen + p->len + len >= 65536)
 		return -E2BIG;
 
 	if (skb_shinfo(p)->frag_list)
@@ -2648,9 +2649,9 @@ merge:
 
 done:
 	NAPI_GRO_CB(p)->count++;
-	p->data_len += skb->len;
-	p->truesize += skb->len;
-	p->len += skb->len;
+	p->data_len += len;
+	p->truesize += len;
+	p->len += len;
 
 	NAPI_GRO_CB(skb)->same_flow = 1;
 	return 0;
-- 
cgit v1.2.3


From 748085fcbedbf7b0f38d95e178265d7b13360b44 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 21 Jan 2009 12:19:49 -0800
Subject: netfilter: ctnetlink: fix scheduling while atomic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Caused by call to request_module() while holding nf_conntrack_lock.

Reported-and-tested-by: Kövesdi György <kgy@teledigit.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 3dddec6d2f7..c32a7e8e3a1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -831,13 +831,16 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct,
 	if (!parse_nat_setup) {
 #ifdef CONFIG_MODULES
 		rcu_read_unlock();
+		spin_unlock_bh(&nf_conntrack_lock);
 		nfnl_unlock();
 		if (request_module("nf-nat-ipv4") < 0) {
 			nfnl_lock();
+			spin_lock_bh(&nf_conntrack_lock);
 			rcu_read_lock();
 			return -EOPNOTSUPP;
 		}
 		nfnl_lock();
+		spin_lock_bh(&nf_conntrack_lock);
 		rcu_read_lock();
 		if (nfnetlink_parse_nat_setup_hook)
 			return -EAGAIN;
-- 
cgit v1.2.3


From 9098c24f35f7da6c89a83420acf21e3d7b35151d Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 22 Jan 2009 11:11:56 +0300
Subject: fs/Kconfig: move sunrpc out

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
 net/sunrpc/Kconfig | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 net/sunrpc/Kconfig

(limited to 'net')

diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
new file mode 100644
index 00000000000..eda4a7aee59
--- /dev/null
+++ b/net/sunrpc/Kconfig
@@ -0,0 +1,79 @@
+config SUNRPC
+	tristate
+
+config SUNRPC_GSS
+	tristate
+
+config SUNRPC_XPRT_RDMA
+	tristate
+	depends on SUNRPC && INFINIBAND && EXPERIMENTAL
+	default SUNRPC && INFINIBAND
+	help
+	  This option enables an RPC client transport capability that
+	  allows the NFS client to mount servers via an RDMA-enabled
+	  transport.
+
+	  To compile RPC client RDMA transport support as a module,
+	  choose M here: the module will be called xprtrdma.
+
+	  If unsure, say N.
+
+config SUNRPC_REGISTER_V4
+	bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)"
+	depends on SUNRPC && EXPERIMENTAL
+	default n
+	help
+	  Sun added support for registering RPC services at an IPv6
+	  address by creating two new versions of the rpcbind protocol
+	  (RFC 1833).
+
+	  This option enables support in the kernel RPC server for
+	  registering kernel RPC services via version 4 of the rpcbind
+	  protocol.  If you enable this option, you must run a portmapper
+	  daemon that supports rpcbind protocol version 4.
+
+	  Serving NFS over IPv6 from knfsd (the kernel's NFS server)
+	  requires that you enable this option and use a portmapper that
+	  supports rpcbind version 4.
+
+	  If unsure, say N to get traditional behavior (register kernel
+	  RPC services using only rpcbind version 2).  Distributions
+	  using the legacy Linux portmapper daemon must say N here.
+
+config RPCSEC_GSS_KRB5
+	tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
+	depends on SUNRPC && EXPERIMENTAL
+	select SUNRPC_GSS
+	select CRYPTO
+	select CRYPTO_MD5
+	select CRYPTO_DES
+	select CRYPTO_CBC
+	help
+	  Choose Y here to enable Secure RPC using the Kerberos version 5
+	  GSS-API mechanism (RFC 1964).
+
+	  Secure RPC calls with Kerberos require an auxiliary user-space
+	  daemon which may be found in the Linux nfs-utils package
+	  available from http://linux-nfs.org/.  In addition, user-space
+	  Kerberos support should be installed.
+
+	  If unsure, say N.
+
+config RPCSEC_GSS_SPKM3
+	tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)"
+	depends on SUNRPC && EXPERIMENTAL
+	select SUNRPC_GSS
+	select CRYPTO
+	select CRYPTO_MD5
+	select CRYPTO_DES
+	select CRYPTO_CAST5
+	select CRYPTO_CBC
+	help
+	  Choose Y here to enable Secure RPC using the SPKM3 public key
+	  GSS-API mechansim (RFC 2025).
+
+	  Secure RPC calls with SPKM3 require an auxiliary userspace
+	  daemon which may be found in the Linux nfs-utils package
+	  available from http://linux-nfs.org/.
+
+	  If unsure, say N.
-- 
cgit v1.2.3


From 5dc306f3bd1d4cfdf79df39221b3036eab1ddcf3 Mon Sep 17 00:00:00 2001
From: Brian Cavagnolo <brian@cozybit.com>
Date: Fri, 16 Jan 2009 19:04:49 -0800
Subject: mac80211: decrement ref count to netdev after launching mesh
 discovery

After launching mesh discovery in tx path, reference count was not being
decremented.  This was preventing module unload.

Signed-off-by: Brian Cavagnolo <brian@cozybit.com>
Signed-off-by: Andrey Yurovsky <andrey@cozybit.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index a4af3a124cc..4278e545638 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1307,8 +1307,10 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (is_multicast_ether_addr(hdr->addr3))
 			memcpy(hdr->addr1, hdr->addr3, ETH_ALEN);
 		else
-			if (mesh_nexthop_lookup(skb, osdata))
-				return  0;
+			if (mesh_nexthop_lookup(skb, osdata)) {
+				dev_put(odev);
+				return 0;
+			}
 		if (memcmp(odev->dev_addr, hdr->addr4, ETH_ALEN) != 0)
 			IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.mesh,
 							    fwded_frames);
-- 
cgit v1.2.3


From 391429c18f58ae37cc2e254e408bff847f4beb21 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@web.de>
Date: Sun, 18 Jan 2009 02:24:15 +0100
Subject: mac80211: fix slot time debug message

wlan0: switched to short barker preamble (BSSID=00:01:aa:bb:cc:dd)
wlan0: switched to short slot (BSSID=) <something is missing here>

should be:

wlan0: switched to short barker preamble (BSSID=00:01:aa:bb:cc:dd)
wlan0: switched to short slot (BSSID=00:01:aa:bb:cc:dd)

Signed-off-by: Christian Lamparter <chunkeey@web.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 5ba721b6a39..2b890af01ba 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -620,8 +620,8 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
 	if (use_short_slot != bss_conf->use_short_slot) {
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 		if (net_ratelimit()) {
-			printk(KERN_DEBUG "%s: switched to %s slot"
-			       " (BSSID=%s)\n",
+			printk(KERN_DEBUG "%s: switched to %s slot time"
+			       " (BSSID=%pM)\n",
 			       sdata->dev->name,
 			       use_short_slot ? "short" : "long",
 			       ifsta->bssid);
-- 
cgit v1.2.3


From 6574df9a89f9f7da3a4e5cee7633d430319d3350 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 22 Jan 2009 14:52:43 -0800
Subject: sctp: Correctly start rtx timer on new packet transmissions.

Commit 62aeaff5ccd96462b7077046357a6d7886175a57
(sctp: Start T3-RTX timer when fast retransmitting lowest TSN)
introduced a regression where it was possible to forcibly
restart the sctp retransmit timer at the transmission of any
new chunk.  This resulted in much longer timeout times and
sometimes hung sctp connections.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/outqueue.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 247ebc95c1e..bc411c89621 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -929,7 +929,6 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 		}
 
 		/* Finally, transmit new packets.  */
-		start_timer = 0;
 		while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
 			/* RFC 2960 6.5 Every DATA chunk MUST carry a valid
 			 * stream identifier.
@@ -1028,7 +1027,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 			list_add_tail(&chunk->transmitted_list,
 				      &transport->transmitted);
 
-			sctp_transport_reset_timers(transport, start_timer-1);
+			sctp_transport_reset_timers(transport, 0);
 
 			q->empty = 0;
 
-- 
cgit v1.2.3


From 759af00ebef858015eb68876ac1f383bcb6a1774 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 22 Jan 2009 14:53:01 -0800
Subject: sctp: Properly timestamp outgoing data chunks for rtx purposes

Recent changes to the retransmit code exposed a long standing
bug where it was possible for a chunk to be time stamped
after the retransmit timer was reset.  This caused a rare
situation where the retrnamist timer has expired, but
nothing was marked for retrnasmission because all of
timesamps on data were less then 1 rto ago.  As result,
the timer was never restarted since nothing was retransmitted,
and this resulted in a hung association that did couldn't
complete the data transfer.  The solution is to timestamp
the chunk when it's added to the packet for transmission
purposes.  After the packet is trsnmitted the rtx timer
is restarted.  This guarantees that when the timer expires,
there will be data to retransmit.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/output.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sctp/output.c b/net/sctp/output.c
index c3f417f7ec6..73639355157 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -324,14 +324,16 @@ append:
 	switch (chunk->chunk_hdr->type) {
 	    case SCTP_CID_DATA:
 		retval = sctp_packet_append_data(packet, chunk);
+		if (SCTP_XMIT_OK != retval)
+			goto finish;
 		/* Disallow SACK bundling after DATA. */
 		packet->has_sack = 1;
 		/* Disallow AUTH bundling after DATA */
 		packet->has_auth = 1;
 		/* Let it be knows that packet has DATA in it */
 		packet->has_data = 1;
-		if (SCTP_XMIT_OK != retval)
-			goto finish;
+		/* timestamp the chunk for rtx purposes */
+		chunk->sent_at = jiffies;
 		break;
 	    case SCTP_CID_COOKIE_ECHO:
 		packet->has_cookie_echo = 1;
@@ -470,7 +472,6 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 			} else
 				chunk->resent = 1;
 
-			chunk->sent_at = jiffies;
 			has_data = 1;
 		}
 
-- 
cgit v1.2.3


From ae53b5bd77719fed58086c5be60ce4f22bffe1c6 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Thu, 22 Jan 2009 14:53:23 -0800
Subject: sctp: Fix another socket race during accept/peeloff

There is a race between sctp_rcv() and sctp_accept() where we
have moved the association from the listening socket to the
accepted socket, but sctp_rcv() processing cached the old
socket and continues to use it.

The easy solution is to check for the socket mismatch once we've
grabed the socket lock.  If we hit a mis-match, that means
that were are currently holding the lock on the listening socket,
but the association is refrencing a newly accepted socket.  We need
to drop the lock on the old socket and grab the lock on the new one.

A more proper solution might be to create accepted sockets when
the new association is established, similar to TCP.  That would
eliminate the race for 1-to-1 style sockets, but it would still
existing for 1-to-many sockets where a user wished to peeloff an
association.  For now, we'll live with this easy solution as
it addresses the problem.

Reported-by: Michal Hocko <mhocko@suse.cz>
Reported-by: Karsten Keil <kkeil@suse.de>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/input.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'net')

diff --git a/net/sctp/input.c b/net/sctp/input.c
index bf612d954d4..2e4a8646dbc 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -249,6 +249,19 @@ int sctp_rcv(struct sk_buff *skb)
 	 */
 	sctp_bh_lock_sock(sk);
 
+	if (sk != rcvr->sk) {
+		/* Our cached sk is different from the rcvr->sk.  This is
+		 * because migrate()/accept() may have moved the association
+		 * to a new socket and released all the sockets.  So now we
+		 * are holding a lock on the old socket while the user may
+		 * be doing something with the new socket.  Switch our veiw
+		 * of the current sk.
+		 */
+		sctp_bh_unlock_sock(sk);
+		sk = rcvr->sk;
+		sctp_bh_lock_sock(sk);
+	}
+
 	if (sock_owned_by_user(sk)) {
 		SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG);
 		sctp_add_backlog(sk, skb);
-- 
cgit v1.2.3


From a8d694c651356ec89452e15b0189c061fb7e1cf1 Mon Sep 17 00:00:00 2001
From: Timo Teras <timo.teras@iki.fi>
Date: Sun, 25 Jan 2009 20:49:14 -0800
Subject: af_key: initialize xfrm encap_oa

Currently encap_oa is left uninitialized, so it contains garbage data which
is visible to userland via Netlink. Initialize it by zeroing it out.

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/key/af_key.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/key/af_key.c b/net/key/af_key.c
index f8bd8df5e25..7dcbde3ea7d 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1285,6 +1285,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 				ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1];
 			natt->encap_dport = n_port->sadb_x_nat_t_port_port;
 		}
+		memset(&natt->encap_oa, 0, sizeof(natt->encap_oa));
 	}
 
 	err = xfrm_init_state(x);
-- 
cgit v1.2.3


From d6eb633fe680c18119346a364acff7723245e278 Mon Sep 17 00:00:00 2001
From: Matt Helsley <matthltc@us.ibm.com>
Date: Mon, 26 Jan 2009 12:25:55 -0800
Subject: net: Move config NET_NS to from net/Kconfig to init/Kconfig

Make NET_NS available underneath the generic Namespaces config option
since all of the other namespace options are there.

Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/Kconfig | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'net')

diff --git a/net/Kconfig b/net/Kconfig
index bf2776018f7..cdb8fdef6c4 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -24,14 +24,6 @@ if NET
 
 menu "Networking options"
 
-config NET_NS
-	bool "Network namespace support"
-	default n
-	depends on EXPERIMENTAL && NAMESPACES
-	help
-	  Allow user space to create what appear to be multiple instances
-	  of the network stack.
-
 config COMPAT_NET_DEV_OPS
        def_bool y
 
-- 
cgit v1.2.3


From 116cb42855fbd052fc8cd2ca3e06050bff762673 Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Mon, 26 Jan 2009 12:37:53 -0800
Subject: vlan: Export symbols as non GPL symbols.

In previous kernels, any kernel module could get access to the
'real-device' and the VLAN-ID for a particular VLAN.  In more recent
kernels, the code was restructured such that this is hard to do
without accessing private .h files for any module that cannot use
GPL-only symbols.

Attached is a patch to once again allow non-GPL modules the ability to
access the real-device and VLAN id for VLANs.

Signed-off-by: Ben Greear <greearb@candelatech.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 6c132394026..e9db889d622 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -62,13 +62,13 @@ struct net_device *vlan_dev_real_dev(const struct net_device *dev)
 {
 	return vlan_dev_info(dev)->real_dev;
 }
-EXPORT_SYMBOL_GPL(vlan_dev_real_dev);
+EXPORT_SYMBOL(vlan_dev_real_dev);
 
 u16 vlan_dev_vlan_id(const struct net_device *dev)
 {
 	return vlan_dev_info(dev)->vlan_id;
 }
-EXPORT_SYMBOL_GPL(vlan_dev_vlan_id);
+EXPORT_SYMBOL(vlan_dev_vlan_id);
 
 static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
 			   unsigned int vlan_tci, struct sk_buff *skb)
-- 
cgit v1.2.3


From 98322f22eca889478045cf896b572250d03dc45f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 26 Jan 2009 21:35:35 -0800
Subject: udp: optimize bind(0) if many ports are in use

commit 9088c5609584684149f3fb5b065aa7f18dcb03ff
(udp: Improve port randomization) introduced a regression for UDP bind() syscall
to null port (getting a random port) in case lot of ports are already in use.

This is because we do about 28000 scans of very long chains (220 sockets per chain),
with many spin_lock_bh()/spin_unlock_bh() calls.

Fix this using a bitmap (64 bytes for current value of UDP_HTABLE_SIZE)
so that we scan chains at most once.

Instead of 250 ms per bind() call, we get after patch a time of 2.9 ms

Based on a report from Vitaly Mayatskikh

Reported-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Tested-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 55 +++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 39 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cf5ab0581eb..b7faffe5c02 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -120,8 +120,11 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min);
 atomic_t udp_memory_allocated;
 EXPORT_SYMBOL(udp_memory_allocated);
 
+#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE)
+
 static int udp_lib_lport_inuse(struct net *net, __u16 num,
 			       const struct udp_hslot *hslot,
+			       unsigned long *bitmap,
 			       struct sock *sk,
 			       int (*saddr_comp)(const struct sock *sk1,
 						 const struct sock *sk2))
@@ -132,12 +135,17 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
 	sk_nulls_for_each(sk2, node, &hslot->head)
 		if (net_eq(sock_net(sk2), net)			&&
 		    sk2 != sk					&&
-		    sk2->sk_hash == num				&&
+		    (bitmap || sk2->sk_hash == num)		&&
 		    (!sk2->sk_reuse || !sk->sk_reuse)		&&
 		    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
 			|| sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-		    (*saddr_comp)(sk, sk2))
-			return 1;
+		    (*saddr_comp)(sk, sk2)) {
+			if (bitmap)
+				__set_bit(sk2->sk_hash / UDP_HTABLE_SIZE,
+					  bitmap);
+			else
+				return 1;
+		}
 	return 0;
 }
 
@@ -160,32 +168,47 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 	if (!snum) {
 		int low, high, remaining;
 		unsigned rand;
-		unsigned short first;
+		unsigned short first, last;
+		DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
 
 		inet_get_local_port_range(&low, &high);
 		remaining = (high - low) + 1;
 
 		rand = net_random();
-		snum = first = rand % remaining + low;
-		rand |= 1;
-		for (;;) {
-			hslot = &udptable->hash[udp_hashfn(net, snum)];
+		first = (((u64)rand * remaining) >> 32) + low;
+		/*
+		 * force rand to be an odd multiple of UDP_HTABLE_SIZE
+		 */
+		rand = (rand | 1) * UDP_HTABLE_SIZE;
+		for (last = first + UDP_HTABLE_SIZE; first != last; first++) {
+			hslot = &udptable->hash[udp_hashfn(net, first)];
+			bitmap_zero(bitmap, PORTS_PER_CHAIN);
 			spin_lock_bh(&hslot->lock);
-			if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp))
-				break;
-			spin_unlock_bh(&hslot->lock);
+			udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
+					    saddr_comp);
+
+			snum = first;
+			/*
+			 * Iterate on all possible values of snum for this hash.
+			 * Using steps of an odd multiple of UDP_HTABLE_SIZE
+			 * give us randomization and full range coverage.
+			 */
 			do {
-				snum = snum + rand;
-			} while (snum < low || snum > high);
-			if (snum == first)
-				goto fail;
+				if (low <= snum && snum <= high &&
+				    !test_bit(snum / UDP_HTABLE_SIZE, bitmap))
+					goto found;
+				snum += rand;
+			} while (snum != first);
+			spin_unlock_bh(&hslot->lock);
 		}
+		goto fail;
 	} else {
 		hslot = &udptable->hash[udp_hashfn(net, snum)];
 		spin_lock_bh(&hslot->lock);
-		if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp))
+		if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp))
 			goto fail_unlock;
 	}
+found:
 	inet_sk(sk)->num = snum;
 	sk->sk_hash = snum;
 	if (sk_unhashed(sk)) {
-- 
cgit v1.2.3


From 9fa5fdf291c9b58b1cb8b4bb2a0ee57efa21d635 Mon Sep 17 00:00:00 2001
From: Dimitris Michailidis <dm@chelsio.com>
Date: Mon, 26 Jan 2009 22:15:31 -0800
Subject: tcp: Fix length tcp_splice_data_recv passes to skb_splice_bits.

tcp_splice_data_recv has two lengths to consider: the len parameter it
gets from tcp_read_sock, which specifies the amount of data in the skb,
and rd_desc->count, which is the amount of data the splice caller still
wants.  Currently it passes just the latter to skb_splice_bits, which then
splices min(rd_desc->count, skb->len - offset) bytes.

Most of the time this is fine, except when the skb contains urgent data.
In that case len goes only up to the urgent byte and is less than
skb->len - offset.  By ignoring len tcp_splice_data_recv may a) splice
data tcp_read_sock told it not to, b) return to tcp_read_sock a value > len.

Now, tcp_read_sock doesn't handle used > len and leaves the socket in a
bad state (both sk_receive_queue and copied_seq are bad at that point)
resulting in duplicated data and corruption.

Fix by passing min(rd_desc->count, len) to skb_splice_bits.

Signed-off-by: Dimitris Michailidis <dm@chelsio.com>
Acked-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0cd71b84e48..76b148bcb0d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -524,7 +524,8 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
 	struct tcp_splice_state *tss = rd_desc->arg.data;
 	int ret;
 
-	ret = skb_splice_bits(skb, offset, tss->pipe, rd_desc->count, tss->flags);
+	ret = skb_splice_bits(skb, offset, tss->pipe, min(rd_desc->count, len),
+			      tss->flags);
 	if (ret > 0)
 		rd_desc->count -= ret;
 	return ret;
-- 
cgit v1.2.3


From ce0cf6622c9a6f18c2723ea4bef7616799a1ca39 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Sun, 2 Nov 2008 16:18:08 -0500
Subject: nfs: note that CONFIG_SUNRPC_XPRT_RDMA turns on server side support
 too

We forgot to update this when adding server-side support.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/Kconfig | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index eda4a7aee59..dcef600d0bf 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -9,9 +9,8 @@ config SUNRPC_XPRT_RDMA
 	depends on SUNRPC && INFINIBAND && EXPERIMENTAL
 	default SUNRPC && INFINIBAND
 	help
-	  This option enables an RPC client transport capability that
-	  allows the NFS client to mount servers via an RDMA-enabled
-	  transport.
+	  This option allows the NFS client and server to support
+	  an RDMA-enabled transport.
 
 	  To compile RPC client RDMA transport support as a module,
 	  choose M here: the module will be called xprtrdma.
-- 
cgit v1.2.3


From 6c06a478c9e59d1584a5dc1b2b3519bae5d6546a Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 27 Jan 2009 22:30:19 -0800
Subject: net: fix xfrm reverse flow lookup for icmp6

This patch fixes the xfrm reverse flow lookup for icmp6 so that icmp6 packets
don't get lost over ipsec tunnels. Similar patch is in RHEL5 kernel for a quite
long time and I do not see why it isn't in mainline.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/icmp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 4f433847d95..36dff880718 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -443,10 +443,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	if (xfrm_decode_session_reverse(skb, &fl2, AF_INET6))
 		goto relookup_failed;
 
-	if (ip6_dst_lookup(sk, &dst2, &fl))
+	if (ip6_dst_lookup(sk, &dst2, &fl2))
 		goto relookup_failed;
 
-	err = xfrm_lookup(net, &dst2, &fl, sk, XFRM_LOOKUP_ICMP);
+	err = xfrm_lookup(net, &dst2, &fl2, sk, XFRM_LOOKUP_ICMP);
 	switch (err) {
 	case 0:
 		dst_release(dst);
-- 
cgit v1.2.3


From 1d6e55f195128813f96458203a9fa14204f9251e Mon Sep 17 00:00:00 2001
From: Thomas Goff <thomas.goff@boeing.com>
Date: Tue, 27 Jan 2009 22:39:59 -0800
Subject: IPv6: Fix multicast routing bugs.

This patch addresses the IPv6 multicast routing issues described
below.  It was tested with XORP 1.4/1.5 as the IPv6 PIM-SM routing
daemon against FreeBSD peers.

net/ipv6/ip6_input.c:

  - Don't try to forward link-local multicast packets.

  - Don't reset skb2->dev before calling ip6_mr_input() so packets can
    be identified as coming from the PIM register vif properly.

net/ipv6/ip6mr.c:

  - Fix incoming PIM register messages processing:

    * The IPv6 pseudo-header should be included when checksumming PIM
      messages (RFC 4601 section 4.9; RFC 3973 section 4.7.1).

    * Packets decapsulated from PIM register messages should have
      skb->protocol ETH_P_IPV6.

  - Enable/disable IPv6 multicast forwarding on the corresponding
    interface when a routing daemon adds/removes a multicast virtual
    interface.

  - Remove incorrect skb_pull() to fix userspace signaling.

  - Enable/disable global IPv6 multicast forwarding when an IPv6
    multicast routing socket is opened/closed.

net/ipv6/route.c:

  - Don't use strict routing logic for packets decapsulated from PIM
    register messages (similar to disabling rp_filter for the IPv4
    case).

Signed-off-by: Thomas Goff <thomas.goff@boeing.com>
Reviewed-by: Fred Templin <fred.l.templin@boeing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_input.c |  2 +-
 net/ipv6/ip6mr.c     | 23 ++++++++++++++++++-----
 net/ipv6/route.c     |  2 +-
 3 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 936f48946e2..f171e8dbac9 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -255,6 +255,7 @@ int ip6_mc_input(struct sk_buff *skb)
 	 *      IPv6 multicast router mode is now supported ;)
 	 */
 	if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding &&
+	    !(ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) &&
 	    likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) {
 		/*
 		 * Okay, we try to forward - split and duplicate
@@ -316,7 +317,6 @@ int ip6_mc_input(struct sk_buff *skb)
 		}
 
 		if (skb2) {
-			skb2->dev = skb2->dst->dev;
 			ip6_mr_input(skb2);
 		}
 	}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 3c51b2d827f..d19a84b7950 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -365,7 +365,9 @@ static int pim6_rcv(struct sk_buff *skb)
 	pim = (struct pimreghdr *)skb_transport_header(skb);
 	if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
 	    (pim->flags & PIM_NULL_REGISTER) ||
-	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
+	    (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
+			     sizeof(*pim), IPPROTO_PIM,
+			     csum_partial((void *)pim, sizeof(*pim), 0)) &&
 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 		goto drop;
 
@@ -392,7 +394,7 @@ static int pim6_rcv(struct sk_buff *skb)
 	skb_pull(skb, (u8 *)encap - skb->data);
 	skb_reset_network_header(skb);
 	skb->dev = reg_dev;
-	skb->protocol = htons(ETH_P_IP);
+	skb->protocol = htons(ETH_P_IPV6);
 	skb->ip_summed = 0;
 	skb->pkt_type = PACKET_HOST;
 	dst_release(skb->dst);
@@ -481,6 +483,7 @@ static int mif6_delete(struct net *net, int vifi)
 {
 	struct mif_device *v;
 	struct net_device *dev;
+	struct inet6_dev *in6_dev;
 	if (vifi < 0 || vifi >= net->ipv6.maxvif)
 		return -EADDRNOTAVAIL;
 
@@ -513,6 +516,10 @@ static int mif6_delete(struct net *net, int vifi)
 
 	dev_set_allmulti(dev, -1);
 
+	in6_dev = __in6_dev_get(dev);
+	if (in6_dev)
+		in6_dev->cnf.mc_forwarding--;
+
 	if (v->flags & MIFF_REGISTER)
 		unregister_netdevice(dev);
 
@@ -622,6 +629,7 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
 	int vifi = vifc->mif6c_mifi;
 	struct mif_device *v = &net->ipv6.vif6_table[vifi];
 	struct net_device *dev;
+	struct inet6_dev *in6_dev;
 	int err;
 
 	/* Is vif busy ? */
@@ -662,6 +670,10 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
 		return -EINVAL;
 	}
 
+	in6_dev = __in6_dev_get(dev);
+	if (in6_dev)
+		in6_dev->cnf.mc_forwarding++;
+
 	/*
 	 *	Fill in the VIF structures
 	 */
@@ -838,8 +850,6 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
 
 	skb->dst = dst_clone(pkt->dst);
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
-
-	skb_pull(skb, sizeof(struct ipv6hdr));
 	}
 
 	if (net->ipv6.mroute6_sk == NULL) {
@@ -1222,8 +1232,10 @@ static int ip6mr_sk_init(struct sock *sk)
 
 	rtnl_lock();
 	write_lock_bh(&mrt_lock);
-	if (likely(net->ipv6.mroute6_sk == NULL))
+	if (likely(net->ipv6.mroute6_sk == NULL)) {
 		net->ipv6.mroute6_sk = sk;
+		net->ipv6.devconf_all->mc_forwarding++;
+	}
 	else
 		err = -EADDRINUSE;
 	write_unlock_bh(&mrt_lock);
@@ -1242,6 +1254,7 @@ int ip6mr_sk_done(struct sock *sk)
 	if (sk == net->ipv6.mroute6_sk) {
 		write_lock_bh(&mrt_lock);
 		net->ipv6.mroute6_sk = NULL;
+		net->ipv6.devconf_all->mc_forwarding--;
 		write_unlock_bh(&mrt_lock);
 
 		mroute_clean_tables(net);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c4a59824ac2..9c574235c90 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -794,7 +794,7 @@ void ip6_route_input(struct sk_buff *skb)
 		.proto = iph->nexthdr,
 	};
 
-	if (rt6_need_strict(&iph->daddr))
+	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
 		flags |= RT6_LOOKUP_F_IFACE;
 
 	skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
-- 
cgit v1.2.3


From a4e6db07984529847c6ad8bc616485e721dcb809 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 27 Jan 2009 22:41:03 -0800
Subject: ipv6: Make mc_forwarding sysctl read-only.

The kernel manages this value internally, as necessary, as
VIFs are added/removed and as multicast routers are registered
and deregistered.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e92ad8455c6..f9afb452249 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4250,7 +4250,7 @@ static struct addrconf_sysctl_table
 			.procname	=	"mc_forwarding",
 			.data		=	&ipv6_devconf.mc_forwarding,
 			.maxlen		=	sizeof(int),
-			.mode		=	0644,
+			.mode		=	0444,
 			.proc_handler	=	proc_dointvec,
 		},
 #endif
-- 
cgit v1.2.3


From 615aab4b75dfa77b00c372330d6f70edd2458bf9 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Thu, 22 Jan 2009 15:05:46 -0800
Subject: cfg80211: Fix sanity check on 5 GHz when processing country IE

This fixes two issues with the sanity check loop when processing
the country IE:

1. Do not use frequency for the current subband channel check,
   this was a big fat typo.
2. Apply the 5 GHz 4-channel steps when considering max channel
   on each subband as was done with a recent patch.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index bc494cef210..61698095e1a 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -498,6 +498,7 @@ static struct ieee80211_regdomain *country_ie_2_rd(
 	 * calculate the number of reg rules we will need. We will need one
 	 * for each channel subband */
 	while (country_ie_len >= 3) {
+		int end_channel = 0;
 		struct ieee80211_country_ie_triplet *triplet =
 			(struct ieee80211_country_ie_triplet *) country_ie;
 		int cur_sub_max_channel = 0, cur_channel = 0;
@@ -509,9 +510,25 @@ static struct ieee80211_regdomain *country_ie_2_rd(
 			continue;
 		}
 
+		/* 2 GHz */
+		if (triplet->chans.first_channel <= 14)
+			end_channel = triplet->chans.first_channel +
+				triplet->chans.num_channels;
+		else
+			/*
+			 * 5 GHz -- For example in country IEs if the first
+			 * channel given is 36 and the number of channels is 4
+			 * then the individual channel numbers defined for the
+			 * 5 GHz PHY by these parameters are: 36, 40, 44, and 48
+			 * and not 36, 37, 38, 39.
+			 *
+			 * See: http://tinyurl.com/11d-clarification
+			 */
+			end_channel =  triplet->chans.first_channel +
+				(4 * (triplet->chans.num_channels - 1));
+
 		cur_channel = triplet->chans.first_channel;
-		cur_sub_max_channel = ieee80211_channel_to_frequency(
-			cur_channel + triplet->chans.num_channels);
+		cur_sub_max_channel = end_channel;
 
 		/* Basic sanity check */
 		if (cur_sub_max_channel < cur_channel)
@@ -590,15 +607,6 @@ static struct ieee80211_regdomain *country_ie_2_rd(
 			end_channel = triplet->chans.first_channel +
 				triplet->chans.num_channels;
 		else
-			/*
-			 * 5 GHz -- For example in country IEs if the first
-			 * channel given is 36 and the number of channels is 4
-			 * then the individual channel numbers defined for the
-			 * 5 GHz PHY by these parameters are: 36, 40, 44, and 48
-			 * and not 36, 37, 38, 39.
-			 *
-			 * See: http://tinyurl.com/11d-clarification
-			 */
 			end_channel =  triplet->chans.first_channel +
 				(4 * (triplet->chans.num_channels - 1));
 
-- 
cgit v1.2.3


From 667ecd010d870f861a9e276aaaca8cb443ded8b3 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Thu, 22 Jan 2009 15:05:43 -0800
Subject: cfg80211: print correct intersected regulatory domain

When CONFIG_CFG80211_REG_DEBUG is enabled and an intersection
occurs we are printing the regulatory domain passed by CRDA
and indicating its the intersected regulatory domain. Lets fix
this and print the intersection as originally intended.

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 61698095e1a..85c9034c59b 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1284,7 +1284,7 @@ static void reg_country_ie_process_debug(
 	if (intersected_rd) {
 		printk(KERN_DEBUG "cfg80211: We intersect both of these "
 			"and get:\n");
-		print_regdomain_info(rd);
+		print_regdomain_info(intersected_rd);
 		return;
 	}
 	printk(KERN_DEBUG "cfg80211: Intersection between both failed\n");
-- 
cgit v1.2.3


From 95e3b24cfb4ec0479d2c42f7a1780d68063a542a Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 29 Jan 2009 16:07:52 -0800
Subject: net: Fix frag_list handling in skb_seq_read

The frag_list handling was broken in skb_seq_read:

1) We didn't add the stepped offset when looking at the head
are of fragments other than the first.

2) We didn't take the stepped offset away when setting the data
pointer in the head area.

3) The frag index wasn't reset.

This patch fixes both issues.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2e5f2ca3bdc..f23fd43539e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2212,10 +2212,10 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
 		return 0;
 
 next_skb:
-	block_limit = skb_headlen(st->cur_skb);
+	block_limit = skb_headlen(st->cur_skb) + st->stepped_offset;
 
 	if (abs_offset < block_limit) {
-		*data = st->cur_skb->data + abs_offset;
+		*data = st->cur_skb->data + (abs_offset - st->stepped_offset);
 		return block_limit - abs_offset;
 	}
 
@@ -2257,6 +2257,7 @@ next_skb:
 	} else if (st->root_skb == st->cur_skb &&
 		   skb_shinfo(st->root_skb)->frag_list) {
 		st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
+		st->frag_idx = 0;
 		goto next_skb;
 	}
 
-- 
cgit v1.2.3


From 71b3346d182355f19509fadb8fe45114a35cc499 Mon Sep 17 00:00:00 2001
From: Shyam Iyer <shyam_iyer@dell.com>
Date: Thu, 29 Jan 2009 16:12:42 -0800
Subject: net: Fix OOPS in skb_seq_read().

It oopsd for me in skb_seq_read. addr2line said it was
linux-2.6/net/core/skbuff.c:2228, which is this line:


	while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) {


I added some printks in there and it looks like we hit this:

        } else if (st->root_skb == st->cur_skb &&
                   skb_shinfo(st->root_skb)->frag_list) {
                 st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
                 st->frag_idx = 0;
                 goto next_skb;
        }


Actually I did some testing and added a few printks and found that the
st->cur_skb->data was 0 and hence the ptr used by iscsi_tcp was null.
This caused the kernel panic.

 	if (abs_offset < block_limit) {
-		*data = st->cur_skb->data + abs_offset;
+		*data = st->cur_skb->data + (abs_offset - st->stepped_offset);

I enabled the debug_tcp and with a few printks found that the code did
not go to the next_skb label and could find that the sequence being
followed was this -

It hit this if condition -

        if (st->cur_skb->next) {
                st->cur_skb = st->cur_skb->next;
                st->frag_idx = 0;
                goto next_skb;

And so, now the st pointer is shifted to the next skb whereas actually
it should have hit the second else if first since the data is in the
frag_list.

        else if (st->root_skb == st->cur_skb &&
                 skb_shinfo(st->root_skb)->frag_list) {
                st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
                goto next_skb;
        }

Reversing the two conditions the attached patch fixes the issue for me
on top of Herbert's patches.

Signed-off-by: Shyam Iyer <shyam_iyer@dell.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f23fd43539e..da74b844f4e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2250,13 +2250,13 @@ next_skb:
 		st->frag_data = NULL;
 	}
 
-	if (st->cur_skb->next) {
-		st->cur_skb = st->cur_skb->next;
+	if (st->root_skb == st->cur_skb &&
+	    skb_shinfo(st->root_skb)->frag_list) {
+		st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
 		st->frag_idx = 0;
 		goto next_skb;
-	} else if (st->root_skb == st->cur_skb &&
-		   skb_shinfo(st->root_skb)->frag_list) {
-		st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
+	} else if (st->cur_skb->next) {
+		st->cur_skb = st->cur_skb->next;
 		st->frag_idx = 0;
 		goto next_skb;
 	}
-- 
cgit v1.2.3


From 9d8dba6c979fa99c96938c869611b9a23b73efa9 Mon Sep 17 00:00:00 2001
From: Benjamin Zores <benjamin.zores@alcatel-lucent.fr>
Date: Thu, 29 Jan 2009 16:19:13 -0800
Subject: ipv4: fix infinite retry loop in IP-Config

Signed-off-by: Benjamin Zores <benjamin.zores@alcatel-lucent.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipconfig.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 42a0f3dd3fd..d722013c1ca 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1268,6 +1268,9 @@ __be32 __init root_nfs_parse_addr(char *name)
 static int __init ip_auto_config(void)
 {
 	__be32 addr;
+#ifdef IPCONFIG_DYNAMIC
+	int retries = CONF_OPEN_RETRIES;
+#endif
 
 #ifdef CONFIG_PROC_FS
 	proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops);
@@ -1304,9 +1307,6 @@ static int __init ip_auto_config(void)
 #endif
 	    ic_first_dev->next) {
 #ifdef IPCONFIG_DYNAMIC
-
-		int retries = CONF_OPEN_RETRIES;
-
 		if (ic_dynamic() < 0) {
 			ic_close_devs();
 
-- 
cgit v1.2.3


From 1af7ad51049d6a310a19d497960597198290ddfa Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Thu, 29 Jan 2009 17:18:31 -0800
Subject: wimax: fix build issue when debugfs is disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As reported by Toralf Förster and Randy Dunlap.

- http://linuxwimax.org/pipermail/wimax/2009-January/000460.html

- http://lkml.org/lkml/2009/1/29/279

The definitions needed for the wimax stack and i2400m driver debug
infrastructure was, by mistake, compiled depending on CONFIG_DEBUG_FS
(by them being placed in the debugfs.c files); thus the build broke in
2.6.29-rc3 when debugging was enabled (CONFIG_WIMAX_DEBUG) and
DEBUG_FS was disabled.

These definitions are always needed if debug is enabled at compile
time (independently of DEBUG_FS being or not enabled), so moving them
to a file that is always compiled fixes the issue.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wimax/debugfs.c | 11 -----------
 net/wimax/stack.c   | 13 +++++++++++++
 2 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/wimax/debugfs.c b/net/wimax/debugfs.c
index 87cf4430079..94d216a4640 100644
--- a/net/wimax/debugfs.c
+++ b/net/wimax/debugfs.c
@@ -28,17 +28,6 @@
 #include "debug-levels.h"
 
 
-/* Debug framework control of debug levels */
-struct d_level D_LEVEL[] = {
-	D_SUBMODULE_DEFINE(debugfs),
-	D_SUBMODULE_DEFINE(id_table),
-	D_SUBMODULE_DEFINE(op_msg),
-	D_SUBMODULE_DEFINE(op_reset),
-	D_SUBMODULE_DEFINE(op_rfkill),
-	D_SUBMODULE_DEFINE(stack),
-};
-size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
-
 #define __debugfs_register(prefix, name, parent)			\
 do {									\
 	result = d_level_register_debugfs(prefix, name, parent);	\
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index d4da92f8981..3869c032788 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -516,6 +516,19 @@ void wimax_dev_rm(struct wimax_dev *wimax_dev)
 }
 EXPORT_SYMBOL_GPL(wimax_dev_rm);
 
+
+/* Debug framework control of debug levels */
+struct d_level D_LEVEL[] = {
+	D_SUBMODULE_DEFINE(debugfs),
+	D_SUBMODULE_DEFINE(id_table),
+	D_SUBMODULE_DEFINE(op_msg),
+	D_SUBMODULE_DEFINE(op_reset),
+	D_SUBMODULE_DEFINE(op_rfkill),
+	D_SUBMODULE_DEFINE(stack),
+};
+size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
+
+
 struct genl_family wimax_gnl_family = {
 	.id = GENL_ID_GENERATE,
 	.name = "WiMAX",
-- 
cgit v1.2.3


From 905db44087855e3c1709f538ecdc22fd149cadd8 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 30 Jan 2009 14:12:06 -0800
Subject: packet: Avoid lock_sock in mmap handler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As the mmap handler gets called under mmap_sem, and we may grab
mmap_sem elsewhere under the socket lock to access user data, we
should avoid grabbing the socket lock in the mmap handler.

Since the only thing we care about in the mmap handler is for
pg_vec* to be invariant, i.e., to exclude packet_set_ring, we
can achieve this by simply using a new mutex.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Tested-by: Martin MOKREJŠ <mmokrejs@ribosome.natur.cuni.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 5f94db2f3e9..9454d4ae46d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -77,6 +77,7 @@
 #include <linux/poll.h>
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/mutex.h>
 
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
@@ -175,6 +176,7 @@ struct packet_sock {
 #endif
 	struct packet_type	prot_hook;
 	spinlock_t		bind_lock;
+	struct mutex		pg_vec_lock;
 	unsigned int		running:1,	/* prot_hook is attached*/
 				auxdata:1,
 				origdev:1;
@@ -1069,6 +1071,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol)
 	 */
 
 	spin_lock_init(&po->bind_lock);
+	mutex_init(&po->pg_vec_lock);
 	po->prot_hook.func = packet_rcv;
 
 	if (sock->type == SOCK_PACKET)
@@ -1865,6 +1868,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
 	synchronize_net();
 
 	err = -EBUSY;
+	mutex_lock(&po->pg_vec_lock);
 	if (closing || atomic_read(&po->mapped) == 0) {
 		err = 0;
 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
@@ -1886,6 +1890,7 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
 		if (atomic_read(&po->mapped))
 			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
 	}
+	mutex_unlock(&po->pg_vec_lock);
 
 	spin_lock(&po->bind_lock);
 	if (was_running && !po->running) {
@@ -1918,7 +1923,7 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st
 
 	size = vma->vm_end - vma->vm_start;
 
-	lock_sock(sk);
+	mutex_lock(&po->pg_vec_lock);
 	if (po->pg_vec == NULL)
 		goto out;
 	if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
@@ -1941,7 +1946,7 @@ static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_st
 	err = 0;
 
 out:
-	release_sock(sk);
+	mutex_unlock(&po->pg_vec_lock);
 	return err;
 }
 #endif
-- 
cgit v1.2.3


From 5d6e430d3bafe743b18dc443189093bf532e91ed Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Sat, 31 Jan 2009 00:51:49 -0800
Subject: ipv6: compile fix for ip6mr.c

net/ipv6/ip6mr.c: In function 'pim6_rcv':
net/ipv6/ip6mr.c:368: error: implicit declaration of function 'csum_ipv6_magic'

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index d19a84b7950..228be551e9c 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -48,6 +48,7 @@
 #include <linux/pim.h>
 #include <net/addrconf.h>
 #include <linux/netfilter_ipv6.h>
+#include <net/ip6_checksum.h>
 
 /* Big lock, protecting vif table, mrt cache and mroute socket state.
    Note that the changes are semaphored via rtnl_lock.
-- 
cgit v1.2.3


From f9e6934502e46c363100245f137ddf0f4b1cb574 Mon Sep 17 00:00:00 2001
From: Sebastiano Di Paola <sebastiano.dipaola@gmail.com>
Date: Fri, 30 Jan 2009 23:37:17 +0000
Subject: net: packet socket packet_lookup_frame fix

packet_lookup_frames() fails to get user frame if current frame header
status contains extra flags.
This is due to the wrong assumption on the operators precedence during
frame status tests.
Fixed by forcing the right operators precedence order with explicit brackets.

Signed-off-by: Paolo Abeni <paolo.abeni@gmail.com>
Signed-off-by: Sebastiano Di Paola <sebastiano.dipaola@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9454d4ae46d..1fc4a7885c4 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -222,13 +222,13 @@ static void *packet_lookup_frame(struct packet_sock *po, unsigned int position,
 	h.raw = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
 	switch (po->tp_version) {
 	case TPACKET_V1:
-		if (status != h.h1->tp_status ? TP_STATUS_USER :
-						TP_STATUS_KERNEL)
+		if (status != (h.h1->tp_status ? TP_STATUS_USER :
+						TP_STATUS_KERNEL))
 			return NULL;
 		break;
 	case TPACKET_V2:
-		if (status != h.h2->tp_status ? TP_STATUS_USER :
-						TP_STATUS_KERNEL)
+		if (status != (h.h2->tp_status ? TP_STATUS_USER :
+						TP_STATUS_KERNEL))
 			return NULL;
 		break;
 	}
-- 
cgit v1.2.3


From e408b8dcb5ce42243a902205005208e590f28454 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 2 Feb 2009 13:41:57 -0800
Subject: udp: increments sk_drops in __udp_queue_rcv_skb()

Commit 93821778def10ec1e69aa3ac10adee975dad4ff3 (udp: Fix rcv socket
locking) accidentally removed sk_drops increments for UDP IPV4
sockets.

This field can be used to detect incorrect sizing of socket receive
buffers.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b7faffe5c02..1ab180bad72 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1015,9 +1015,11 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 
 	if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
 		/* Note that an ENOMEM error is charged twice */
-		if (rc == -ENOMEM)
+		if (rc == -ENOMEM) {
 			UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS,
 					 is_udplite);
+			atomic_inc(&sk->sk_drops);
+		}
 		goto drop;
 	}
 
-- 
cgit v1.2.3


From 55128bc23e9ab44e97f81f6cd349035230ee59a6 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 3 Feb 2009 15:20:13 -0800
Subject: sunrpc: fix rdma dependencies

Fix sunrpc/rdma build dependencies.
Survives 12 build combinations of INET, IPV6, SUNRPC,
INFINIBAND, and INFINIBAND_ADDR_TRANS.

ERROR: "rdma_destroy_id" [net/sunrpc/xprtrdma/xprtrdma.ko] undefined!
ERROR: "rdma_connect" [net/sunrpc/xprtrdma/xprtrdma.ko] undefined!
ERROR: "rdma_destroy_qp" [net/sunrpc/xprtrdma/xprtrdma.ko] undefined!
ERROR: "rdma_create_id" [net/sunrpc/xprtrdma/xprtrdma.ko] undefined!
ERROR: "rdma_create_qp" [net/sunrpc/xprtrdma/xprtrdma.ko] undefined!
ERROR: "rdma_resolve_route" [net/sunrpc/xprtrdma/xprtrdma.ko] undefined!
ERROR: "rdma_disconnect" [net/sunrpc/xprtrdma/xprtrdma.ko] undefined!
ERROR: "rdma_resolve_addr" [net/sunrpc/xprtrdma/xprtrdma.ko] undefined!
ERROR: "rdma_accept" [net/sunrpc/xprtrdma/svcrdma.ko] undefined!
ERROR: "rdma_destroy_id" [net/sunrpc/xprtrdma/svcrdma.ko] undefined!
ERROR: "rdma_listen" [net/sunrpc/xprtrdma/svcrdma.ko] undefined!
ERROR: "rdma_create_id" [net/sunrpc/xprtrdma/svcrdma.ko] undefined!
ERROR: "rdma_create_qp" [net/sunrpc/xprtrdma/svcrdma.ko] undefined!
ERROR: "rdma_bind_addr" [net/sunrpc/xprtrdma/svcrdma.ko] undefined!
ERROR: "rdma_disconnect" [net/sunrpc/xprtrdma/svcrdma.ko] undefined!

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sunrpc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index dcef600d0bf..5592883e1e4 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -6,7 +6,7 @@ config SUNRPC_GSS
 
 config SUNRPC_XPRT_RDMA
 	tristate
-	depends on SUNRPC && INFINIBAND && EXPERIMENTAL
+	depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL
 	default SUNRPC && INFINIBAND
 	help
 	  This option allows the NFS client and server to support
-- 
cgit v1.2.3


From 7b5e56f9d635643ad54f2f42e69ad16b80a2cff1 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <hawk@comx.dk>
Date: Thu, 5 Feb 2009 15:05:45 -0800
Subject: udp: Fix UDP short packet false positive

The UDP header pointer assignment must happen after calling
pskb_may_pull().  As pskb_may_pull() can potentially alter the SKB
buffer.

This was exposted by running multicast traffic through the NIU driver,
as it won't prepull the protocol headers into the linear area on
receive.

Signed-off-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1ab180bad72..cc3a0a06c00 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1231,7 +1231,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 		   int proto)
 {
 	struct sock *sk;
-	struct udphdr *uh = udp_hdr(skb);
+	struct udphdr *uh;
 	unsigned short ulen;
 	struct rtable *rt = (struct rtable*)skb->dst;
 	__be32 saddr = ip_hdr(skb)->saddr;
@@ -1244,6 +1244,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
 		goto drop;		/* No space for header. */
 
+	uh   = udp_hdr(skb);
 	ulen = ntohs(uh->len);
 	if (ulen > skb->len)
 		goto short_packet;
-- 
cgit v1.2.3


From 0178b695fd6b40a62a215cbeb03dd51ada3bb5e0 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 5 Feb 2009 15:15:50 -0800
Subject: ipv6: Copy cork options in ip6_append_data

As the options passed to ip6_append_data may be ephemeral, we need
to duplicate it for corking.  This patch applies the simplest fix
which is to memdup all the relevant bits.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 67 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 52 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4b15938bef4..9fb49c3b518 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1105,6 +1105,18 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 	return err;
 }
 
+static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
+					       gfp_t gfp)
+{
+	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
+}
+
+static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
+						gfp_t gfp)
+{
+	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
+}
+
 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	int offset, int len, int odd, struct sk_buff *skb),
 	void *from, int length, int transhdrlen,
@@ -1130,17 +1142,37 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		 * setup for corking
 		 */
 		if (opt) {
-			if (np->cork.opt == NULL) {
-				np->cork.opt = kmalloc(opt->tot_len,
-						       sk->sk_allocation);
-				if (unlikely(np->cork.opt == NULL))
-					return -ENOBUFS;
-			} else if (np->cork.opt->tot_len < opt->tot_len) {
-				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
+			if (WARN_ON(np->cork.opt))
 				return -EINVAL;
-			}
-			memcpy(np->cork.opt, opt, opt->tot_len);
-			inet->cork.flags |= IPCORK_OPT;
+
+			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
+			if (unlikely(np->cork.opt == NULL))
+				return -ENOBUFS;
+
+			np->cork.opt->tot_len = opt->tot_len;
+			np->cork.opt->opt_flen = opt->opt_flen;
+			np->cork.opt->opt_nflen = opt->opt_nflen;
+
+			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
+							    sk->sk_allocation);
+			if (opt->dst0opt && !np->cork.opt->dst0opt)
+				return -ENOBUFS;
+
+			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
+							    sk->sk_allocation);
+			if (opt->dst1opt && !np->cork.opt->dst1opt)
+				return -ENOBUFS;
+
+			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
+							   sk->sk_allocation);
+			if (opt->hopopt && !np->cork.opt->hopopt)
+				return -ENOBUFS;
+
+			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
+							    sk->sk_allocation);
+			if (opt->srcrt && !np->cork.opt->srcrt)
+				return -ENOBUFS;
+
 			/* need source address above miyazawa*/
 		}
 		dst_hold(&rt->u.dst);
@@ -1167,8 +1199,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	} else {
 		rt = (struct rt6_info *)inet->cork.dst;
 		fl = &inet->cork.fl;
-		if (inet->cork.flags & IPCORK_OPT)
-			opt = np->cork.opt;
+		opt = np->cork.opt;
 		transhdrlen = 0;
 		exthdrlen = 0;
 		mtu = inet->cork.fragsize;
@@ -1407,9 +1438,15 @@ error:
 
 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
 {
-	inet->cork.flags &= ~IPCORK_OPT;
-	kfree(np->cork.opt);
-	np->cork.opt = NULL;
+	if (np->cork.opt) {
+		kfree(np->cork.opt->dst0opt);
+		kfree(np->cork.opt->dst1opt);
+		kfree(np->cork.opt->hopopt);
+		kfree(np->cork.opt->srcrt);
+		kfree(np->cork.opt);
+		np->cork.opt = NULL;
+	}
+
 	if (inet->cork.dst) {
 		dst_release(inet->cork.dst);
 		inet->cork.dst = NULL;
-- 
cgit v1.2.3


From a23f4bbd8d27ac8ddc5d71ace1f91bb503f0469a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 5 Feb 2009 15:38:31 -0800
Subject: Revert "tcp: Always set urgent pointer if it's beyond snd_nxt"

This reverts commit 64ff3b938ec6782e6585a83d5459b98b0c3f6eb8.

Jeff Chua reports that it breaks rlogin for him.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 557fe16cbfb..dda42f0bd7a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -663,14 +663,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	th->urg_ptr		= 0;
 
 	/* The urg_mode check is necessary during a below snd_una win probe */
-	if (unlikely(tcp_urg_mode(tp))) {
-		if (between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF)) {
-			th->urg_ptr = htons(tp->snd_up - tcb->seq);
-			th->urg = 1;
-		} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
-			th->urg_ptr = 0xFFFF;
-			th->urg = 1;
-		}
+	if (unlikely(tcp_urg_mode(tp) &&
+		     between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) {
+		th->urg_ptr		= htons(tp->snd_up - tcb->seq);
+		th->urg			= 1;
 	}
 
 	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
-- 
cgit v1.2.3


From 684de409acff8b1fe8bf188d75ff2f99c624387d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 6 Feb 2009 00:49:55 -0800
Subject: ipv6: Disallow rediculious flowlabel option sizes.

Just like PKTINFO, limit the options area to 64K.

Based upon report by Eric Sesterhenn and analysis by
Roland Dreier.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_flowlabel.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index c62dd247774..7712578bdc6 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -323,17 +323,21 @@ static struct ip6_flowlabel *
 fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
 	  int optlen, int *err_p)
 {
-	struct ip6_flowlabel *fl;
+	struct ip6_flowlabel *fl = NULL;
 	int olen;
 	int addr_type;
 	int err;
 
+	olen = optlen - CMSG_ALIGN(sizeof(*freq));
+	err = -EINVAL;
+	if (olen > 64 * 1024)
+		goto done;
+
 	err = -ENOMEM;
 	fl = kzalloc(sizeof(*fl), GFP_KERNEL);
 	if (fl == NULL)
 		goto done;
 
-	olen = optlen - CMSG_ALIGN(sizeof(*freq));
 	if (olen > 0) {
 		struct msghdr msg;
 		struct flowi flowi;
-- 
cgit v1.2.3


From efc683fc2a692735029067b4f939af2a3625e31d Mon Sep 17 00:00:00 2001
From: Gautam Kachroo <gk@aristanetworks.com>
Date: Fri, 6 Feb 2009 00:52:04 -0800
Subject: neigh: some entries can be skipped during dumping

neightbl_dump_info and neigh_dump_table  can skip entries if the
*fill*info functions return an error. This results in an incomplete
dump ((invoked by netlink requests for RTM_GETNEIGHTBL or
RTM_GETNEIGH)

nidx and idx should not be incremented if the current entry was not
placed in the output buffer

Signed-off-by: Gautam Kachroo <gk@aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f66c58df895..278a142d104 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1994,8 +1994,8 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 			if (!net_eq(neigh_parms_net(p), net))
 				continue;
 
-			if (nidx++ < neigh_skip)
-				continue;
+			if (nidx < neigh_skip)
+				goto next;
 
 			if (neightbl_fill_param_info(skb, tbl, p,
 						     NETLINK_CB(cb->skb).pid,
@@ -2003,6 +2003,8 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 						     RTM_NEWNEIGHTBL,
 						     NLM_F_MULTI) <= 0)
 				goto out;
+		next:
+			nidx++;
 		}
 
 		neigh_skip = 0;
@@ -2082,12 +2084,10 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 		if (h > s_h)
 			s_idx = 0;
 		for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
-			int lidx;
 			if (dev_net(n->dev) != net)
 				continue;
-			lidx = idx++;
-			if (lidx < s_idx)
-				continue;
+			if (idx < s_idx)
+				goto next;
 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
 					    cb->nlh->nlmsg_seq,
 					    RTM_NEWNEIGH,
@@ -2096,6 +2096,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 				rc = -1;
 				goto out;
 			}
+		next:
+			idx++;
 		}
 	}
 	read_unlock_bh(&tbl->lock);
-- 
cgit v1.2.3


From 2783ef23128ad0a4b34e4121c1f7ff664785712f Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <hawk@comx.dk>
Date: Fri, 6 Feb 2009 01:59:12 -0800
Subject: udp: Fix potential wrong ip_hdr(skb) pointers

Like the UDP header fix, pskb_may_pull() can potentially
alter the SKB buffer.  Thus the saddr and daddr, pointers
may point to the old skb->data buffer.

I haven't seen corruptions, as its only seen if the old
skb->data buffer were reallocated by another user and
written into very quickly (or poison'd by SLAB debugging).

Signed-off-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cc3a0a06c00..c47c989cb1f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1234,8 +1234,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	struct udphdr *uh;
 	unsigned short ulen;
 	struct rtable *rt = (struct rtable*)skb->dst;
-	__be32 saddr = ip_hdr(skb)->saddr;
-	__be32 daddr = ip_hdr(skb)->daddr;
+	__be32 saddr, daddr;
 	struct net *net = dev_net(skb->dev);
 
 	/*
@@ -1259,6 +1258,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	if (udp4_csum_init(skb, uh, proto))
 		goto csum_error;
 
+	saddr = ip_hdr(skb)->saddr;
+	daddr = ip_hdr(skb)->daddr;
+
 	if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
 		return __udp4_lib_mcast_deliver(net, skb, uh,
 				saddr, daddr, udptable);
-- 
cgit v1.2.3


From 15bde72738f373aa060ececeda8e064e4f924360 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 6 Feb 2009 21:50:52 -0800
Subject: RxRPC: Fix a potential NULL dereference

Fix a potential NULL dereference bug during error handling in
rxrpc_kernel_begin_call(), whereby rxrpc_put_transport() may be handed a NULL
pointer.

This was found with a code checker (http://repo.or.cz/w/smatch.git/).

Reported-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/af_rxrpc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index d7d2bed7a69..eac5e7bb736 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -284,13 +284,13 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
 		if (IS_ERR(trans)) {
 			call = ERR_CAST(trans);
 			trans = NULL;
-			goto out;
+			goto out_notrans;
 		}
 	} else {
 		trans = rx->trans;
 		if (!trans) {
 			call = ERR_PTR(-ENOTCONN);
-			goto out;
+			goto out_notrans;
 		}
 		atomic_inc(&trans->usage);
 	}
@@ -315,6 +315,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
 	rxrpc_put_bundle(trans, bundle);
 out:
 	rxrpc_put_transport(trans);
+out_notrans:
 	release_sock(&rx->sk);
 	_leave(" = %p", call);
 	return call;
-- 
cgit v1.2.3


From b4bd07c20ba0c1fa7ad09ba257e0a5cfc2bf6bb3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 6 Feb 2009 22:06:43 -0800
Subject: net_dma: call dmaengine_get only if NET_DMA enabled

Based upon a patch from Atsushi Nemoto <anemo@mba.ocn.ne.jp>

--------------------
The commit 649274d993212e7c23c0cb734572c2311c200872 ("net_dma:
acquire/release dma channels on ifup/ifdown") added unconditional call
of dmaengine_get() to net_dma.  The API should be called only if
NET_DMA was enabled.
--------------------

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Dan Williams <dan.j.williams@intel.com>
---
 net/core/dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 5379b0c1190..a17e0066236 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1090,7 +1090,7 @@ int dev_open(struct net_device *dev)
 		/*
 		 *	Enable NET_DMA
 		 */
-		dmaengine_get();
+		net_dmaengine_get();
 
 		/*
 		 *	Initialize multicasting status
@@ -1172,7 +1172,7 @@ int dev_close(struct net_device *dev)
 	/*
 	 *	Shutdown NET_DMA
 	 */
-	dmaengine_put();
+	net_dmaengine_put();
 
 	return 0;
 }
-- 
cgit v1.2.3


From beeebc92ee04bff6a722ebf85e23131faedd4479 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Fri, 6 Feb 2009 22:07:41 -0800
Subject: 9p: fix endian issues [attempt 3]

When the changes were done to the protocol last release, some endian
bugs crept in.  This patch fixes those endian problems and has been
verified to run on 32/64 bit and x86/ppc architectures.

This version of the patch incorporates the correct annotations
for endian variables.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/9p/protocol.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index dcd7666824b..fc70147c771 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -29,6 +29,7 @@
 #include <linux/errno.h>
 #include <linux/uaccess.h>
 #include <linux/sched.h>
+#include <linux/types.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 #include "protocol.h"
@@ -160,29 +161,32 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
 			break;
 		case 'w':{
 				int16_t *val = va_arg(ap, int16_t *);
-				if (pdu_read(pdu, val, sizeof(*val))) {
+				__le16 le_val;
+				if (pdu_read(pdu, &le_val, sizeof(le_val))) {
 					errcode = -EFAULT;
 					break;
 				}
-				*val = cpu_to_le16(*val);
+				*val = le16_to_cpu(le_val);
 			}
 			break;
 		case 'd':{
 				int32_t *val = va_arg(ap, int32_t *);
-				if (pdu_read(pdu, val, sizeof(*val))) {
+				__le32 le_val;
+				if (pdu_read(pdu, &le_val, sizeof(le_val))) {
 					errcode = -EFAULT;
 					break;
 				}
-				*val = cpu_to_le32(*val);
+				*val = le32_to_cpu(le_val);
 			}
 			break;
 		case 'q':{
 				int64_t *val = va_arg(ap, int64_t *);
-				if (pdu_read(pdu, val, sizeof(*val))) {
+				__le64 le_val;
+				if (pdu_read(pdu, &le_val, sizeof(le_val))) {
 					errcode = -EFAULT;
 					break;
 				}
-				*val = cpu_to_le64(*val);
+				*val = le64_to_cpu(le_val);
 			}
 			break;
 		case 's':{
@@ -362,19 +366,19 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
 			}
 			break;
 		case 'w':{
-				int16_t val = va_arg(ap, int);
+				__le16 val = cpu_to_le16(va_arg(ap, int));
 				if (pdu_write(pdu, &val, sizeof(val)))
 					errcode = -EFAULT;
 			}
 			break;
 		case 'd':{
-				int32_t val = va_arg(ap, int32_t);
+				__le32 val = cpu_to_le32(va_arg(ap, int32_t));
 				if (pdu_write(pdu, &val, sizeof(val)))
 					errcode = -EFAULT;
 			}
 			break;
 		case 'q':{
-				int64_t val = va_arg(ap, int64_t);
+				__le64 val = cpu_to_le64(va_arg(ap, int64_t));
 				if (pdu_write(pdu, &val, sizeof(val)))
 					errcode = -EFAULT;
 			}
-- 
cgit v1.2.3


From a51f42f3c940e5582c40454ece066d033bc7e24f Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@inl.fr>
Date: Mon, 9 Feb 2009 14:33:03 -0800
Subject: netfilter: fix tuple inversion for Node information request

The patch fixes a typo in the inverse mapping of Node Information
request. Following draft-ietf-ipngwg-icmp-name-lookups-09, "Querier"
sends a type 139 (ICMPV6_NI_QUERY) packet to "Responder" which answer
with a type 140 (ICMPV6_NI_REPLY) packet.

Signed-off-by: Eric Leblond <eric@inl.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index c455cf4ee75..114a92e4258 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -49,8 +49,8 @@ static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
 static const u_int8_t invmap[] = {
 	[ICMPV6_ECHO_REQUEST - 128]	= ICMPV6_ECHO_REPLY + 1,
 	[ICMPV6_ECHO_REPLY - 128]	= ICMPV6_ECHO_REQUEST + 1,
-	[ICMPV6_NI_QUERY - 128]		= ICMPV6_NI_QUERY + 1,
-	[ICMPV6_NI_REPLY - 128]		= ICMPV6_NI_REPLY +1
+	[ICMPV6_NI_QUERY - 128]		= ICMPV6_NI_REPLY + 1,
+	[ICMPV6_NI_REPLY - 128]		= ICMPV6_NI_QUERY +1
 };
 
 static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
-- 
cgit v1.2.3


From 3f9007135c1dc896db9a9e35920aafc65b157230 Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@inl.fr>
Date: Mon, 9 Feb 2009 14:33:20 -0800
Subject: netfilter: nf_conntrack_ipv6: don't track ICMPv6 negotiation message

This patch removes connection tracking handling for ICMPv6 messages
related to Stateless Address Autoconfiguration, MLD, and MLDv2. They
can not be tracked because they are massively using multicast (on
pre-defined address). But they are not invalid and should not be
detected as such.

Signed-off-by: Eric Leblond <eric@inl.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 114a92e4258..c323643ffcf 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -53,6 +53,17 @@ static const u_int8_t invmap[] = {
 	[ICMPV6_NI_REPLY - 128]		= ICMPV6_NI_QUERY +1
 };
 
+static const u_int8_t noct_valid_new[] = {
+	[ICMPV6_MGM_QUERY - 130] = 1,
+	[ICMPV6_MGM_REPORT -130] = 1,
+	[ICMPV6_MGM_REDUCTION - 130] = 1,
+	[NDISC_ROUTER_SOLICITATION - 130] = 1,
+	[NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
+	[NDISC_NEIGHBOUR_SOLICITATION - 130] = 1,
+	[NDISC_NEIGHBOUR_ADVERTISEMENT - 130] = 1,
+	[ICMPV6_MLD2_REPORT - 130] = 1
+};
+
 static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
 				const struct nf_conntrack_tuple *orig)
 {
@@ -178,6 +189,7 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 {
 	const struct icmp6hdr *icmp6h;
 	struct icmp6hdr _ih;
+	int type;
 
 	icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
 	if (icmp6h == NULL) {
@@ -194,6 +206,15 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 		return -NF_ACCEPT;
 	}
 
+	type = icmp6h->icmp6_type - 130;
+	if (type >= 0 && type < sizeof(noct_valid_new) &&
+	    noct_valid_new[type]) {
+		skb->nfct = &nf_conntrack_untracked.ct_general;
+		skb->nfctinfo = IP_CT_NEW;
+		nf_conntrack_get(skb->nfct);
+		return NF_ACCEPT;
+	}
+
 	/* is not error message ? */
 	if (icmp6h->icmp6_type >= 128)
 		return NF_ACCEPT;
-- 
cgit v1.2.3


From c969aa7d2cd5621ad4129dae6b6551af422944c6 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 9 Feb 2009 14:33:57 -0800
Subject: netfilter: ctnetlink: allow changing NAT sequence adjustment in
 creation

This patch fixes an inconsistency in the current ctnetlink code
since NAT sequence adjustment bit can only be updated but not set
in the conntrack entry creation.

This patch is used by conntrackd to successfully recover newly
created entries that represent connections with helpers and NAT
payload mangling.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c32a7e8e3a1..9051bb4f81d 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1215,6 +1215,16 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
 		}
 	}
 
+#ifdef CONFIG_NF_NAT_NEEDED
+	if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) {
+		err = ctnetlink_change_nat_seq_adj(ct, cda);
+		if (err < 0) {
+			rcu_read_unlock();
+			goto err;
+		}
+	}
+#endif
+
 	if (cda[CTA_PROTOINFO]) {
 		err = ctnetlink_change_protoinfo(ct, cda);
 		if (err < 0) {
-- 
cgit v1.2.3


From 1f9da256163e3ff91a12d0b861091f0e525139df Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 9 Feb 2009 14:34:26 -0800
Subject: netfilter: ctnetlink: fix echo if not subscribed to any multicast
 group

This patch fixes echoing if the socket that has sent the request to
create/update/delete an entry is not subscribed to any multicast
group. With the current code, ctnetlink would not send the echo
message via unicast as nfnetlink_send() would be skip.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_netlink.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 9051bb4f81d..cb78aa00399 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -434,7 +434,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 	} else
 		return NOTIFY_DONE;
 
-	if (!nfnetlink_has_listeners(group))
+	if (!item->report && !nfnetlink_has_listeners(group))
 		return NOTIFY_DONE;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
@@ -1502,7 +1502,8 @@ static int ctnetlink_expect_event(struct notifier_block *this,
 	} else
 		return NOTIFY_DONE;
 
-	if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
+	if (!item->report &&
+	    !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
 		return NOTIFY_DONE;
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
-- 
cgit v1.2.3


From d4e2675a61890a84849a24affedf80d5cae8b199 Mon Sep 17 00:00:00 2001
From: Qu Haoran <haoran.qu@6wind.com>
Date: Mon, 9 Feb 2009 14:34:56 -0800
Subject: netfilter: xt_sctp: sctp chunk mapping doesn't work

When user tries to map all chunks given in argument, kernel
works on a copy of the chunkmap, but at the end it doesn't
check the copy, but the orginal one.

Signed-off-by: Qu Haoran <haoran.qu@6wind.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_sctp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index e223cb43ae8..a189ada9128 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -105,7 +105,7 @@ match_packet(const struct sk_buff *skb,
 
 	switch (chunk_match_type) {
 	case SCTP_CHUNK_MATCH_ALL:
-		return SCTP_CHUNKMAP_IS_CLEAR(info->chunkmap);
+		return SCTP_CHUNKMAP_IS_CLEAR(chunkmapcopy);
 	case SCTP_CHUNK_MATCH_ANY:
 		return false;
 	case SCTP_CHUNK_MATCH_ONLY:
-- 
cgit v1.2.3


From 20461c1740cac5e02733221c9f653098a703f55a Mon Sep 17 00:00:00 2001
From: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Date: Mon, 9 Feb 2009 15:01:19 -0800
Subject: IPv6: fix to set device name when new IPv6 over IPv6 tunnel device is
 created.

When the user creates IPv6 over IPv6 tunnel, the device name created
by the kernel isn't set to t->parm.name, which is referred as the
result of ioctl().

Signed-off-by: Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 58e2b0d9375..d994c55a5b1 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -249,8 +249,8 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
 	}
 
 	t = netdev_priv(dev);
-	ip6_tnl_dev_init(dev);
 	t->parms = *p;
+	ip6_tnl_dev_init(dev);
 
 	if ((err = register_netdevice(dev)) < 0)
 		goto failed_free;
-- 
cgit v1.2.3


From 4906f9985e310fc01f956256b0d58ac28b0dcb19 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 9 Feb 2009 15:07:18 -0800
Subject: bridge: Fix LRO crash with tun

> Kernel BUG at drivers/net/tun.c:444
> invalid opcode: 0000 [1] SMP
> last sysfs file: /class/net/lo/ifindex
> CPU 0
> Modules linked in: tun ipt_MASQUERADE iptable_nat ip_nat xt_state ip_conntrack
> nfnetlink ipt_REJECT xt_tcpudp iptable_filter d
> Pid: 6912, comm: qemu-kvm Tainted: G      2.6.18-128.el5 #1
> RIP: 0010:[<ffffffff886f57b0>]  [<ffffffff886f57b0>]
> :tun:tun_chr_readv+0x2b1/0x3a6
> RSP: 0018:ffff8102202c5e48  EFLAGS: 00010246
> RAX: 0000000000000000 RBX: ffff8102202c5e98 RCX: 0000000004010000
> RDX: ffff810227063680 RSI: ffff8102202c5e9e RDI: ffff8102202c5e92
> RBP: 0000000000010ff6 R08: 0000000000000000 R09: 0000000000000001
> R10: ffff8102202c5e94 R11: 0000000000000202 R12: ffff8102275357c0
> R13: ffff81022755e500 R14: 0000000000000000 R15: ffff8102202c5ef8
> FS:  00002ae4398db980(0000) GS:ffffffff803ac000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> CR2: 00002ae4ab514000 CR3: 0000000221344000 CR4: 00000000000026e0
> Process qemu-kvm (pid: 6912, threadinfo ffff8102202c4000, task
> ffff81022e58d820)
> Stack:  00000000498735cb ffff810229d1a3c0 0000000000000000 ffff81022e58d820
>  ffffffff8008a461 ffff81022755e528 ffff81022755e528 ffffffff8009f925
>  000005ea05ea0000 ffff8102209d0000 00001051143e1600 ffffffff8003c00e
> Call Trace:
>  [<ffffffff8008a461>] default_wake_function+0x0/0xe
>  [<ffffffff8009f925>] enqueue_hrtimer+0x55/0x70
>  [<ffffffff8003c00e>] hrtimer_start+0xbc/0xce
>  [<ffffffff886f58bf>] :tun:tun_chr_read+0x1a/0x1f
>  [<ffffffff8000b3f3>] vfs_read+0xcb/0x171
>  [<ffffffff800117d4>] sys_read+0x45/0x6e
>  [<ffffffff8005d116>] system_call+0x7e/0x83
>
>
> Code: 0f 0b 68 40 62 6f 88 c2 bc 01 f6 42 0a 08 74 0c 80 4c 24 41
> RIP  [<ffffffff886f57b0>] :tun:tun_chr_readv+0x2b1/0x3a6
>  RSP <ffff8102202c5e48>
>  <0>Kernel panic - not syncing: Fatal exception

This crashed when an LRO packet generated by bnx2x reached a
tun device through the bridge.  We're supposed to drop it at
the bridge.  However, because the check was placed in br_forward
instead of __br_forward, it's only effective if we are sending
the packet through a single port.

This patch fixes it by moving the check into __br_forward.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_forward.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index bdd9ccea17c..d2c27c808d3 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -67,6 +67,11 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
 {
 	struct net_device *indev;
 
+	if (skb_warn_if_lro(skb)) {
+		kfree_skb(skb);
+		return;
+	}
+
 	indev = skb->dev;
 	skb->dev = to->dev;
 	skb_forward_csum(skb);
@@ -89,7 +94,7 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
 /* called with rcu_read_lock */
 void br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
 {
-	if (!skb_warn_if_lro(skb) && should_deliver(to, skb)) {
+	if (should_deliver(to, skb)) {
 		__br_forward(to, skb);
 		return;
 	}
-- 
cgit v1.2.3


From a2bf4538714f83fc83ac175c4de296510ae596ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <remi.denis-courmont@nokia.com>
Date: Tue, 10 Feb 2009 17:14:31 -0800
Subject: Phonet: fix double free in GPRS outbound packet error path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/phonet/pep-gprs.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index 6a91a32a80c..4aa888584d2 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -207,7 +207,6 @@ static int gprs_xmit(struct sk_buff *skb, struct net_device *dev)
 				dev->name, err);
 		dev->stats.tx_aborted_errors++;
 		dev->stats.tx_errors++;
-		dev_kfree_skb(skb);
 	} else {
 		dev->stats.tx_packets++;
 		dev->stats.tx_bytes += len;
-- 
cgit v1.2.3


From 2ddc1ac1b9f00096869a48b97c28de72386200d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= <remi.denis-courmont@nokia.com>
Date: Tue, 10 Feb 2009 17:14:50 -0800
Subject: Phonet: do not compute unused value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/phonet/pep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index bb3e67849b3..8ad2b533388 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -553,7 +553,7 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
 	struct pep_sock *pn = pep_sk(sk);
 	struct sock *sknode;
-	struct pnpipehdr *hdr = pnp_hdr(skb);
+	struct pnpipehdr *hdr;
 	struct sockaddr_pn dst;
 	int err = NET_RX_SUCCESS;
 	u8 pipe_handle;
-- 
cgit v1.2.3


From f1b33cb1c25ac476cbf22783f9ca2016f99648ed Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 6 Feb 2009 00:27:32 +0100
Subject: mac80211: restrict to AP in outgoing interface heuristic

We try to find the correct outgoing interface for injected frames
based on the TA, but since this is a hack for hostapd 11w, restrict
the heuristic to AP mode interfaces. At some point we'll add the
ability to give an interface index in radiotap or so and just
remove this heuristic again.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Cc: stable@kernel.org [2.6.28.x]
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 4278e545638..94de5033f0b 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1343,6 +1343,8 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev)
 						list) {
 				if (!netif_running(sdata->dev))
 					continue;
+				if (sdata->vif.type != NL80211_IFTYPE_AP)
+					continue;
 				if (compare_ether_addr(sdata->dev->dev_addr,
 						       hdr->addr2)) {
 					dev_hold(sdata->dev);
-- 
cgit v1.2.3


From df0bca049d01c0ee94afb7cd5dfd959541e6c8da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Lecigne?= <clement.lecigne@netasq.com>
Date: Thu, 12 Feb 2009 16:59:09 -0800
Subject: net: 4 bytes kernel memory disclosure in SO_BSDCOMPAT gsopt try #2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In function sock_getsockopt() located in net/core/sock.c, optval v.val
is not correctly initialized and directly returned in userland in case
we have SO_BSDCOMPAT option set.

This dummy code should trigger the bug:

int main(void)
{
	unsigned char buf[4] = { 0, 0, 0, 0 };
	int len;
	int sock;
	sock = socket(33, 2, 2);
	getsockopt(sock, 1, SO_BSDCOMPAT, &buf, &len);
	printf("%x%x%x%x\n", buf[0], buf[1], buf[2], buf[3]);
	close(sock);
}

Here is a patch that fix this bug by initalizing v.val just after its
declaration.

Signed-off-by: Clément Lecigne <clement.lecigne@netasq.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index f3a0d08cbb4..6f2e1337975 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -696,6 +696,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 	if (len < 0)
 		return -EINVAL;
 
+	v.val = 0;
+
 	switch(optname) {
 	case SO_DEBUG:
 		v.val = sock_flag(sk, SOCK_DBG);
-- 
cgit v1.2.3


From 1d7b33f77b2d8b0b1ee767e6f8f05cbd9d72cb7c Mon Sep 17 00:00:00 2001
From: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Date: Thu, 12 Feb 2009 17:00:20 -0800
Subject: wimax: fix oops in wimax_dev_get_by_genl_info() when looking up
 non-wimax iface

When a non-wimax interface is looked up by the stack, a bad pointer is
returned when the looked-up interface is not found in the list (of
registered WiMAX interfaces). This causes an oops in the caller when
trying to use the pointer.

Fix by properly setting the pointer to NULL if we don't exit from the
list_for_each() with a found entry.

Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wimax/id-table.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/wimax/id-table.c b/net/wimax/id-table.c
index 5e685f7eda9..72273abfcb1 100644
--- a/net/wimax/id-table.c
+++ b/net/wimax/id-table.c
@@ -94,12 +94,13 @@ struct wimax_dev *wimax_dev_get_by_genl_info(
 	list_for_each_entry(wimax_dev, &wimax_id_table, id_table_node) {
 		if (wimax_dev->net_dev->ifindex == ifindex) {
 			dev_hold(wimax_dev->net_dev);
-			break;
+			goto found;
 		}
 	}
-	if (wimax_dev == NULL)
-		d_printf(1, NULL, "wimax: no devices found with ifindex %d\n",
-			 ifindex);
+	wimax_dev = NULL;
+	d_printf(1, NULL, "wimax: no devices found with ifindex %d\n",
+		 ifindex);
+found:
 	spin_unlock(&wimax_id_table_lock);
 	d_fnend(3, NULL, "(info %p ifindex %d) = %p\n",
 		info, ifindex, wimax_dev);
-- 
cgit v1.2.3


From 92a0acce186cde8ead56c6915d9479773673ea1a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 17 Feb 2009 21:24:05 -0800
Subject: net: Kill skb_truesize_check(), it only catches false-positives.

A long time ago we had bugs, primarily in TCP, where we would modify
skb->truesize (for TSO queue collapsing) in ways which would corrupt
the socket memory accounting.

skb_truesize_check() was added in order to try and catch this error
more systematically.

However this debugging check has morphed into a Frankenstein of sorts
and these days it does nothing other than catch false-positives.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 8 --------
 net/core/sock.c   | 1 -
 2 files changed, 9 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index da74b844f4e..c6a6b166f8d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -143,14 +143,6 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 	BUG();
 }
 
-void skb_truesize_bug(struct sk_buff *skb)
-{
-	WARN(net_ratelimit(), KERN_ERR "SKB BUG: Invalid truesize (%u) "
-	       "len=%u, sizeof(sk_buff)=%Zd\n",
-	       skb->truesize, skb->len, sizeof(struct sk_buff));
-}
-EXPORT_SYMBOL(skb_truesize_bug);
-
 /* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
  *	'private' fields and also do memory statistics to find all the
  *	[BEEP] leaks.
diff --git a/net/core/sock.c b/net/core/sock.c
index 6f2e1337975..6e4f14d1ef8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1137,7 +1137,6 @@ void sock_rfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 
-	skb_truesize_check(skb);
 	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
 	sk_mem_uncharge(skb->sk, skb->truesize);
 }
-- 
cgit v1.2.3


From 4aa3b2ee1945ed082430ae1fb988d60eef64ca07 Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@inl.fr>
Date: Wed, 18 Feb 2009 15:28:46 +0100
Subject: netfilter: nf_conntrack_ipv6: fix nf_log_packet message in icmpv6
 conntrack

This patch fixes a trivial typo that was adding a new line at end of
the nf_log_packet() prefix. It also make the logging conditionnal by
adding a LOG_INVALID test.

Signed-off-by: Eric Leblond <eric@inl.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index c323643ffcf..72dbb6d1a6b 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -201,8 +201,9 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
 
 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
 	    nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
-		nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
-			      "nf_ct_icmpv6: ICMPv6 checksum failed\n");
+		if (LOG_INVALID(net, IPPROTO_ICMPV6))
+			nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_icmpv6: ICMPv6 checksum failed ");
 		return -NF_ACCEPT;
 	}
 
-- 
cgit v1.2.3


From 5ca431f9ae8db8c6edb9c64bebe6d6521077afd6 Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@inl.fr>
Date: Wed, 18 Feb 2009 15:29:23 +0100
Subject: netfilter: nfnetlink_log: fix per-rule qthreshold override

In NFLOG the per-rule qthreshold should overrides per-instance only
it is set. With current code, the per-rule qthreshold is 1 if not set
and it overrides the per-instance qthreshold.

This patch modifies the default xt_NFLOG threshold from 1 to
0. Thus a value of 0 means there is no per-rule setting and the instance
parameter has to apply.

Signed-off-by: Eric Leblond <eric@inl.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nfnetlink_log.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index fa49dc7fe10..580b837e44d 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -590,8 +590,10 @@ nfulnl_log_packet(u_int8_t pf,
 
 	qthreshold = inst->qthreshold;
 	/* per-rule qthreshold overrides per-instance */
-	if (qthreshold > li->u.ulog.qthreshold)
-		qthreshold = li->u.ulog.qthreshold;
+	if (li->u.ulog.qthreshold)
+		if (qthreshold > li->u.ulog.qthreshold)
+			qthreshold = li->u.ulog.qthreshold;
+
 
 	switch (inst->copy_mode) {
 	case NFULNL_COPY_META:
-- 
cgit v1.2.3


From 2c6764b743f9d25dd0806a417f06920dcbd0f599 Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@inl.fr>
Date: Wed, 18 Feb 2009 15:29:49 +0100
Subject: netfilter: nfnetlink_log: fix timeout handling

NFLOG timeout was computed in timer by doing:

    flushtimeout*HZ/100

Default value of flushtimeout was HZ (for 1 second delay). This was
wrong for non 100HZ computer. This patch modify the default delay by
using 100 instead of HZ.

Signed-off-by: Eric Leblond <eric@inl.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nfnetlink_log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 580b837e44d..c712e9fc6bb 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -39,7 +39,7 @@
 #endif
 
 #define NFULNL_NLBUFSIZ_DEFAULT	NLMSG_GOODSIZE
-#define NFULNL_TIMEOUT_DEFAULT 	HZ	/* every second */
+#define NFULNL_TIMEOUT_DEFAULT 	100	/* every second */
 #define NFULNL_QTHRESH_DEFAULT 	100	/* 100 packets */
 #define NFULNL_COPY_RANGE_MAX	0xFFFF	/* max packet size is limited by 16-bit struct nfattr nfa_len field */
 
-- 
cgit v1.2.3


From eb132205ca2f7ad44d8c8c482815b6911200b6a0 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 18 Feb 2009 16:42:19 +0100
Subject: netfilter: make proc/net/ip* print names from foreign NFPROTO

When extensions were moved to the NFPROTO_UNSPEC wildcard in
ab4f21e6fb1c09b13c4c3cb8357babe8223471bd, they disappeared from the
procfs files.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/x_tables.c | 199 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 142 insertions(+), 57 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index bfbf521f6ea..5baccfa5a0d 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -827,59 +827,143 @@ static const struct file_operations xt_table_ops = {
 	.release = seq_release_net,
 };
 
-static void *xt_match_seq_start(struct seq_file *seq, loff_t *pos)
+/*
+ * Traverse state for ip{,6}_{tables,matches} for helping crossing
+ * the multi-AF mutexes.
+ */
+struct nf_mttg_trav {
+	struct list_head *head, *curr;
+	uint8_t class, nfproto;
+};
+
+enum {
+	MTTG_TRAV_INIT,
+	MTTG_TRAV_NFP_UNSPEC,
+	MTTG_TRAV_NFP_SPEC,
+	MTTG_TRAV_DONE,
+};
+
+static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos,
+    bool is_target)
 {
-	struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private;
-	u_int16_t af = (unsigned long)pde->data;
+	static const uint8_t next_class[] = {
+		[MTTG_TRAV_NFP_UNSPEC] = MTTG_TRAV_NFP_SPEC,
+		[MTTG_TRAV_NFP_SPEC]   = MTTG_TRAV_DONE,
+	};
+	struct nf_mttg_trav *trav = seq->private;
+
+	switch (trav->class) {
+	case MTTG_TRAV_INIT:
+		trav->class = MTTG_TRAV_NFP_UNSPEC;
+		mutex_lock(&xt[NFPROTO_UNSPEC].mutex);
+		trav->head = trav->curr = is_target ?
+			&xt[NFPROTO_UNSPEC].target : &xt[NFPROTO_UNSPEC].match;
+ 		break;
+	case MTTG_TRAV_NFP_UNSPEC:
+		trav->curr = trav->curr->next;
+		if (trav->curr != trav->head)
+			break;
+		mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
+		mutex_lock(&xt[trav->nfproto].mutex);
+		trav->head = trav->curr = is_target ?
+			&xt[trav->nfproto].target : &xt[trav->nfproto].match;
+		trav->class = next_class[trav->class];
+		break;
+	case MTTG_TRAV_NFP_SPEC:
+		trav->curr = trav->curr->next;
+		if (trav->curr != trav->head)
+			break;
+		/* fallthru, _stop will unlock */
+	default:
+		return NULL;
+	}
 
-	mutex_lock(&xt[af].mutex);
-	return seq_list_start(&xt[af].match, *pos);
+	if (ppos != NULL)
+		++*ppos;
+	return trav;
 }
 
-static void *xt_match_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+static void *xt_mttg_seq_start(struct seq_file *seq, loff_t *pos,
+    bool is_target)
 {
-	struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private;
-	u_int16_t af = (unsigned long)pde->data;
+	struct nf_mttg_trav *trav = seq->private;
+	unsigned int j;
 
-	return seq_list_next(v, &xt[af].match, pos);
+	trav->class = MTTG_TRAV_INIT;
+	for (j = 0; j < *pos; ++j)
+		if (xt_mttg_seq_next(seq, NULL, NULL, is_target) == NULL)
+			return NULL;
+	return trav;
 }
 
-static void xt_match_seq_stop(struct seq_file *seq, void *v)
+static void xt_mttg_seq_stop(struct seq_file *seq, void *v)
 {
-	struct proc_dir_entry *pde = seq->private;
-	u_int16_t af = (unsigned long)pde->data;
+	struct nf_mttg_trav *trav = seq->private;
+
+	switch (trav->class) {
+	case MTTG_TRAV_NFP_UNSPEC:
+		mutex_unlock(&xt[NFPROTO_UNSPEC].mutex);
+		break;
+	case MTTG_TRAV_NFP_SPEC:
+		mutex_unlock(&xt[trav->nfproto].mutex);
+		break;
+	}
+}
 
-	mutex_unlock(&xt[af].mutex);
+static void *xt_match_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	return xt_mttg_seq_start(seq, pos, false);
 }
 
-static int xt_match_seq_show(struct seq_file *seq, void *v)
+static void *xt_match_seq_next(struct seq_file *seq, void *v, loff_t *ppos)
 {
-	struct xt_match *match = list_entry(v, struct xt_match, list);
+	return xt_mttg_seq_next(seq, v, ppos, false);
+}
 
-	if (strlen(match->name))
-		return seq_printf(seq, "%s\n", match->name);
-	else
-		return 0;
+static int xt_match_seq_show(struct seq_file *seq, void *v)
+{
+	const struct nf_mttg_trav *trav = seq->private;
+	const struct xt_match *match;
+
+	switch (trav->class) {
+	case MTTG_TRAV_NFP_UNSPEC:
+	case MTTG_TRAV_NFP_SPEC:
+		if (trav->curr == trav->head)
+			return 0;
+		match = list_entry(trav->curr, struct xt_match, list);
+		return (*match->name == '\0') ? 0 :
+		       seq_printf(seq, "%s\n", match->name);
+	}
+	return 0;
 }
 
 static const struct seq_operations xt_match_seq_ops = {
 	.start	= xt_match_seq_start,
 	.next	= xt_match_seq_next,
-	.stop	= xt_match_seq_stop,
+	.stop	= xt_mttg_seq_stop,
 	.show	= xt_match_seq_show,
 };
 
 static int xt_match_open(struct inode *inode, struct file *file)
 {
+	struct seq_file *seq;
+	struct nf_mttg_trav *trav;
 	int ret;
 
-	ret = seq_open(file, &xt_match_seq_ops);
-	if (!ret) {
-		struct seq_file *seq = file->private_data;
+	trav = kmalloc(sizeof(*trav), GFP_KERNEL);
+	if (trav == NULL)
+		return -ENOMEM;
 
-		seq->private = PDE(inode);
+	ret = seq_open(file, &xt_match_seq_ops);
+	if (ret < 0) {
+		kfree(trav);
+		return ret;
 	}
-	return ret;
+
+	seq = file->private_data;
+	seq->private = trav;
+	trav->nfproto = (unsigned long)PDE(inode)->data;
+	return 0;
 }
 
 static const struct file_operations xt_match_ops = {
@@ -887,62 +971,63 @@ static const struct file_operations xt_match_ops = {
 	.open	 = xt_match_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_private,
 };
 
 static void *xt_target_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private;
-	u_int16_t af = (unsigned long)pde->data;
-
-	mutex_lock(&xt[af].mutex);
-	return seq_list_start(&xt[af].target, *pos);
+	return xt_mttg_seq_start(seq, pos, true);
 }
 
-static void *xt_target_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+static void *xt_target_seq_next(struct seq_file *seq, void *v, loff_t *ppos)
 {
-	struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private;
-	u_int16_t af = (unsigned long)pde->data;
-
-	return seq_list_next(v, &xt[af].target, pos);
-}
-
-static void xt_target_seq_stop(struct seq_file *seq, void *v)
-{
-	struct proc_dir_entry *pde = seq->private;
-	u_int16_t af = (unsigned long)pde->data;
-
-	mutex_unlock(&xt[af].mutex);
+	return xt_mttg_seq_next(seq, v, ppos, true);
 }
 
 static int xt_target_seq_show(struct seq_file *seq, void *v)
 {
-	struct xt_target *target = list_entry(v, struct xt_target, list);
-
-	if (strlen(target->name))
-		return seq_printf(seq, "%s\n", target->name);
-	else
-		return 0;
+	const struct nf_mttg_trav *trav = seq->private;
+	const struct xt_target *target;
+
+	switch (trav->class) {
+	case MTTG_TRAV_NFP_UNSPEC:
+	case MTTG_TRAV_NFP_SPEC:
+		if (trav->curr == trav->head)
+			return 0;
+		target = list_entry(trav->curr, struct xt_target, list);
+		return (*target->name == '\0') ? 0 :
+		       seq_printf(seq, "%s\n", target->name);
+	}
+	return 0;
 }
 
 static const struct seq_operations xt_target_seq_ops = {
 	.start	= xt_target_seq_start,
 	.next	= xt_target_seq_next,
-	.stop	= xt_target_seq_stop,
+	.stop	= xt_mttg_seq_stop,
 	.show	= xt_target_seq_show,
 };
 
 static int xt_target_open(struct inode *inode, struct file *file)
 {
+	struct seq_file *seq;
+	struct nf_mttg_trav *trav;
 	int ret;
 
-	ret = seq_open(file, &xt_target_seq_ops);
-	if (!ret) {
-		struct seq_file *seq = file->private_data;
+	trav = kmalloc(sizeof(*trav), GFP_KERNEL);
+	if (trav == NULL)
+		return -ENOMEM;
 
-		seq->private = PDE(inode);
+	ret = seq_open(file, &xt_target_seq_ops);
+	if (ret < 0) {
+		kfree(trav);
+		return ret;
 	}
-	return ret;
+
+	seq = file->private_data;
+	seq->private = trav;
+	trav->nfproto = (unsigned long)PDE(inode)->data;
+	return 0;
 }
 
 static const struct file_operations xt_target_ops = {
@@ -950,7 +1035,7 @@ static const struct file_operations xt_target_ops = {
 	.open	 = xt_target_open,
 	.read	 = seq_read,
 	.llseek	 = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_private,
 };
 
 #define FORMAT_TABLES	"_tables_names"
-- 
cgit v1.2.3


From 5209921cf15452cbe43097afce11d2846630cb51 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 18 Feb 2009 17:45:44 -0800
Subject: tcp: remove obsoleted comment about different passes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is obsolete since the passes got combined.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index dda42f0bd7a..da2c3b8794f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2023,7 +2023,6 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		last_lost = tp->snd_una;
 	}
 
-	/* First pass: retransmit lost packets. */
 	tcp_for_write_queue_from(skb, sk) {
 		__u8 sacked = TCP_SKB_CB(skb)->sacked;
 
-- 
cgit v1.2.3


From 486a87f1e5624096bd1c09e9e716239597d48dca Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@free.fr>
Date: Sun, 22 Feb 2009 00:07:53 -0800
Subject: netns: fix double free at netns creation

This patch fix a double free when a network namespace fails.
The previous code does a kfree of the net_generic structure when
one of the init subsystem initialization fails.
The 'setup_net' function does kfree(ng) and returns an error.
The caller, 'copy_net_ns', call net_free on error, and this one
calls kfree(net->gen), making this pointer freed twice.

This patch make the code symetric, the net_alloc does the net_generic
allocation and the net_free frees the net_generic.

Signed-off-by: Daniel Lezcano <daniel.lezcano@free.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 86 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 55 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 55151faaf90..b0767abf23e 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -32,24 +32,14 @@ static __net_init int setup_net(struct net *net)
 {
 	/* Must be called with net_mutex held */
 	struct pernet_operations *ops;
-	int error;
-	struct net_generic *ng;
+	int error = 0;
 
 	atomic_set(&net->count, 1);
+
 #ifdef NETNS_REFCNT_DEBUG
 	atomic_set(&net->use_count, 0);
 #endif
 
-	error = -ENOMEM;
-	ng = kzalloc(sizeof(struct net_generic) +
-			INITIAL_NET_GEN_PTRS * sizeof(void *), GFP_KERNEL);
-	if (ng == NULL)
-		goto out;
-
-	ng->len = INITIAL_NET_GEN_PTRS;
-	rcu_assign_pointer(net->gen, ng);
-
-	error = 0;
 	list_for_each_entry(ops, &pernet_list, list) {
 		if (ops->init) {
 			error = ops->init(net);
@@ -70,7 +60,6 @@ out_undo:
 	}
 
 	rcu_barrier();
-	kfree(ng);
 	goto out;
 }
 
@@ -78,16 +67,43 @@ out_undo:
 static struct kmem_cache *net_cachep;
 static struct workqueue_struct *netns_wq;
 
-static struct net *net_alloc(void)
+static struct net_generic *net_alloc_generic(void)
 {
-	return kmem_cache_zalloc(net_cachep, GFP_KERNEL);
+	struct net_generic *ng;
+	size_t generic_size = sizeof(struct net_generic) +
+		INITIAL_NET_GEN_PTRS * sizeof(void *);
+
+	ng = kzalloc(generic_size, GFP_KERNEL);
+	if (ng)
+		ng->len = INITIAL_NET_GEN_PTRS;
+
+	return ng;
 }
 
-static void net_free(struct net *net)
+static struct net *net_alloc(void)
 {
+	struct net *net = NULL;
+	struct net_generic *ng;
+
+	ng = net_alloc_generic();
+	if (!ng)
+		goto out;
+
+	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
 	if (!net)
-		return;
+		goto out_free;
+
+	rcu_assign_pointer(net->gen, ng);
+out:
+	return net;
+
+out_free:
+	kfree(ng);
+	goto out;
+}
 
+static void net_free(struct net *net)
+{
 #ifdef NETNS_REFCNT_DEBUG
 	if (unlikely(atomic_read(&net->use_count) != 0)) {
 		printk(KERN_EMERG "network namespace not free! Usage: %d\n",
@@ -112,27 +128,28 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 	err = -ENOMEM;
 	new_net = net_alloc();
 	if (!new_net)
-		goto out;
+		goto out_err;
 
 	mutex_lock(&net_mutex);
 	err = setup_net(new_net);
-	if (err)
-		goto out_unlock;
-
-	rtnl_lock();
-	list_add_tail(&new_net->list, &net_namespace_list);
-	rtnl_unlock();
-
-
-out_unlock:
+	if (!err) {
+		rtnl_lock();
+		list_add_tail(&new_net->list, &net_namespace_list);
+		rtnl_unlock();
+	}
 	mutex_unlock(&net_mutex);
+
+	if (err)
+		goto out_free;
 out:
 	put_net(old_net);
-	if (err) {
-		net_free(new_net);
-		new_net = ERR_PTR(err);
-	}
 	return new_net;
+
+out_free:
+	net_free(new_net);
+out_err:
+	new_net = ERR_PTR(err);
+	goto out;
 }
 
 static void cleanup_net(struct work_struct *work)
@@ -188,6 +205,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 
 static int __init net_ns_init(void)
 {
+	struct net_generic *ng;
 	int err;
 
 	printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));
@@ -202,6 +220,12 @@ static int __init net_ns_init(void)
 		panic("Could not create netns workq");
 #endif
 
+	ng = net_alloc_generic();
+	if (!ng)
+		panic("Could not allocate generic netns");
+
+	rcu_assign_pointer(init_net.gen, ng);
+
 	mutex_lock(&net_mutex);
 	err = setup_net(&init_net);
 
-- 
cgit v1.2.3


From 586c25003707067f074043d80fb2071671c58db0 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 20 Feb 2009 16:32:55 -0500
Subject: cipso: Fix documentation comment

The CIPSO protocol engine incorrectly stated that the FIPS-188 specification
could be found in the kernel's Documentation directory.  This patch corrects
that by removing the comment and directing users to the FIPS-188 documented
hosted online.  For the sake of completeness I've also included a link to the
CIPSO draft specification on the NetLabel website.

Thanks to Randy Dunlap for spotting the error and letting me know.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 net/ipv4/cipso_ipv4.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 6bb2635b5de..7bc992976d2 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -3,11 +3,16 @@
  *
  * This is an implementation of the CIPSO 2.2 protocol as specified in
  * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in
- * FIPS-188, copies of both documents can be found in the Documentation
- * directory.  While CIPSO never became a full IETF RFC standard many vendors
+ * FIPS-188.  While CIPSO never became a full IETF RFC standard many vendors
  * have chosen to adopt the protocol and over the years it has become a
  * de-facto standard for labeled networking.
  *
+ * The CIPSO draft specification can be found in the kernel's Documentation
+ * directory as well as the following URL:
+ *   http://netlabel.sourceforge.net/files/draft-ietf-cipso-ipsecurity-01.txt
+ * The FIPS-188 specification can be found at the following URL:
+ *   http://www.itl.nist.gov/fipspubs/fip188.htm
+ *
  * Author: Paul Moore <paul.moore@hp.com>
  *
  */
-- 
cgit v1.2.3


From ebe47d47b7b7fed72dabcce4717da727b4e2367d Mon Sep 17 00:00:00 2001
From: Clemens Noss <cnoss@gmx.de>
Date: Mon, 23 Feb 2009 15:37:35 -0800
Subject: netns: build fix for net_alloc_generic

net_alloc_generic was defined in #ifdef CONFIG_NET_NS, but used
unconditionally. Move net_alloc_generic out of #ifdef.

Signed-off-by: Clemens Noss <cnoss@gmx.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b0767abf23e..2adb1a7d361 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -63,10 +63,6 @@ out_undo:
 	goto out;
 }
 
-#ifdef CONFIG_NET_NS
-static struct kmem_cache *net_cachep;
-static struct workqueue_struct *netns_wq;
-
 static struct net_generic *net_alloc_generic(void)
 {
 	struct net_generic *ng;
@@ -80,6 +76,10 @@ static struct net_generic *net_alloc_generic(void)
 	return ng;
 }
 
+#ifdef CONFIG_NET_NS
+static struct kmem_cache *net_cachep;
+static struct workqueue_struct *netns_wq;
+
 static struct net *net_alloc(void)
 {
 	struct net *net = NULL;
-- 
cgit v1.2.3


From 50fee1dec5d71b8a14c1b82f2f42e16adc227f8b Mon Sep 17 00:00:00 2001
From: Eugene Teo <eugeneteo@kernel.sg>
Date: Mon, 23 Feb 2009 15:38:41 -0800
Subject: net: amend the fix for SO_BSDCOMPAT gsopt infoleak

The fix for CVE-2009-0676 (upstream commit df0bca04) is incomplete. Note
that the same problem of leaking kernel memory will reappear if someone
on some architecture uses struct timeval with some internal padding (for
example tv_sec 64-bit and tv_usec 32-bit) --- then, you are going to
leak the padded bytes to userspace.

Signed-off-by: Eugene Teo <eugeneteo@kernel.sg>
Reported-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 6e4f14d1ef8..5f97caa158e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -696,7 +696,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 	if (len < 0)
 		return -EINVAL;
 
-	v.val = 0;
+	memset(&v, 0, sizeof(v));
 
 	switch(optname) {
 	case SO_DEBUG:
-- 
cgit v1.2.3


From 325fb5b4d26038cba665dd0d8ee09555321061f0 Mon Sep 17 00:00:00 2001
From: Josef Drexler <joe-lk@ttdpatch.net>
Date: Tue, 24 Feb 2009 14:53:12 +0100
Subject: netfilter: xt_recent: fix proc-file addition/removal of IPv4
 addresses

Fix regression introduded by commit 079aa88 (netfilter: xt_recent: IPv6 support):

From http://bugzilla.kernel.org/show_bug.cgi?id=12753:

Problem Description:
An uninitialized buffer causes IPv4 addresses added manually (via the +IP
command to the proc interface) to never match any packets. Similarly, the -IP
command fails to remove IPv4 addresses.

Details:
In the function recent_entry_lookup, the xt_recent module does comparisons of
the entire nf_inet_addr union value, both for IPv4 and IPv6 addresses. For
addresses initialized from actual packets the remaining 12 bytes not occupied
by the IPv4 are zeroed so this works correctly. However when setting the
nf_inet_addr addr variable in the recent_mt_proc_write function, only the IPv4
bytes are initialized and the remaining 12 bytes contain garbage.

Hence addresses added in this way never match any packets, unless these
uninitialized 12 bytes happened to be zero by coincidence. Similarly, addresses
cannot consistently be removed using the proc interface due to mismatch of the
garbage bytes (although it will sometimes work to remove an address that was
added manually).

Reading the /proc/net/xt_recent/ entries hides this problem because this only
uses the first 4 bytes when displaying IPv4 addresses.

Steps to reproduce:
$ iptables -I INPUT -m recent --rcheck -j LOG
$ echo +169.254.156.239 > /proc/net/xt_recent/DEFAULT
$ cat /proc/net/xt_recent/DEFAULT
src=169.254.156.239 ttl: 0 last_seen: 119910 oldest_pkt: 1 119910

[At this point no packets from 169.254.156.239 are being logged.]

$ iptables -I INPUT -s 169.254.156.239 -m recent --set
$ cat /proc/net/xt_recent/DEFAULT
src=169.254.156.239 ttl: 0 last_seen: 119910 oldest_pkt: 1 119910
src=169.254.156.239 ttl: 255 last_seen: 126184 oldest_pkt: 4 125434, 125684, 125934, 126184

[At this point, adding the address via an iptables rule, packets are being
logged correctly.]

$ echo -169.254.156.239 > /proc/net/xt_recent/DEFAULT
$ cat /proc/net/xt_recent/DEFAULT
src=169.254.156.239 ttl: 0 last_seen: 119910 oldest_pkt: 1 119910
src=169.254.156.239 ttl: 255 last_seen: 126992 oldest_pkt: 10 125434, 125684, 125934, 126184, 126434, 126684, 126934, 126991, 126991, 126992
$ echo -169.254.156.239 > /proc/net/xt_recent/DEFAULT
$ cat /proc/net/xt_recent/DEFAULT
src=169.254.156.239 ttl: 0 last_seen: 119910 oldest_pkt: 1 119910
src=169.254.156.239 ttl: 255 last_seen: 126992 oldest_pkt: 10 125434, 125684, 125934, 126184, 126434, 126684, 126934, 126991, 126991, 126992

[Removing the address via /proc interface failed evidently.]

Possible solutions:
- initialize the addr variable in recent_mt_proc_write
- compare only 4 bytes for IPv4 addresses in recent_entry_lookup

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_recent.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index fe80b614a40..791e030ea90 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -542,7 +542,7 @@ recent_mt_proc_write(struct file *file, const char __user *input,
 	struct recent_entry *e;
 	char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")];
 	const char *c = buf;
-	union nf_inet_addr addr;
+	union nf_inet_addr addr = {};
 	u_int16_t family;
 	bool add, succ;
 
-- 
cgit v1.2.3


From a52b8bd338630f78a6bfe39fe17cb8469d2679ae Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 24 Feb 2009 16:40:16 -0800
Subject: tcp_scalable: Update malformed & dead url

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_scalable.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 2747ec7bfb6..4660b088a8c 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -1,6 +1,6 @@
 /* Tom Kelly's Scalable TCP
  *
- * See htt://www-lce.eng.cam.ac.uk/~ctk21/scalable/
+ * See http://www.deneholme.net/tom/scalable/
  *
  * John Heffner <jheffner@sc.edu>
  */
-- 
cgit v1.2.3


From 3f53a38131a4e7a053c0aa060aba0411242fb6b9 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Thu, 26 Feb 2009 03:35:13 -0800
Subject: ipv6: don't use tw net when accounting for recycled tw

We already have a valid net in that place, but this is not just a
cleanup - the tw pointer can be NULL there sometimes, thus causing
an oops in NET_NS=y case.

The same place in ipv4 code already works correctly using existing
net, rather than tw's one.

The bug exists since 2.6.27.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/inet6_hashtables.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 8fe267feb81..1bcc3431859 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -258,11 +258,11 @@ unique:
 
 	if (twp != NULL) {
 		*twp = tw;
-		NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITRECYCLED);
+		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
 	} else if (tw != NULL) {
 		/* Silly. Should hash-dance instead... */
 		inet_twsk_deschedule(tw, death_row);
-		NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITRECYCLED);
+		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
 
 		inet_twsk_put(tw);
 	}
-- 
cgit v1.2.3


From 1844f747947bb89d7f12cd3034548805113f764b Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Fri, 27 Feb 2009 02:42:38 -0800
Subject: pkt_sched: sch_drr: Fix oops in drr_change_class.

drr_change_class lacks a check for NULL of tca[TCA_OPTIONS], so oops
is possible.

Reported-by: Denys Fedoryschenko <denys@visp.net.lb>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_drr.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index f6b4fa97df7..e36e94ab4e1 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -66,11 +66,15 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 {
 	struct drr_sched *q = qdisc_priv(sch);
 	struct drr_class *cl = (struct drr_class *)*arg;
+	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_DRR_MAX + 1];
 	u32 quantum;
 	int err;
 
-	err = nla_parse_nested(tb, TCA_DRR_MAX, tca[TCA_OPTIONS], drr_policy);
+	if (!opt)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy);
 	if (err < 0)
 		return err;
 
-- 
cgit v1.2.3


From 4ead443163b798661c2a2ede5e512e116a9e41e7 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 1 Mar 2009 00:11:52 -0800
Subject: netpoll: Add drop checks to all entry points

The netpoll entry checks are required to ensure that we don't
receive normal packets when invoked via netpoll.  Unfortunately
it only ever worked for the netif_receive_skb/netif_rx entry
points.  The VLAN (and subsequently GRO) entry point didn't
have the check and therefore can trigger all sorts of weird
problems.

This patch adds the netpoll check to all entry points.

I'm still uneasy with receiving at all under netpoll (which
apparently is only used by the out-of-tree kdump code).  The
reason is it is perfectly legal to receive all data including
headers into highmem if netpoll is off, but if you try to do
that with netpoll on and someone gets a printk in an IRQ handler
you're going to get a nice BUG_ON.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_core.c | 10 ++++++++++
 net/core/dev.c        |  6 ++++++
 2 files changed, 16 insertions(+)

(limited to 'net')

diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index e9db889d622..2886d2fb9ab 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -1,12 +1,16 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/if_vlan.h>
+#include <linux/netpoll.h>
 #include "vlan.h"
 
 /* VLAN rx hw acceleration helper.  This acts like netif_{rx,receive_skb}(). */
 int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 		      u16 vlan_tci, int polling)
 {
+	if (netpoll_rx(skb))
+		return NET_RX_DROP;
+
 	if (skb_bond_should_drop(skb))
 		goto drop;
 
@@ -100,6 +104,9 @@ int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 {
 	int err = NET_RX_SUCCESS;
 
+	if (netpoll_receive_skb(skb))
+		return NET_RX_DROP;
+
 	switch (vlan_gro_common(napi, grp, vlan_tci, skb)) {
 	case -1:
 		return netif_receive_skb(skb);
@@ -126,6 +133,9 @@ int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 	if (!skb)
 		goto out;
 
+	if (netpoll_receive_skb(skb))
+		goto out;
+
 	err = NET_RX_SUCCESS;
 
 	switch (vlan_gro_common(napi, grp, vlan_tci, skb)) {
diff --git a/net/core/dev.c b/net/core/dev.c
index a17e0066236..72b0d26fd46 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2488,6 +2488,9 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 
 int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
+	if (netpoll_receive_skb(skb))
+		return NET_RX_DROP;
+
 	switch (__napi_gro_receive(napi, skb)) {
 	case -1:
 		return netif_receive_skb(skb);
@@ -2558,6 +2561,9 @@ int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
 	if (!skb)
 		goto out;
 
+	if (netpoll_receive_skb(skb))
+		goto out;
+
 	err = NET_RX_SUCCESS;
 
 	switch (__napi_gro_receive(napi, skb)) {
-- 
cgit v1.2.3


From 9ec06ff57a9badef3b6b019f35efc6b21fc27d03 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Sun, 1 Mar 2009 00:21:36 -0800
Subject: tcp: fix retrans_out leaks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There's conflicting assumptions in shifting, the caller assumes
that dupsack results in S'ed skbs (or a part of it) for sure but
never gave a hint to tcp_sacktag_one when dsack is actually in
use. Thus DSACK retrans_out -= pcount was not taken and the
counter became out of sync. Remove obstacle from that information
flow to get DSACKs accounted in tcp_sacktag_one as expected.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Tested-by: Denys Fedoryshchenko <denys@visp.net.lb>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a6961d75c7e..c28976a7e59 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1374,7 +1374,8 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
 
 static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 			   struct tcp_sacktag_state *state,
-			   unsigned int pcount, int shifted, int mss)
+			   unsigned int pcount, int shifted, int mss,
+			   int dup_sack)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
@@ -1410,7 +1411,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	}
 
 	/* We discard results */
-	tcp_sacktag_one(skb, sk, state, 0, pcount);
+	tcp_sacktag_one(skb, sk, state, dup_sack, pcount);
 
 	/* Difference in this won't matter, both ACKed by the same cumul. ACK */
 	TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS);
@@ -1561,7 +1562,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 
 	if (!skb_shift(prev, skb, len))
 		goto fallback;
-	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss))
+	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
 		goto out;
 
 	/* Hole filled allows collapsing with the next as well, this is very
@@ -1580,7 +1581,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 	len = skb->len;
 	if (skb_shift(prev, skb, len)) {
 		pcount += tcp_skb_pcount(skb);
-		tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss);
+		tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
 	}
 
 out:
-- 
cgit v1.2.3


From d1dd524785e30cf3d64d395d829b207376acb0aa Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Mon, 2 Mar 2009 06:46:50 +0000
Subject: sctp: fix crash during module unload

An extra list_del() during the module load failure and unload
resulted in a crash with a list corruption.  Now sctp can
be unloaded again.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/protocol.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index b78e3be6901..4e663844963 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1322,9 +1322,8 @@ SCTP_STATIC __init int sctp_init(void)
 out:
 	return status;
 err_v6_add_protocol:
-	sctp_v6_del_protocol();
-err_add_protocol:
 	sctp_v4_del_protocol();
+err_add_protocol:
 	inet_ctl_sock_destroy(sctp_ctl_sock);
 err_ctl_sock_init:
 	sctp_v6_protosw_exit();
@@ -1335,7 +1334,6 @@ err_protosw_init:
 	sctp_v4_pf_exit();
 	sctp_v6_pf_exit();
 	sctp_sysctl_unregister();
-	list_del(&sctp_af_inet.list);
 	free_pages((unsigned long)sctp_port_hashtable,
 		   get_order(sctp_port_hashsize *
 			     sizeof(struct sctp_bind_hashbucket)));
@@ -1383,7 +1381,6 @@ SCTP_STATIC __exit void sctp_exit(void)
 	sctp_v4_pf_exit();
 
 	sctp_sysctl_unregister();
-	list_del(&sctp_af_inet.list);
 
 	free_pages((unsigned long)sctp_assoc_hashtable,
 		   get_order(sctp_assoc_hashsize *
-- 
cgit v1.2.3


From 3df2678737974accf437dad11e584c1871a3ede3 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Mon, 2 Mar 2009 06:46:51 +0000
Subject: sctp: fix kernel panic with ERROR chunk containing too many error
 causes

If ERROR chunk is received with too many error causes in ESTABLISHED
state, the kernel get panic.

This is because sctp limit the max length of cmds to 14, but while
ERROR chunk is received, one error cause will add around 2 cmds by
sctp_add_cmd_sf(). So many error causes will fill the limit of cmds
and panic.

This patch fixed the problem.

This bug can be test by SCTP Conformance Test Suite
<http://networktest.sourceforge.net/>.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_sideeffect.c | 54 +++++++++++++++++++++++++++++-------------------
 net/sctp/sm_statefuns.c  | 16 ++------------
 2 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index e1d6076b4f5..b5495aecab6 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -787,36 +787,48 @@ static void sctp_cmd_process_operr(sctp_cmd_seq_t *cmds,
 				   struct sctp_association *asoc,
 				   struct sctp_chunk *chunk)
 {
-	struct sctp_operr_chunk *operr_chunk;
 	struct sctp_errhdr *err_hdr;
+	struct sctp_ulpevent *ev;
 
-	operr_chunk = (struct sctp_operr_chunk *)chunk->chunk_hdr;
-	err_hdr = &operr_chunk->err_hdr;
+	while (chunk->chunk_end > chunk->skb->data) {
+		err_hdr = (struct sctp_errhdr *)(chunk->skb->data);
 
-	switch (err_hdr->cause) {
-	case SCTP_ERROR_UNKNOWN_CHUNK:
-	{
-		struct sctp_chunkhdr *unk_chunk_hdr;
+		ev = sctp_ulpevent_make_remote_error(asoc, chunk, 0,
+						     GFP_ATOMIC);
+		if (!ev)
+			return;
 
-		unk_chunk_hdr = (struct sctp_chunkhdr *)err_hdr->variable;
-		switch (unk_chunk_hdr->type) {
-		/* ADDIP 4.1 A9) If the peer responds to an ASCONF with an
-		 * ERROR chunk reporting that it did not recognized the ASCONF
-		 * chunk type, the sender of the ASCONF MUST NOT send any
-		 * further ASCONF chunks and MUST stop its T-4 timer.
-		 */
-		case SCTP_CID_ASCONF:
-			asoc->peer.asconf_capable = 0;
-			sctp_add_cmd_sf(cmds, SCTP_CMD_TIMER_STOP,
+		sctp_ulpq_tail_event(&asoc->ulpq, ev);
+
+		switch (err_hdr->cause) {
+		case SCTP_ERROR_UNKNOWN_CHUNK:
+		{
+			sctp_chunkhdr_t *unk_chunk_hdr;
+
+			unk_chunk_hdr = (sctp_chunkhdr_t *)err_hdr->variable;
+			switch (unk_chunk_hdr->type) {
+			/* ADDIP 4.1 A9) If the peer responds to an ASCONF with
+			 * an ERROR chunk reporting that it did not recognized
+			 * the ASCONF chunk type, the sender of the ASCONF MUST
+			 * NOT send any further ASCONF chunks and MUST stop its
+			 * T-4 timer.
+			 */
+			case SCTP_CID_ASCONF:
+				if (asoc->peer.asconf_capable == 0)
+					break;
+
+				asoc->peer.asconf_capable = 0;
+				sctp_add_cmd_sf(cmds, SCTP_CMD_TIMER_STOP,
 					SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
+				break;
+			default:
+				break;
+			}
 			break;
+		}
 		default:
 			break;
 		}
-		break;
-	}
-	default:
-		break;
 	}
 }
 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 3a0cd075914..f88dfded0e3 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3163,7 +3163,6 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep,
 					sctp_cmd_seq_t *commands)
 {
 	struct sctp_chunk *chunk = arg;
-	struct sctp_ulpevent *ev;
 
 	if (!sctp_vtag_verify(chunk, asoc))
 		return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -3173,21 +3172,10 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep,
 		return sctp_sf_violation_chunklen(ep, asoc, type, arg,
 						  commands);
 
-	while (chunk->chunk_end > chunk->skb->data) {
-		ev = sctp_ulpevent_make_remote_error(asoc, chunk, 0,
-						     GFP_ATOMIC);
-		if (!ev)
-			goto nomem;
+	sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR,
+			SCTP_CHUNK(chunk));
 
-		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
-				SCTP_ULPEVENT(ev));
-		sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR,
-				SCTP_CHUNK(chunk));
-	}
 	return SCTP_DISPOSITION_CONSUME;
-
-nomem:
-	return SCTP_DISPOSITION_NOMEM;
 }
 
 /*
-- 
cgit v1.2.3


From 5a5990d3090b03745a9548a6f5edef02095675cf Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 26 Feb 2009 06:49:24 +0000
Subject: net: Avoid race between network down and sysfs

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 6ac29a46e23..484f58750eb 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -77,7 +77,9 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
 	if (endp == buf)
 		goto err;
 
-	rtnl_lock();
+	if (!rtnl_trylock())
+		return -ERESTARTSYS;
+
 	if (dev_isalive(net)) {
 		if ((ret = (*set)(net, new)) == 0)
 			ret = len;
-- 
cgit v1.2.3


From b325fddb7f869e6c95a88dc6573220f162e5b89f Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 26 Feb 2009 06:55:31 +0000
Subject: ipv6: Fix sysctl unregistration deadlock

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f9afb452249..40fabdee42a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -493,15 +493,17 @@ static void addrconf_forward_change(struct net *net, __s32 newf)
 	read_unlock(&dev_base_lock);
 }
 
-static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
+static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
 {
 	struct net *net;
 
 	net = (struct net *)table->extra2;
 	if (p == &net->ipv6.devconf_dflt->forwarding)
-		return;
+		return 0;
+
+	if (!rtnl_trylock())
+		return -ERESTARTSYS;
 
-	rtnl_lock();
 	if (p == &net->ipv6.devconf_all->forwarding) {
 		__s32 newf = net->ipv6.devconf_all->forwarding;
 		net->ipv6.devconf_dflt->forwarding = newf;
@@ -512,6 +514,7 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
 
 	if (*p)
 		rt6_purge_dflt_routers(net);
+	return 1;
 }
 #endif
 
@@ -3983,7 +3986,7 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
 	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
 
 	if (write)
-		addrconf_fixup_forwarding(ctl, valp, val);
+		ret = addrconf_fixup_forwarding(ctl, valp, val);
 	return ret;
 }
 
@@ -4019,8 +4022,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table,
 	}
 
 	*valp = new;
-	addrconf_fixup_forwarding(table, valp, val);
-	return 1;
+	return addrconf_fixup_forwarding(table, valp, val);
 }
 
 static struct addrconf_sysctl_table
-- 
cgit v1.2.3


From 176c39af29bc4edaf37f663553eeaacd47b5bc9c Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <dlezcano@fr.ibm.com>
Date: Tue, 3 Mar 2009 01:06:45 -0800
Subject: netns: fix addrconf_ifdown kernel panic

When a network namespace is destroyed the network interfaces are
all unregistered, making addrconf_ifdown called by the netdevice
notifier.
In the other hand, the addrconf exit method does a loop on the network
devices and does addrconf_ifdown on each of them. But the ordering of
the netns subsystem is not right because it uses the register_pernet_device
instead of register_pernet_subsys. If we handle the loopback as
any network device, we can safely use register_pernet_subsys.

But if we use register_pernet_subsys, the addrconf exit method will do
exactly what was already done with the unregistering of the network
devices. So in definitive, this code is pointless.

I removed the netns addrconf exit method and moved the code to the
addrconf cleanup function.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 39 +++++++++------------------------------
 1 file changed, 9 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 40fabdee42a..1220e2c7831 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2611,9 +2611,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 	ASSERT_RTNL();
 
-	if ((dev->flags & IFF_LOOPBACK) && how == 1)
-		how = 0;
-
 	rt6_ifdown(net, dev);
 	neigh_ifdown(&nd_tbl, dev);
 
@@ -4448,25 +4445,6 @@ int unregister_inet6addr_notifier(struct notifier_block *nb)
 
 EXPORT_SYMBOL(unregister_inet6addr_notifier);
 
-static void addrconf_net_exit(struct net *net)
-{
-	struct net_device *dev;
-
-	rtnl_lock();
-	/* clean dev list */
-	for_each_netdev(net, dev) {
-		if (__in6_dev_get(dev) == NULL)
-			continue;
-		addrconf_ifdown(dev, 1);
-	}
-	addrconf_ifdown(net->loopback_dev, 2);
-	rtnl_unlock();
-}
-
-static struct pernet_operations addrconf_net_ops = {
-	.exit = addrconf_net_exit,
-};
-
 /*
  *	Init / cleanup code
  */
@@ -4508,10 +4486,6 @@ int __init addrconf_init(void)
 	if (err)
 		goto errlo;
 
-	err = register_pernet_device(&addrconf_net_ops);
-	if (err)
-		return err;
-
 	register_netdevice_notifier(&ipv6_dev_notf);
 
 	addrconf_verify(0);
@@ -4541,15 +4515,22 @@ errlo:
 void addrconf_cleanup(void)
 {
 	struct inet6_ifaddr *ifa;
+	struct net_device *dev;
 	int i;
 
 	unregister_netdevice_notifier(&ipv6_dev_notf);
-	unregister_pernet_device(&addrconf_net_ops);
-
 	unregister_pernet_subsys(&addrconf_ops);
 
 	rtnl_lock();
 
+	/* clean dev list */
+	for_each_netdev(&init_net, dev) {
+		if (__in6_dev_get(dev) == NULL)
+			continue;
+		addrconf_ifdown(dev, 1);
+	}
+	addrconf_ifdown(init_net.loopback_dev, 2);
+
 	/*
 	 *	Check hash table.
 	 */
@@ -4570,6 +4551,4 @@ void addrconf_cleanup(void)
 
 	del_timer(&addr_chk_timer);
 	rtnl_unlock();
-
-	unregister_pernet_subsys(&addrconf_net_ops);
 }
-- 
cgit v1.2.3


From 6eb0777228f31932fc941eafe8b08848466630a1 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@aristanetworks.com>
Date: Sun, 22 Feb 2009 00:09:14 -0800
Subject: netns: Fix icmp shutdown.

Recently I had a kernel panic in icmp_send during a network namespace
cleanup.  There were packets in the arp queue that failed to be sent
and we attempted to generate an ICMP host unreachable message, but
failed because icmp_sk_exit had already been called.

The network devices are removed from a network namespace and their
arp queues are flushed before we do attempt to shutdown subsystems
so this error should have been impossible.

It turns out icmp_init is using register_pernet_device instead
of register_pernet_subsys.  Which resulted in icmp being shut down
while we still had the possibility of packets in flight, making
a nasty NULL pointer deference in interrupt context possible.

Changing this to register_pernet_subsys fixes the problem in
my testing.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Acked-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 705b33b184a..fc562d29cc4 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1205,7 +1205,7 @@ static struct pernet_operations __net_initdata icmp_sk_ops = {
 
 int __init icmp_init(void)
 {
-	return register_pernet_device(&icmp_sk_ops);
+	return register_pernet_subsys(&icmp_sk_ops);
 }
 
 EXPORT_SYMBOL(icmp_err_convert);
-- 
cgit v1.2.3


From 2f20d2e667ab1ca44cde5fb361386dff5bb6081d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@aristanetworks.com>
Date: Sun, 22 Feb 2009 00:10:18 -0800
Subject: tcp: Like icmp use register_pernet_subsys

To remove the possibility of packets flying around when network
devices are being cleaned up use reisger_pernet_subsys instead of
register_pernet_device.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Acked-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 19d7b429a26..cf74c416831 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2443,7 +2443,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
 void __init tcp_v4_init(void)
 {
 	inet_hashinfo_init(&tcp_hashinfo);
-	if (register_pernet_device(&tcp_sk_ops))
+	if (register_pernet_subsys(&tcp_sk_ops))
 		panic("Failed to create the TCP control socket.\n");
 }
 
-- 
cgit v1.2.3


From 17edde520927070a6bf14a6a75027c0b843443e5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@aristanetworks.com>
Date: Sun, 22 Feb 2009 00:11:09 -0800
Subject: netns: Remove net_alive

It turns out that net_alive is unnecessary, and the original problem
that led to it being added was simply that the icmp code thought
it was a network device and wound up being unable to handle packets
while there were still packets in the network namespace.

Now that icmp and tcp have been fixed to properly register themselves
this problem is no longer present and we have a stronger guarantee
that packets will not arrive in a network namespace then that provided
by net_alive in netif_receive_skb.  So remove net_alive allowing
packet reception run a little faster.

Additionally document the strong reason why network namespace cleanup
is safe so that if something happens again someone else will have
a chance of figuring it out.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c           | 6 ------
 net/core/net_namespace.c | 3 ---
 2 files changed, 9 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 72b0d26fd46..9e4afe650e7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2267,12 +2267,6 @@ int netif_receive_skb(struct sk_buff *skb)
 
 	rcu_read_lock();
 
-	/* Don't receive packets in an exiting network namespace */
-	if (!net_alive(dev_net(skb->dev))) {
-		kfree_skb(skb);
-		goto out;
-	}
-
 #ifdef CONFIG_NET_CLS_ACT
 	if (skb->tc_verd & TC_NCLS) {
 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2adb1a7d361..e3bebd36f05 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -157,9 +157,6 @@ static void cleanup_net(struct work_struct *work)
 	struct pernet_operations *ops;
 	struct net *net;
 
-	/* Be very certain incoming network packets will not find us */
-	rcu_barrier();
-
 	net = container_of(work, struct net, work);
 
 	mutex_lock(&net_mutex);
-- 
cgit v1.2.3


From 4843b93c96ae5043c6279c4ec6fcd8ee3866ff5b Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 3 Mar 2009 23:37:30 -0800
Subject: netlink: invert error code in netlink_set_err()

The callers of netlink_set_err() currently pass a negative value
as parameter for the error code. However, sk->sk_err wants a
positive error value. Without this patch, skb_recv_datagram() called
by netlink_recvmsg() may return a positive value to report an error.

Another choice to fix this is to change callers to pass a positive
error value, but this seems a bit inconsistent and error prone
to me. Indeed, the callers of netlink_set_err() assumed that the
(usual) negative value for error codes was fine before this patch :).

This patch also includes some documentation in docbook format
for netlink_set_err() to avoid this sort of confusion.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 9eb895c7a2a..3ae3cb81656 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1084,6 +1084,13 @@ out:
 	return 0;
 }
 
+/**
+ * netlink_set_err - report error to broadcast listeners
+ * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
+ * @pid: the PID of a process that we want to skip (if any)
+ * @groups: the broadcast group that will notice the error
+ * @code: error code, must be negative (as usual in kernelspace)
+ */
 void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
 {
 	struct netlink_set_err_data info;
@@ -1093,7 +1100,8 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
 	info.exclude_sk = ssk;
 	info.pid = pid;
 	info.group = group;
-	info.code = code;
+	/* sk->sk_err wants a positive error value */
+	info.code = -code;
 
 	read_lock(&nl_table_lock);
 
-- 
cgit v1.2.3


From 4222474519ff5b31a526dfa1da7aa4b0e38bef5c Mon Sep 17 00:00:00 2001
From: Meelis Roos <mroos@linux.ee>
Date: Tue, 3 Mar 2009 23:48:50 -0800
Subject: net: fix tokenring license

Currently, modular tokenring ("tr") lacks a license and fails to load:

tr: module license 'unspecified' taints kernel.
tr: Unknown symbol proc_net_fops_create

Beacuse of this, no tokenring driver can load if it depends on modular
tr. Fix this by adding GPL module license as it is in the kernel.

With this fix, tr module loads fine and tms380 driver also loads. Well,
it does'nt work but that's a different bug.

Signed-off-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/802/tr.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/802/tr.c b/net/802/tr.c
index 158150fee46..f47ae289d83 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -668,3 +668,5 @@ module_init(rif_init);
 
 EXPORT_SYMBOL(tr_type_trans);
 EXPORT_SYMBOL(alloc_trdev);
+
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From fe7ca2e1e847b65c12d245cbf402af89da96888a Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Wed, 4 Mar 2009 03:18:11 -0800
Subject: IPv6: add "disable" module parameter support to ipv6.ko

Add "disable" module parameter support to ipv6.ko by specifying
"disable=1" on module load.  We just do the minimum of initializing
inetsw6[] so calls from other modules to inet6_register_protosw()
won't OOPs, then bail out.  No IPv6 addresses or sockets can be
created as a result, and a reboot is required to enable IPv6.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c802bc1658a..da944eca2ca 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -72,6 +72,10 @@ MODULE_LICENSE("GPL");
 static struct list_head inetsw6[SOCK_MAX];
 static DEFINE_SPINLOCK(inetsw6_lock);
 
+static int disable_ipv6 = 0;
+module_param_named(disable, disable_ipv6, int, 0);
+MODULE_PARM_DESC(disable, "Disable IPv6 such that it is non-functional");
+
 static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
 {
 	const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
@@ -991,10 +995,21 @@ static int __init inet6_init(void)
 {
 	struct sk_buff *dummy_skb;
 	struct list_head *r;
-	int err;
+	int err = 0;
 
 	BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb));
 
+	/* Register the socket-side information for inet6_create.  */
+	for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
+		INIT_LIST_HEAD(r);
+
+	if (disable_ipv6) {
+		printk(KERN_INFO
+		       "IPv6: Loaded, but administratively disabled, "
+		       "reboot required to enable\n");
+		goto out;
+	}
+
 	err = proto_register(&tcpv6_prot, 1);
 	if (err)
 		goto out;
@@ -1012,10 +1027,6 @@ static int __init inet6_init(void)
 		goto out_unregister_udplite_proto;
 
 
-	/* Register the socket-side information for inet6_create.  */
-	for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
-		INIT_LIST_HEAD(r);
-
 	/* We MUST register RAW sockets before we create the ICMP6,
 	 * IGMP6, or NDISC control sockets.
 	 */
-- 
cgit v1.2.3


From fb13d9f9e450bceafd88ac8a98f7a98e8096a5fe Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Wed, 4 Mar 2009 03:20:26 -0800
Subject: SCTP: change sctp_ctl_sock_init() to try IPv4 if IPv6 fails

Change sctp_ctl_sock_init() to try IPv4 if IPv6 socket registration
fails.  Required if the IPv6 module is loaded with "disable=1", else
SCTP will fail to load.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/protocol.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 4e663844963..c4986d0f741 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -717,15 +717,20 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
 static int sctp_ctl_sock_init(void)
 {
 	int err;
-	sa_family_t family;
+	sa_family_t family = PF_INET;
 
 	if (sctp_get_pf_specific(PF_INET6))
 		family = PF_INET6;
-	else
-		family = PF_INET;
 
 	err = inet_ctl_sock_create(&sctp_ctl_sock, family,
 				   SOCK_SEQPACKET, IPPROTO_SCTP, &init_net);
+
+	/* If IPv6 socket could not be created, try the IPv4 socket */
+	if (err < 0 && family == PF_INET6)
+		err = inet_ctl_sock_create(&sctp_ctl_sock, AF_INET,
+					   SOCK_SEQPACKET, IPPROTO_SCTP,
+					   &init_net);
+
 	if (err < 0) {
 		printk(KERN_ERR
 		       "SCTP: Failed to create the SCTP control socket.\n");
-- 
cgit v1.2.3


From a883bf564ea555447a76682bb2d8d4bc92e23e0e Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 4 Mar 2009 17:38:10 -0800
Subject: pkt_sched: act_police: Fix a rate estimator test.

A commit c1b56878fb68e9c14070939ea4537ad4db79ffae "tc: policing requires
a rate estimator" introduced a test which invalidates previously working
configs, based on examples from iproute2: doc/actions/actions-general.
This is too rigorous: a rate estimator is needed only when police's
"avrate" option is used.

Reported-by: Joao Correia <joaomiguelcorreia@gmail.com>
Diagnosed-by: John Dykstra <john.dykstra1@gmail.com>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_police.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 5c72a116b1a..f8f047b6124 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -183,13 +183,6 @@ override:
 		if (R_tab == NULL)
 			goto failure;
 
-		if (!est && (ret == ACT_P_CREATED ||
-			     !gen_estimator_active(&police->tcf_bstats,
-						   &police->tcf_rate_est))) {
-			err = -EINVAL;
-			goto failure;
-		}
-
 		if (parm->peakrate.rate) {
 			P_tab = qdisc_get_rtab(&parm->peakrate,
 					       tb[TCA_POLICE_PEAKRATE]);
@@ -205,6 +198,12 @@ override:
 					    &police->tcf_lock, est);
 		if (err)
 			goto failure_unlock;
+	} else if (tb[TCA_POLICE_AVRATE] &&
+		   (ret == ACT_P_CREATED ||
+		    !gen_estimator_active(&police->tcf_bstats,
+					  &police->tcf_rate_est))) {
+		err = -EINVAL;
+		goto failure_unlock;
 	}
 
 	/* No failure allowed after this point */
-- 
cgit v1.2.3


From 54acd0efab072cb70e87206329d561b297f93bbb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 4 Mar 2009 23:01:02 -0800
Subject: net: Fix missing dev->neigh_setup in register_netdevice().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 9e4afe650e7..2dd484ed3db 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4339,6 +4339,7 @@ int register_netdevice(struct net_device *dev)
 		dev->do_ioctl = ops->ndo_do_ioctl;
 		dev->set_config = ops->ndo_set_config;
 		dev->change_mtu = ops->ndo_change_mtu;
+		dev->neigh_setup = ops->ndo_neigh_setup;
 		dev->tx_timeout = ops->ndo_tx_timeout;
 		dev->get_stats = ops->ndo_get_stats;
 		dev->vlan_rx_register = ops->ndo_vlan_rx_register;
-- 
cgit v1.2.3


From 9d40bbda599def1e1d155d7f7dca14fe8744bd2b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 4 Mar 2009 23:46:25 -0800
Subject: vlan: Fix vlan-in-vlan crashes.

As analyzed by Patrick McHardy, vlan needs to reset it's
netdev_ops pointer in it's ->init() function but this
leaves the compat method pointers stale.

Add a netdev_resync_ops() and call it from the vlan code.

Any other driver which changes ->netdev_ops after register_netdevice()
will need to call this new function after doing so too.

With help from Patrick McHardy.

Tested-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c |  3 ++-
 net/core/dev.c       | 56 +++++++++++++++++++++++++++++++---------------------
 2 files changed, 36 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 4a19acd3a32..1b34135cf99 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -553,7 +553,7 @@ static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa)
 	int err = 0;
 
 	if (netif_device_present(real_dev) && ops->ndo_neigh_setup)
-		err = ops->ndo_neigh_setup(dev, pa);
+		err = ops->ndo_neigh_setup(real_dev, pa);
 
 	return err;
 }
@@ -639,6 +639,7 @@ static int vlan_dev_init(struct net_device *dev)
 		dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
 		dev->netdev_ops         = &vlan_netdev_ops;
 	}
+	netdev_resync_ops(dev);
 
 	if (is_vlan_dev(real_dev))
 		subclass = 1;
diff --git a/net/core/dev.c b/net/core/dev.c
index 2dd484ed3db..f1129706ce7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4282,6 +4282,39 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
 }
 EXPORT_SYMBOL(netdev_fix_features);
 
+/* Some devices need to (re-)set their netdev_ops inside
+ * ->init() or similar.  If that happens, we have to setup
+ * the compat pointers again.
+ */
+void netdev_resync_ops(struct net_device *dev)
+{
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	dev->init = ops->ndo_init;
+	dev->uninit = ops->ndo_uninit;
+	dev->open = ops->ndo_open;
+	dev->change_rx_flags = ops->ndo_change_rx_flags;
+	dev->set_rx_mode = ops->ndo_set_rx_mode;
+	dev->set_multicast_list = ops->ndo_set_multicast_list;
+	dev->set_mac_address = ops->ndo_set_mac_address;
+	dev->validate_addr = ops->ndo_validate_addr;
+	dev->do_ioctl = ops->ndo_do_ioctl;
+	dev->set_config = ops->ndo_set_config;
+	dev->change_mtu = ops->ndo_change_mtu;
+	dev->neigh_setup = ops->ndo_neigh_setup;
+	dev->tx_timeout = ops->ndo_tx_timeout;
+	dev->get_stats = ops->ndo_get_stats;
+	dev->vlan_rx_register = ops->ndo_vlan_rx_register;
+	dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
+	dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	dev->poll_controller = ops->ndo_poll_controller;
+#endif
+#endif
+}
+EXPORT_SYMBOL(netdev_resync_ops);
+
 /**
  *	register_netdevice	- register a network device
  *	@dev: device to register
@@ -4326,28 +4359,7 @@ int register_netdevice(struct net_device *dev)
 	 * This is temporary until all network devices are converted.
 	 */
 	if (dev->netdev_ops) {
-		const struct net_device_ops *ops = dev->netdev_ops;
-
-		dev->init = ops->ndo_init;
-		dev->uninit = ops->ndo_uninit;
-		dev->open = ops->ndo_open;
-		dev->change_rx_flags = ops->ndo_change_rx_flags;
-		dev->set_rx_mode = ops->ndo_set_rx_mode;
-		dev->set_multicast_list = ops->ndo_set_multicast_list;
-		dev->set_mac_address = ops->ndo_set_mac_address;
-		dev->validate_addr = ops->ndo_validate_addr;
-		dev->do_ioctl = ops->ndo_do_ioctl;
-		dev->set_config = ops->ndo_set_config;
-		dev->change_mtu = ops->ndo_change_mtu;
-		dev->neigh_setup = ops->ndo_neigh_setup;
-		dev->tx_timeout = ops->ndo_tx_timeout;
-		dev->get_stats = ops->ndo_get_stats;
-		dev->vlan_rx_register = ops->ndo_vlan_rx_register;
-		dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
-		dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
-#ifdef CONFIG_NET_POLL_CONTROLLER
-		dev->poll_controller = ops->ndo_poll_controller;
-#endif
+		netdev_resync_ops(dev);
 	} else {
 		char drivername[64];
 		pr_info("%s (%s): not using net_device_ops yet\n",
-- 
cgit v1.2.3


From bd05f28e1a15ae62994fe309a524695fe26dd834 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Tue, 3 Mar 2009 22:55:21 +0100
Subject: cfg80211: test before subtraction on unsigned

freq_diff is unsigned, so test before subtraction

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/reg.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 85c9034c59b..bd0a16c3de5 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -380,7 +380,8 @@ static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule)
 
 	freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz;
 
-	if (freq_diff <= 0 || freq_range->max_bandwidth_khz > freq_diff)
+	if (freq_range->end_freq_khz <= freq_range->start_freq_khz ||
+			freq_range->max_bandwidth_khz > freq_diff)
 		return false;
 
 	return true;
-- 
cgit v1.2.3