From 522e7548a9bd40305df41c0beae69448b7620d6b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 22:54:52 -0800
Subject: [TCP] FRTO: Incorrectly clears TCPCB_EVER_RETRANS bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

FRTO was slightly too brave... Should only clear
TCPCB_SACKED_RETRANS bit.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1a14191687a..b21e232d5d3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1266,7 +1266,7 @@ void tcp_enter_frto(struct sock *sk)
 	tp->undo_retrans = 0;
 
 	sk_stream_for_retrans_queue(skb, sk) {
-		TCP_SKB_CB(skb)->sacked &= ~TCPCB_RETRANS;
+		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 	}
 	tcp_sync_left_out(tp);
 
-- 
cgit v1.2.3


From 9ead9a1d385ae2c52a6dcf2828d84ce66be04fc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 22:56:19 -0800
Subject: [TCP] FRTO: Separated response from FRTO detection algorithm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

FRTO spurious RTO detection algorithm (RFC4138) does not include response
to a detected spurious RTO but can use different response algorithms.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b21e232d5d3..c5be3d0465f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2467,6 +2467,15 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 	return flag;
 }
 
+/* A very conservative spurious RTO response algorithm: reduce cwnd and
+ * continue in congestion avoidance.
+ */
+static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
+{
+	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+	tcp_moderate_cwnd(tp);
+}
+
 static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -2488,12 +2497,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
 		 */
 		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
 	} else {
-		/* Also the second ACK after RTO advances the window.
-		 * The RTO was likely spurious. Reduce cwnd and continue
-		 * in congestion avoidance
-		 */
-		tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-		tcp_moderate_cwnd(tp);
+		tcp_conservative_spur_to_response(tp);
 	}
 
 	/* F-RTO affects on two new ACKs following RTO.
-- 
cgit v1.2.3


From bdaae17da81db79b9aa4dfbf43305cfeef64f6a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 22:59:58 -0800
Subject: [TCP] FRTO: Moved tcp_use_frto from tcp.h to tcp_input.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In addition, removed inline.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    | 14 +-------------
 net/ipv4/tcp_input.c | 13 +++++++++++++
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5c472f255b7..572a77bb690 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -341,6 +341,7 @@ extern struct sock *		tcp_check_req(struct sock *sk,struct sk_buff *skb,
 extern int			tcp_child_process(struct sock *parent,
 						  struct sock *child,
 						  struct sk_buff *skb);
+extern int			tcp_use_frto(const struct sock *sk);
 extern void			tcp_enter_frto(struct sock *sk);
 extern void			tcp_enter_loss(struct sock *sk, int how);
 extern void			tcp_clear_retrans(struct tcp_sock *tp);
@@ -1033,19 +1034,6 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int
 
 #define TCP_CHECK_TIMER(sk) do { } while (0)
 
-static inline int tcp_use_frto(const struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	
-	/* F-RTO must be activated in sysctl and there must be some
-	 * unsent new data, and the advertised window should allow
-	 * sending it.
-	 */
-	return (sysctl_tcp_frto && sk->sk_send_head &&
-		!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
-		       tp->snd_una + tp->snd_wnd));
-}
-
 static inline void tcp_mib_init(void)
 {
 	/* See RFC 2012 */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c5be3d0465f..fe96e176d85 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1236,6 +1236,19 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	return flag;
 }
 
+int tcp_use_frto(const struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	/* F-RTO must be activated in sysctl and there must be some
+	 * unsent new data, and the advertised window should allow
+	 * sending it.
+	 */
+	return (sysctl_tcp_frto && sk->sk_send_head &&
+		!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
+		       tp->snd_una + tp->snd_wnd));
+}
+
 /* RTO occurred, but do not yet enter loss state. Instead, transmit two new
  * segments to see from the next ACKs whether any data was really missing.
  * If the RTO was spurious, new ACKs should arrive.
-- 
cgit v1.2.3


From 30935cf4f915c3178ce63331d6ff4c82163e26af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:01:36 -0800
Subject: [TCP] FRTO: Comment cleanup & improvement
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Moved comments out from the body of process_frto() to the head
(preferred way; see Documentation/CodingStyle). Bonus: it's much
easier to read in this compacted form.

FRTO algorithm and implementation is described in greater detail.
For interested reader, more information is available in RFC4138.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 49 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 32 insertions(+), 17 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fe96e176d85..561e5d40498 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1236,22 +1236,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	return flag;
 }
 
+/* F-RTO can only be used if these conditions are satisfied:
+ *  - there must be some unsent new data
+ *  - the advertised window should allow sending it
+ */
 int tcp_use_frto(const struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
-	/* F-RTO must be activated in sysctl and there must be some
-	 * unsent new data, and the advertised window should allow
-	 * sending it.
-	 */
 	return (sysctl_tcp_frto && sk->sk_send_head &&
 		!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
 		       tp->snd_una + tp->snd_wnd));
 }
 
-/* RTO occurred, but do not yet enter loss state. Instead, transmit two new
- * segments to see from the next ACKs whether any data was really missing.
- * If the RTO was spurious, new ACKs should arrive.
+/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
+ * recovery a bit and use heuristics in tcp_process_frto() to detect if
+ * the RTO was spurious.
  */
 void tcp_enter_frto(struct sock *sk)
 {
@@ -2489,6 +2489,30 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 	tcp_moderate_cwnd(tp);
 }
 
+/* F-RTO spurious RTO detection algorithm (RFC4138)
+ *
+ * F-RTO affects during two new ACKs following RTO. State (ACK number) is kept
+ * in frto_counter. When ACK advances window (but not to or beyond highest
+ * sequence sent before RTO):
+ *   On First ACK,  send two new segments out.
+ *   On Second ACK, RTO was likely spurious. Do spurious response (response
+ *                  algorithm is not part of the F-RTO detection algorithm
+ *                  given in RFC4138 but can be selected separately).
+ * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss
+ * and TCP falls back to conventional RTO recovery.
+ *
+ * Rationale: if the RTO was spurious, new ACKs should arrive from the
+ * original window even after we transmit two new data segments.
+ *
+ * F-RTO is implemented (mainly) in four functions:
+ *   - tcp_use_frto() is used to determine if TCP is can use F-RTO
+ *   - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is
+ *     called when tcp_use_frto() showed green light
+ *   - tcp_process_frto() handles incoming ACKs during F-RTO algorithm
+ *   - tcp_enter_frto_loss() is called if there is not enough evidence
+ *     to prove that the RTO is indeed spurious. It transfers the control
+ *     from F-RTO to the conventional RTO recovery
+ */
 static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -2497,25 +2521,16 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
 
 	if (tp->snd_una == prior_snd_una ||
 	    !before(tp->snd_una, tp->frto_highmark)) {
-		/* RTO was caused by loss, start retransmitting in
-		 * go-back-N slow start
-		 */
 		tcp_enter_frto_loss(sk);
 		return;
 	}
 
 	if (tp->frto_counter == 1) {
-		/* First ACK after RTO advances the window: allow two new
-		 * segments out.
-		 */
 		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
-	} else {
+	} else /* frto_counter == 2 */ {
 		tcp_conservative_spur_to_response(tp);
 	}
 
-	/* F-RTO affects on two new ACKs following RTO.
-	 * At latest on third ACK the TCP behavior is back to normal.
-	 */
 	tp->frto_counter = (tp->frto_counter + 1) % 3;
 }
 
-- 
cgit v1.2.3


From 7487c48c4fd15d1e2542be1183b783562cfe10bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:02:30 -0800
Subject: [TCP] FRTO: Consecutive RTOs keep prior_ssthresh and ssthresh
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In case a latency spike causes more than one RTO, the later should not
cause the already reduced ssthresh to propagate into the prior_ssthresh
since FRTO declares all such RTOs spurious at once or none of them. In
treating of ssthresh, we mimic what tcp_enter_loss() does.

The previous state (in frto_counter) must be available until we have
checked it in tcp_enter_frto(), and also ACK information flag in
process_frto().

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 561e5d40498..194e880af51 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1252,6 +1252,10 @@ int tcp_use_frto(const struct sock *sk)
 /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
  * recovery a bit and use heuristics in tcp_process_frto() to detect if
  * the RTO was spurious.
+ *
+ * Do like tcp_enter_loss() would; when RTO expires the second time it
+ * does:
+ *  "Reduce ssthresh if it has not yet been made inside this window."
  */
 void tcp_enter_frto(struct sock *sk)
 {
@@ -1259,11 +1263,10 @@ void tcp_enter_frto(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
-	tp->frto_counter = 1;
-
-	if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
+	if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
 	    tp->snd_una == tp->high_seq ||
-	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
+	    ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
+	     !icsk->icsk_retransmits)) {
 		tp->prior_ssthresh = tcp_current_ssthresh(sk);
 		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 		tcp_ca_event(sk, CA_EVENT_FRTO);
@@ -1285,6 +1288,7 @@ void tcp_enter_frto(struct sock *sk)
 
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tp->frto_highmark = tp->snd_nxt;
+	tp->frto_counter = 1;
 }
 
 /* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
@@ -2513,12 +2517,16 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
  *     to prove that the RTO is indeed spurious. It transfers the control
  *     from F-RTO to the conventional RTO recovery
  */
-static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
+static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tcp_sync_left_out(tp);
 
+	/* Duplicate the behavior from Loss state (fastretrans_alert) */
+	if (flag&FLAG_DATA_ACKED)
+		inet_csk(sk)->icsk_retransmits = 0;
+
 	if (tp->snd_una == prior_snd_una ||
 	    !before(tp->snd_una, tp->frto_highmark)) {
 		tcp_enter_frto_loss(sk);
@@ -2607,7 +2615,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
 
 	if (tp->frto_counter)
-		tcp_process_frto(sk, prior_snd_una);
+		tcp_process_frto(sk, prior_snd_una, flag);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
 		/* Advance CWND, if state allows this. */
-- 
cgit v1.2.3


From 7b0eb22b1d3b049306813a4aaa52966650f7491c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:03:35 -0800
Subject: [TCP] FRTO: Use Disorder state during operation instead of Open
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Retransmission counter assumptions are to be changed. Forcing
reason to do this exist: Using sysctl in check would be racy
as soon as FRTO starts to ignore some ACKs (doing that in the
following patches). Userspace may disable it at any moment
giving nice oops if timing is right. frto_counter would be
inaccessible from userspace, but with SACK enhanced FRTO
retrans_out can include other than head, and possibly leaving
it non-zero after spurious RTO, boom again.

Luckily, solution seems rather simple: never go directly to Open
state but use Disorder instead. This does not really change much,
since TCP could anyway change its state to Disorder during FRTO
using path tcp_fastretrans_alert -> tcp_try_to_open (e.g., when
a SACK block makes ACK dubious). Besides, Disorder seems to be
the state where TCP should be if not recovering (in Recovery or
Loss state) while having some retransmissions in-flight (see
tcp_try_to_open), which is exactly what happens with FRTO.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 194e880af51..e806839acdd 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1286,7 +1286,8 @@ void tcp_enter_frto(struct sock *sk)
 	}
 	tcp_sync_left_out(tp);
 
-	tcp_set_ca_state(sk, TCP_CA_Open);
+	tcp_set_ca_state(sk, TCP_CA_Disorder);
+	tp->high_seq = tp->snd_nxt;
 	tp->frto_highmark = tp->snd_nxt;
 	tp->frto_counter = 1;
 }
@@ -2014,8 +2015,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 	/* E. Check state exit conditions. State can be terminated
 	 *    when high_seq is ACKed. */
 	if (icsk->icsk_ca_state == TCP_CA_Open) {
-		if (!sysctl_tcp_frto)
-			BUG_TRAP(tp->retrans_out == 0);
+		BUG_TRAP(tp->retrans_out == 0);
 		tp->retrans_stamp = 0;
 	} else if (!before(tp->snd_una, tp->high_seq)) {
 		switch (icsk->icsk_ca_state) {
-- 
cgit v1.2.3


From 6408d206c7484615ecae54bf6474a02c94e9e862 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:04:11 -0800
Subject: [TCP] FRTO: Ignore some uninteresting ACKs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Handles RFC4138 shortcoming (in step 2); it should also have case
c) which ignores ACKs that are not duplicates nor advance window
(opposite dir data, winupdate).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e806839acdd..e990d562f5e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2495,9 +2495,9 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 
 /* F-RTO spurious RTO detection algorithm (RFC4138)
  *
- * F-RTO affects during two new ACKs following RTO. State (ACK number) is kept
- * in frto_counter. When ACK advances window (but not to or beyond highest
- * sequence sent before RTO):
+ * F-RTO affects during two new ACKs following RTO (well, almost, see inline
+ * comments). State (ACK number) is kept in frto_counter. When ACK advances
+ * window (but not to or beyond highest sequence sent before RTO):
  *   On First ACK,  send two new segments out.
  *   On Second ACK, RTO was likely spurious. Do spurious response (response
  *                  algorithm is not part of the F-RTO detection algorithm
@@ -2527,6 +2527,13 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 	if (flag&FLAG_DATA_ACKED)
 		inet_csk(sk)->icsk_retransmits = 0;
 
+	/* RFC4138 shortcoming in step 2; should also have case c): ACK isn't
+	 * duplicate nor advances window, e.g., opposite dir data, winupdate
+	 */
+	if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) &&
+	    !(flag&FLAG_FORWARD_PROGRESS))
+		return;
+
 	if (tp->snd_una == prior_snd_una ||
 	    !before(tp->snd_una, tp->frto_highmark)) {
 		tcp_enter_frto_loss(sk);
-- 
cgit v1.2.3


From 95c4922bf9330eb2c71b752359dd89c4e166f3c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:05:18 -0800
Subject: [TCP] FRTO: fixes fallback to conventional recovery
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The FRTO detection did not care how ACK pattern affects to cwnd
calculation of the conventional recovery. This caused incorrect
setting of cwnd when the fallback becames necessary. The
knowledge tcp_process_frto() has about the incoming ACK is now
passed on to tcp_enter_frto_loss() in allowed_segments parameter
that gives the number of segments that must be added to
packets-in-flight while calculating the new cwnd.

Instead of snd_una we use FLAG_DATA_ACKED in duplicate ACK
detection because RFC4138 states (in Section 2.2):
  If the first acknowledgment after the RTO retransmission
  does not acknowledge all of the data that was retransmitted
  in step 1, the TCP sender reverts to the conventional RTO
  recovery.  Otherwise, a malicious receiver acknowledging
  partial segments could cause the sender to declare the
  timeout spurious in a case where data was lost.

If the next ACK after RTO is duplicate, we do not retransmit
anything, which is equal to what conservative conventional
recovery does in such case.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e990d562f5e..cc935c8a6aa 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1296,7 +1296,7 @@ void tcp_enter_frto(struct sock *sk)
  * which indicates that we should follow the traditional RTO recovery,
  * i.e. mark everything lost and do go-back-N retransmission.
  */
-static void tcp_enter_frto_loss(struct sock *sk)
+static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -1326,7 +1326,7 @@ static void tcp_enter_frto_loss(struct sock *sk)
 	}
 	tcp_sync_left_out(tp);
 
-	tp->snd_cwnd = tp->frto_counter + tcp_packets_in_flight(tp)+1;
+	tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
 	tp->snd_cwnd_cnt = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 	tp->undo_marker = 0;
@@ -2527,6 +2527,11 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 	if (flag&FLAG_DATA_ACKED)
 		inet_csk(sk)->icsk_retransmits = 0;
 
+	if (!before(tp->snd_una, tp->frto_highmark)) {
+		tcp_enter_frto_loss(sk, tp->frto_counter + 1);
+		return;
+	}
+
 	/* RFC4138 shortcoming in step 2; should also have case c): ACK isn't
 	 * duplicate nor advances window, e.g., opposite dir data, winupdate
 	 */
@@ -2534,9 +2539,8 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 	    !(flag&FLAG_FORWARD_PROGRESS))
 		return;
 
-	if (tp->snd_una == prior_snd_una ||
-	    !before(tp->snd_una, tp->frto_highmark)) {
-		tcp_enter_frto_loss(sk);
+	if (!(flag&FLAG_DATA_ACKED)) {
+		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3));
 		return;
 	}
 
-- 
cgit v1.2.3


From aa8b6a7ad147dfbaaf10368ff15df9418b670d8b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:06:03 -0800
Subject: [TCP] FRTO: Response should reset also snd_cwnd_cnt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since purpose is to reduce CWND, we prevent immediate growth. This
is not a major issue nor is "the correct way" specified anywhere.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cc935c8a6aa..924b2e6d7d1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2490,6 +2490,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 {
 	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+	tp->snd_cwnd_cnt = 0;
 	tcp_moderate_cwnd(tp);
 }
 
-- 
cgit v1.2.3


From 52c63f1e86ebb18ef4b710b5b647e552a041e5ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:06:52 -0800
Subject: [TCP]: Don't enter to fast recovery while using FRTO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Because TCP is not in Loss state during FRTO recovery, fast
recovery could be triggered by accident. Non-SACK FRTO is more
robust than not yet included SACK-enhanced version (that can
receiver high number of duplicate ACKs with SACK blocks during
FRTO), at least with unidirectional transfers, but under
extraordinary patterns fast recovery can be incorrectly
triggered, e.g., Data loss+ACK losses => cumulative ACK with
enough SACK blocks to meet sacked_out >= dupthresh condition).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 924b2e6d7d1..7213740477e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1547,6 +1547,10 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
 {
 	__u32 packets_out;
 
+	/* Do not perform any recovery during FRTO algorithm */
+	if (tp->frto_counter)
+		return 0;
+
 	/* Trick#1: The loss is proven. */
 	if (tp->lost_out)
 		return 1;
-- 
cgit v1.2.3


From 94d0ea7786714d78d7cb73144bb850254dd0bb78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:07:27 -0800
Subject: [TCP] FRTO: frto_counter modulo-op converted to two assignments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7213740477e..9dc5754141e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2551,11 +2551,11 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 
 	if (tp->frto_counter == 1) {
 		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
+		tp->frto_counter = 2;
 	} else /* frto_counter == 2 */ {
 		tcp_conservative_spur_to_response(tp);
+		tp->frto_counter = 0;
 	}
-
-	tp->frto_counter = (tp->frto_counter + 1) % 3;
 }
 
 /* This routine deals with incoming acks, but not outgoing ones. */
-- 
cgit v1.2.3


From 7c9a4a5b67926dd186d427bc5b9fce6ccbde154c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:08:34 -0800
Subject: [TCP]: Prevent unrelated cwnd adjustment while using FRTO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

FRTO controls cwnd when it still processes the ACK input or it
has just reverted back to conventional RTO recovery; the normal
rules apply when FRTO has reverted to standard congestion
control.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9dc5754141e..723cee63791 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2522,7 +2522,7 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
  *     to prove that the RTO is indeed spurious. It transfers the control
  *     from F-RTO to the conventional RTO recovery
  */
-static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
+static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2534,7 +2534,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 
 	if (!before(tp->snd_una, tp->frto_highmark)) {
 		tcp_enter_frto_loss(sk, tp->frto_counter + 1);
-		return;
+		return 1;
 	}
 
 	/* RFC4138 shortcoming in step 2; should also have case c): ACK isn't
@@ -2542,20 +2542,22 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 	 */
 	if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) &&
 	    !(flag&FLAG_FORWARD_PROGRESS))
-		return;
+		return 1;
 
 	if (!(flag&FLAG_DATA_ACKED)) {
 		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3));
-		return;
+		return 1;
 	}
 
 	if (tp->frto_counter == 1) {
 		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
 		tp->frto_counter = 2;
+		return 1;
 	} else /* frto_counter == 2 */ {
 		tcp_conservative_spur_to_response(tp);
 		tp->frto_counter = 0;
 	}
+	return 0;
 }
 
 /* This routine deals with incoming acks, but not outgoing ones. */
@@ -2569,6 +2571,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	u32 prior_in_flight;
 	s32 seq_rtt;
 	int prior_packets;
+	int frto_cwnd = 0;
 
 	/* If the ack is newer than sent or older than previous acks
 	 * then we can probably ignore it.
@@ -2631,15 +2634,16 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
 
 	if (tp->frto_counter)
-		tcp_process_frto(sk, prior_snd_una, flag);
+		frto_cwnd = tcp_process_frto(sk, prior_snd_una, flag);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
 		/* Advance CWND, if state allows this. */
-		if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
+		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
+		    tcp_may_raise_cwnd(sk, flag))
 			tcp_cong_avoid(sk, ack,  seq_rtt, prior_in_flight, 0);
 		tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
 	} else {
-		if ((flag & FLAG_DATA_ACKED))
+		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
 			tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1);
 	}
 
-- 
cgit v1.2.3


From 46d0de4ed92650b95f27acae09914996bbe624e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:10:39 -0800
Subject: [TCP] FRTO: Entry is allowed only during (New)Reno like recovery
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This interpretation comes from RFC4138:
    "If the sender implements some loss recovery algorithm other
     than Reno or NewReno [FHG04], the F-RTO algorithm SHOULD
     NOT be entered when earlier fast recovery is underway."

I think the RFC means to say (especially in the light of
Appendix B) that ...recovery is underway (not just fast recovery)
or was underway when it was interrupted by an earlier (F-)RTO
that hasn't yet been resolved (snd_una has not advanced enough).
Thus, my interpretation is that whenever TCP has ever
retransmitted other than head, basic version cannot be used
because then the order assumptions which are used as FRTO basis
do not hold.

NewReno has only the head segment retransmitted at a time.
Therefore, walk up to the segment that has not been SACKed, if
that segment is not retransmitted nor anything before it, we know
for sure, that nothing after the non-SACKed segment should be
either. This assumption is valid because TCPCB_EVER_RETRANS does
not leave holes but each non-SACKed segment is rexmitted
in-order.

Check for retrans_out > 1 avoids more expensive walk through the
skb list, as we can know the result beforehand: F-RTO will not be
allowed.

SACKed skb can turn into non-SACked only in the extremely rare
case of SACK reneging, in this case we might fail to detect
retransmissions if there were them for any other than head. To
get rid of that feature, whole rexmit queue would have to be
walked (always) or FRTO should be prevented when SACK reneging
happens. Of course RTO should still trigger after reneging which
makes this issue even less likely to show up. And as long as the
response is as conservative as it's now, nothing bad happens even
then.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    |  2 +-
 net/ipv4/tcp_input.c | 25 +++++++++++++++++++++----
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 572a77bb690..7fd6b77519c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -341,7 +341,7 @@ extern struct sock *		tcp_check_req(struct sock *sk,struct sk_buff *skb,
 extern int			tcp_child_process(struct sock *parent,
 						  struct sock *child,
 						  struct sk_buff *skb);
-extern int			tcp_use_frto(const struct sock *sk);
+extern int			tcp_use_frto(struct sock *sk);
 extern void			tcp_enter_frto(struct sock *sk);
 extern void			tcp_enter_loss(struct sock *sk, int how);
 extern void			tcp_clear_retrans(struct tcp_sock *tp);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 723cee63791..a283fc12186 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1239,14 +1239,31 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 /* F-RTO can only be used if these conditions are satisfied:
  *  - there must be some unsent new data
  *  - the advertised window should allow sending it
+ *  - TCP has never retransmitted anything other than head
  */
-int tcp_use_frto(const struct sock *sk)
+int tcp_use_frto(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+
+	if (!sysctl_tcp_frto || !sk->sk_send_head ||
+		after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
+		      tp->snd_una + tp->snd_wnd))
+		return 0;
 
-	return (sysctl_tcp_frto && sk->sk_send_head &&
-		!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
-		       tp->snd_una + tp->snd_wnd));
+	/* Avoid expensive walking of rexmit queue if possible */
+	if (tp->retrans_out > 1)
+		return 0;
+
+	skb = skb_peek(&sk->sk_write_queue)->next;	/* Skips head */
+	sk_stream_for_retrans_queue_from(skb, sk) {
+		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
+			return 0;
+		/* Short-circuit when first non-SACKed skb has been checked */
+		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED))
+			break;
+	}
+	return 1;
 }
 
 /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
-- 
cgit v1.2.3


From d1a54c6a0a3f9c2c4ef71982d89b8571bd9eaa51 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:11:57 -0800
Subject: [TCP] FRTO: Reverse RETRANS bit clearing logic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously RETRANS bits were cleared on the entry to FRTO. We
postpone that into tcp_enter_frto_loss, which is really the
place were the clearing should be done anyway. This allows
simplification of the logic from a clearing loop to the head skb
clearing only.

Besides, the other changes made in the previous patches to
tcp_use_frto made it impossible for the non-SACKed FRTO to be
entered if other than the head has been rexmitted.

With SACK-enhanced FRTO (and Appendix B), however, there can be
a number retransmissions in flight when RTO expires (same thing
could happen before this patchset also with non-SACK FRTO). To
not introduce any jumpiness into the packet counting during FRTO,
instead of clearing RETRANS bits from skbs during entry, do it
later on.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a283fc12186..3ef7e9e0796 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1268,7 +1268,11 @@ int tcp_use_frto(struct sock *sk)
 
 /* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
  * recovery a bit and use heuristics in tcp_process_frto() to detect if
- * the RTO was spurious.
+ * the RTO was spurious. Only clear SACKED_RETRANS of the head here to
+ * keep retrans_out counting accurate (with SACK F-RTO, other than head
+ * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS
+ * bits are handled if the Loss state is really to be entered (in
+ * tcp_enter_frto_loss).
  *
  * Do like tcp_enter_loss() would; when RTO expires the second time it
  * does:
@@ -1289,17 +1293,13 @@ void tcp_enter_frto(struct sock *sk)
 		tcp_ca_event(sk, CA_EVENT_FRTO);
 	}
 
-	/* Have to clear retransmission markers here to keep the bookkeeping
-	 * in shape, even though we are not yet in Loss state.
-	 * If something was really lost, it is eventually caught up
-	 * in tcp_enter_frto_loss.
-	 */
-	tp->retrans_out = 0;
 	tp->undo_marker = tp->snd_una;
 	tp->undo_retrans = 0;
 
-	sk_stream_for_retrans_queue(skb, sk) {
+	skb = skb_peek(&sk->sk_write_queue);
+	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
 		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+		tp->retrans_out -= tcp_skb_pcount(skb);
 	}
 	tcp_sync_left_out(tp);
 
@@ -1313,7 +1313,7 @@ void tcp_enter_frto(struct sock *sk)
  * which indicates that we should follow the traditional RTO recovery,
  * i.e. mark everything lost and do go-back-N retransmission.
  */
-static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments)
+static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -1322,10 +1322,21 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments)
 	tp->sacked_out = 0;
 	tp->lost_out = 0;
 	tp->fackets_out = 0;
+	tp->retrans_out = 0;
 
 	sk_stream_for_retrans_queue(skb, sk) {
 		cnt += tcp_skb_pcount(skb);
-		TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+		/*
+		 * Count the retransmission made on RTO correctly (only when
+		 * waiting for the first ACK and did not get it)...
+		 */
+		if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) {
+			tp->retrans_out += tcp_skb_pcount(skb);
+			/* ...enter this if branch just for the first segment */
+			flag |= FLAG_DATA_ACKED;
+		} else {
+			TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
+		}
 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
 
 			/* Do not mark those segments lost that were
@@ -2550,7 +2561,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 		inet_csk(sk)->icsk_retransmits = 0;
 
 	if (!before(tp->snd_una, tp->frto_highmark)) {
-		tcp_enter_frto_loss(sk, tp->frto_counter + 1);
+		tcp_enter_frto_loss(sk, tp->frto_counter + 1, flag);
 		return 1;
 	}
 
@@ -2562,7 +2573,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 		return 1;
 
 	if (!(flag&FLAG_DATA_ACKED)) {
-		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3));
+		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), flag);
 		return 1;
 	}
 
-- 
cgit v1.2.3


From 66e93e45c09affa407750cc06398492e8b897848 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:13:47 -0800
Subject: [TCP] FRTO: Fake cwnd for ssthresh callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TCP without FRTO would be in Loss state with small cwnd. FRTO,
however, leaves cwnd (typically) to a larger value which causes
ssthresh to become too large in case RTO is triggered again
compared to what conventional recovery would do. Because
consecutive RTOs result in only a single ssthresh reduction,
RTO+cumulative ACK+RTO pattern is required to trigger this
event.

A large comment is included for congestion control module writers
trying to figure out what CA_EVENT_FRTO handler should do because
there exists a remote possibility of incompatibility between
FRTO and module defined ssthresh functions.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3ef7e9e0796..055721d8495 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1289,7 +1289,31 @@ void tcp_enter_frto(struct sock *sk)
 	    ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
 	     !icsk->icsk_retransmits)) {
 		tp->prior_ssthresh = tcp_current_ssthresh(sk);
-		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+		/* Our state is too optimistic in ssthresh() call because cwnd
+		 * is not reduced until tcp_enter_frto_loss() when previous FRTO
+		 * recovery has not yet completed. Pattern would be this: RTO,
+		 * Cumulative ACK, RTO (2xRTO for the same segment does not end
+		 * up here twice).
+		 * RFC4138 should be more specific on what to do, even though
+		 * RTO is quite unlikely to occur after the first Cumulative ACK
+		 * due to back-off and complexity of triggering events ...
+		 */
+		if (tp->frto_counter) {
+			u32 stored_cwnd;
+			stored_cwnd = tp->snd_cwnd;
+			tp->snd_cwnd = 2;
+			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+			tp->snd_cwnd = stored_cwnd;
+		} else {
+			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
+		}
+		/* ... in theory, cong.control module could do "any tricks" in
+		 * ssthresh(), which means that ca_state, lost bits and lost_out
+		 * counter would have to be faked before the call occurs. We
+		 * consider that too expensive, unlikely and hacky, so modules
+		 * using these in ssthresh() must deal these incompatibility
+		 * issues if they receives CA_EVENT_FRTO and frto_counter != 0
+		 */
 		tcp_ca_event(sk, CA_EVENT_FRTO);
 	}
 
-- 
cgit v1.2.3


From 288035f915686a9a9e85e0358c5392bb5d7ae58d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:14:42 -0800
Subject: [TCP]: Prevent reordering adjustments during FRTO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To be honest, I'm not too sure how the reord stuff works in the
first place but this seems necessary.

When FRTO has been active, the one and only retransmission could
be unnecessary but the state and sending order might not be what
the sacktag code expects it to be (to work correctly).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 055721d8495..df516d4eca9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1224,7 +1224,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 
 	tp->left_out = tp->sacked_out + tp->lost_out;
 
-	if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss)
+	if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss &&
+	    (tp->frto_highmark && after(tp->snd_una, tp->frto_highmark)))
 		tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0);
 
 #if FASTRETRANS_DEBUG > 0
-- 
cgit v1.2.3


From 4dc2665e3634d720a62bd27128fc8781fcdad2dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:16:11 -0800
Subject: [TCP]: SACK enhanced FRTO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements the SACK-enhanced FRTO given in RFC4138 using the
variant given in Appendix B.

RFC4138, Appendix B:
  "This means that in order to declare timeout spurious, the TCP
   sender must receive an acknowledgment for non-retransmitted
   segment between SND.UNA and RecoveryPoint in algorithm step 3.
   RecoveryPoint is defined in conservative SACK-recovery
   algorithm [RFC3517]"

The basic version of the FRTO algorithm can still be used also
when SACK is enabled. To enabled SACK-enhanced version, tcp_frto
sysctl is set to 2.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 76 ++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 65 insertions(+), 11 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index df516d4eca9..bb3f234668b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -100,6 +100,7 @@ int sysctl_tcp_abc __read_mostly;
 #define FLAG_ECE		0x40 /* ECE in this ACK				*/
 #define FLAG_DATA_LOST		0x80 /* SACK detected data lossage.		*/
 #define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
+#define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO */
 
 #define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
 #define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -110,6 +111,8 @@ int sysctl_tcp_abc __read_mostly;
 #define IsFack(tp) ((tp)->rx_opt.sack_ok & 2)
 #define IsDSack(tp) ((tp)->rx_opt.sack_ok & 4)
 
+#define IsSackFrto() (sysctl_tcp_frto == 0x2)
+
 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
 
 /* Adapt the MSS value used to make delayed ack decision to the
@@ -1159,6 +1162,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 						/* clear lost hint */
 						tp->retransmit_skb_hint = NULL;
 					}
+					/* SACK enhanced F-RTO detection.
+					 * Set flag if and only if non-rexmitted
+					 * segments below frto_highmark are
+					 * SACKed (RFC4138; Appendix B).
+					 * Clearing correct due to in-order walk
+					 */
+					if (after(end_seq, tp->frto_highmark)) {
+						flag &= ~FLAG_ONLY_ORIG_SACKED;
+					} else {
+						if (!(sacked & TCPCB_RETRANS))
+							flag |= FLAG_ONLY_ORIG_SACKED;
+					}
 				}
 
 				TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
@@ -1240,7 +1255,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 /* F-RTO can only be used if these conditions are satisfied:
  *  - there must be some unsent new data
  *  - the advertised window should allow sending it
- *  - TCP has never retransmitted anything other than head
+ *  - TCP has never retransmitted anything other than head (SACK enhanced
+ *    variant from Appendix B of RFC4138 is more robust here)
  */
 int tcp_use_frto(struct sock *sk)
 {
@@ -1252,6 +1268,9 @@ int tcp_use_frto(struct sock *sk)
 		      tp->snd_una + tp->snd_wnd))
 		return 0;
 
+	if (IsSackFrto())
+		return 1;
+
 	/* Avoid expensive walking of rexmit queue if possible */
 	if (tp->retrans_out > 1)
 		return 0;
@@ -1328,9 +1347,18 @@ void tcp_enter_frto(struct sock *sk)
 	}
 	tcp_sync_left_out(tp);
 
+	/* Earlier loss recovery underway (see RFC4138; Appendix B).
+	 * The last condition is necessary at least in tp->frto_counter case.
+	 */
+	if (IsSackFrto() && (tp->frto_counter ||
+	    ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
+	    after(tp->high_seq, tp->snd_una)) {
+		tp->frto_highmark = tp->high_seq;
+	} else {
+		tp->frto_highmark = tp->snd_nxt;
+	}
 	tcp_set_ca_state(sk, TCP_CA_Disorder);
 	tp->high_seq = tp->snd_nxt;
-	tp->frto_highmark = tp->snd_nxt;
 	tp->frto_counter = 1;
 }
 
@@ -2566,6 +2594,10 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
  * Rationale: if the RTO was spurious, new ACKs should arrive from the
  * original window even after we transmit two new data segments.
  *
+ * SACK version:
+ *   on first step, wait until first cumulative ACK arrives, then move to
+ *   the second step. In second step, the next ACK decides.
+ *
  * F-RTO is implemented (mainly) in four functions:
  *   - tcp_use_frto() is used to determine if TCP is can use F-RTO
  *   - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is
@@ -2590,16 +2622,38 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 		return 1;
 	}
 
-	/* RFC4138 shortcoming in step 2; should also have case c): ACK isn't
-	 * duplicate nor advances window, e.g., opposite dir data, winupdate
-	 */
-	if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) &&
-	    !(flag&FLAG_FORWARD_PROGRESS))
-		return 1;
+	if (!IsSackFrto() || IsReno(tp)) {
+		/* RFC4138 shortcoming in step 2; should also have case c):
+		 * ACK isn't duplicate nor advances window, e.g., opposite dir
+		 * data, winupdate
+		 */
+		if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) &&
+		    !(flag&FLAG_FORWARD_PROGRESS))
+			return 1;
 
-	if (!(flag&FLAG_DATA_ACKED)) {
-		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), flag);
-		return 1;
+		if (!(flag&FLAG_DATA_ACKED)) {
+			tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
+					    flag);
+			return 1;
+		}
+	} else {
+		if (!(flag&FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
+			/* Prevent sending of new data. */
+			tp->snd_cwnd = min(tp->snd_cwnd,
+					   tcp_packets_in_flight(tp));
+			return 1;
+		}
+
+		if ((tp->frto_counter == 2) &&
+		    (!(flag&FLAG_FORWARD_PROGRESS) ||
+		     ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) {
+			/* RFC4138 shortcoming (see comment above) */
+			if (!(flag&FLAG_FORWARD_PROGRESS) && (flag&FLAG_NOT_DUP))
+				return 1;
+
+			tcp_enter_frto_loss(sk, 3, flag);
+			return 1;
+		}
 	}
 
 	if (tp->frto_counter == 1) {
-- 
cgit v1.2.3


From 127af0c44fc916908abd145914d65b9fe598bcd7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Wed, 21 Feb 2007 23:16:38 -0800
Subject: [TCP] FRTO: Sysctl documentation for SACK enhanced version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The description is overly verbose to avoid ambiguity between
"SACK enabled" and "SACK enhanced FRTO"

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 702d1d8dd04..719b4290731 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -183,7 +183,10 @@ tcp_frto - BOOLEAN
 	Enables F-RTO, an enhanced recovery algorithm for TCP retransmission
 	timeouts.  It is particularly beneficial in wireless environments
 	where packet loss is typically due to random radio interference
-	rather than intermediate router congestion.
+	rather than intermediate router congestion. If set to 1, basic
+	version is enabled. 2 enables SACK enhanced FRTO, which is
+	EXPERIMENTAL. The basic version can be used also when SACK is
+	enabled for a flow through tcp_sack sysctl.
 
 tcp_keepalive_time - INTEGER
 	How often TCP sends out keepalive messages when keepalive is enabled.
-- 
cgit v1.2.3


From 5ef814753eb810d900fbd77af7c87f6d04f0e551 Mon Sep 17 00:00:00 2001
From: "Angelo P. Castellani" <angelo.castellani@gmail.con>
Date: Thu, 22 Feb 2007 00:23:05 -0800
Subject: [TCP] YeAH-TCP: algorithm implementation

YeAH-TCP is a sender-side high-speed enabled TCP congestion control
algorithm, which uses a mixed loss/delay approach to compute the
congestion window. It's design goals target high efficiency, internal,
RTT and Reno fairness, resilience to link loss while keeping network
elements load as low as possible.

For further details look here:
    http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf

Signed-off-by: Angelo P. Castellani <angelo.castellani@gmail.con>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/Kconfig    |  14 +++
 net/ipv4/Makefile   |   1 +
 net/ipv4/tcp_yeah.c | 288 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_yeah.h | 134 ++++++++++++++++++++++++
 4 files changed, 437 insertions(+)
 create mode 100644 net/ipv4/tcp_yeah.c
 create mode 100644 net/ipv4/tcp_yeah.h

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 9e8ef509c51..dc61e664162 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -574,6 +574,20 @@ config TCP_CONG_VENO
 	loss packets.
 	See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
 
+config TCP_CONG_YEAH
+	tristate "YeAH TCP"
+	depends on EXPERIMENTAL
+	default n
+	---help---
+	YeAH-TCP is a sender-side high-speed enabled TCP congestion control
+	algorithm, which uses a mixed loss/delay approach to compute the
+	congestion window. It's design goals target high efficiency,
+	internal, RTT and Reno fairness, resilience to link loss while
+	keeping network elements load as low as possible.
+
+	For further details look here:
+	  http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
+
 choice
 	prompt "Default TCP congestion control"
 	default DEFAULT_CUBIC
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 7a068626fee..eeb94d5cac9 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -49,6 +49,7 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
 obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
 obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
+obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
new file mode 100644
index 00000000000..815e020e98f
--- /dev/null
+++ b/net/ipv4/tcp_yeah.c
@@ -0,0 +1,288 @@
+/*
+ *
+ *   YeAH TCP
+ *
+ * For further details look at:
+ *    http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
+ *
+ */
+
+#include "tcp_yeah.h"
+
+/* Default values of the Vegas variables, in fixed-point representation
+ * with V_PARAM_SHIFT bits to the right of the binary point.
+ */
+#define V_PARAM_SHIFT 1
+
+#define TCP_YEAH_ALPHA       80 //lin number of packets queued at the bottleneck
+#define TCP_YEAH_GAMMA        1 //lin fraction of queue to be removed per rtt
+#define TCP_YEAH_DELTA        3 //log minimum fraction of cwnd to be removed on loss
+#define TCP_YEAH_EPSILON      1 //log maximum fraction to be removed on early decongestion
+#define TCP_YEAH_PHY          8 //lin maximum delta from base
+#define TCP_YEAH_RHO         16 //lin minumum number of consecutive rtt to consider competition on loss
+#define TCP_YEAH_ZETA        50 //lin minimum number of state switchs to reset reno_count
+
+#define TCP_SCALABLE_AI_CNT	 100U
+
+/* YeAH variables */
+struct yeah {
+	/* Vegas */
+	u32	beg_snd_nxt;	/* right edge during last RTT */
+	u32	beg_snd_una;	/* left edge  during last RTT */
+	u32	beg_snd_cwnd;	/* saves the size of the cwnd */
+	u8	doing_vegas_now;/* if true, do vegas for this RTT */
+	u16	cntRTT;		/* # of RTTs measured within last RTT */
+	u32	minRTT;		/* min of RTTs measured within last RTT (in usec) */
+	u32	baseRTT;	/* the min of all Vegas RTT measurements seen (in usec) */
+
+	/* YeAH */
+	u32 lastQ;
+	u32 doing_reno_now;
+
+	u32 reno_count;
+	u32 fast_count;
+
+	u32 pkts_acked;
+};
+
+static void tcp_yeah_init(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct yeah *yeah = inet_csk_ca(sk);
+
+	tcp_vegas_init(sk);
+
+	yeah->doing_reno_now = 0;
+	yeah->lastQ = 0;
+
+	yeah->reno_count = 2;
+
+	/* Ensure the MD arithmetic works.  This is somewhat pedantic,
+	 * since I don't think we will see a cwnd this large. :) */
+	tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128);
+
+}
+
+
+static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked)
+{
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct yeah *yeah = inet_csk_ca(sk);
+
+	if (icsk->icsk_ca_state == TCP_CA_Open)
+		yeah->pkts_acked = pkts_acked;
+}
+
+/* 64bit divisor, dividend and result. dynamic precision */
+static inline u64 div64_64(u64 dividend, u64 divisor)
+{
+	u32 d = divisor;
+
+	if (divisor > 0xffffffffULL) {
+		unsigned int shift = fls(divisor >> 32);
+
+		d = divisor >> shift;
+		dividend >>= shift;
+	}
+
+	/* avoid 64 bit division if possible */
+	if (dividend >> 32)
+		do_div(dividend, d);
+	else
+		dividend = (u32) dividend / d;
+
+	return dividend;
+}
+
+static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
+				 u32 seq_rtt, u32 in_flight, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct yeah *yeah = inet_csk_ca(sk);
+
+	if (!tcp_is_cwnd_limited(sk, in_flight))
+		return;
+
+	if (tp->snd_cwnd <= tp->snd_ssthresh) {
+		tcp_slow_start(tp);
+	} else if (!yeah->doing_reno_now) {
+		/* Scalable */
+
+		tp->snd_cwnd_cnt+=yeah->pkts_acked;
+		if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
+			tp->snd_cwnd_cnt = 0;
+		}
+
+		yeah->pkts_acked = 1;
+
+	} else {
+		/* Reno */
+
+		if (tp->snd_cwnd_cnt < tp->snd_cwnd)
+			tp->snd_cwnd_cnt++;
+
+		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+			tp->snd_cwnd++;
+			tp->snd_cwnd_cnt = 0;
+		}
+	}
+
+	/* The key players are v_beg_snd_una and v_beg_snd_nxt.
+	 *
+	 * These are so named because they represent the approximate values
+	 * of snd_una and snd_nxt at the beginning of the current RTT. More
+	 * precisely, they represent the amount of data sent during the RTT.
+	 * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
+	 * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding
+	 * bytes of data have been ACKed during the course of the RTT, giving
+	 * an "actual" rate of:
+	 *
+	 *     (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration)
+	 *
+	 * Unfortunately, v_beg_snd_una is not exactly equal to snd_una,
+	 * because delayed ACKs can cover more than one segment, so they
+	 * don't line up yeahly with the boundaries of RTTs.
+	 *
+	 * Another unfortunate fact of life is that delayed ACKs delay the
+	 * advance of the left edge of our send window, so that the number
+	 * of bytes we send in an RTT is often less than our cwnd will allow.
+	 * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
+	 */
+
+	if (after(ack, yeah->beg_snd_nxt)) {
+
+		/* We do the Vegas calculations only if we got enough RTT
+		 * samples that we can be reasonably sure that we got
+		 * at least one RTT sample that wasn't from a delayed ACK.
+		 * If we only had 2 samples total,
+		 * then that means we're getting only 1 ACK per RTT, which
+		 * means they're almost certainly delayed ACKs.
+		 * If  we have 3 samples, we should be OK.
+		 */
+
+		if (yeah->cntRTT > 2) {
+			u32 rtt;
+			u32 queue, maxqueue;
+
+			/* We have enough RTT samples, so, using the Vegas
+			 * algorithm, we determine if we should increase or
+			 * decrease cwnd, and by how much.
+			 */
+
+			/* Pluck out the RTT we are using for the Vegas
+			 * calculations. This is the min RTT seen during the
+			 * last RTT. Taking the min filters out the effects
+			 * of delayed ACKs, at the cost of noticing congestion
+			 * a bit later.
+			 */
+			rtt = yeah->minRTT;
+
+			queue = (u32)div64_64((u64)tp->snd_cwnd * (rtt - yeah->baseRTT), rtt);
+
+			maxqueue = TCP_YEAH_ALPHA;
+
+			if (queue > maxqueue ||
+				    rtt - yeah->baseRTT > (yeah->baseRTT / TCP_YEAH_PHY)) {
+
+				if (queue > maxqueue && tp->snd_cwnd > yeah->reno_count) {
+					u32 reduction = min( queue / TCP_YEAH_GAMMA ,
+					                 tp->snd_cwnd >> TCP_YEAH_EPSILON );
+
+					tp->snd_cwnd -= reduction;
+
+					tp->snd_cwnd = max( tp->snd_cwnd, yeah->reno_count);
+
+					tp->snd_ssthresh = tp->snd_cwnd;
+			}
+
+				if (yeah->reno_count <= 2)
+					yeah->reno_count = max( tp->snd_cwnd>>1, 2U);
+				else
+					yeah->reno_count++;
+
+				yeah->doing_reno_now =
+					           min_t( u32, yeah->doing_reno_now + 1 , 0xffffff);
+
+			} else {
+				yeah->fast_count++;
+
+				if (yeah->fast_count > TCP_YEAH_ZETA) {
+					yeah->reno_count = 2;
+					yeah->fast_count = 0;
+				}
+
+				yeah->doing_reno_now = 0;
+			}
+
+			yeah->lastQ = queue;
+
+		}
+
+		/* Save the extent of the current window so we can use this
+		 * at the end of the next RTT.
+		 */
+		yeah->beg_snd_una  = yeah->beg_snd_nxt;
+		yeah->beg_snd_nxt  = tp->snd_nxt;
+		yeah->beg_snd_cwnd = tp->snd_cwnd;
+
+		/* Wipe the slate clean for the next RTT. */
+		yeah->cntRTT = 0;
+		yeah->minRTT = 0x7fffffff;
+	}
+}
+
+static u32 tcp_yeah_ssthresh(struct sock *sk) {
+	const struct tcp_sock *tp = tcp_sk(sk);
+	struct yeah *yeah = inet_csk_ca(sk);
+	u32 reduction;
+
+	if (yeah->doing_reno_now < TCP_YEAH_RHO) {
+		reduction = yeah->lastQ;
+
+		reduction = min( reduction, max(tp->snd_cwnd>>1, 2U) );
+
+		reduction = max( reduction, tp->snd_cwnd >> TCP_YEAH_DELTA);
+	} else
+		reduction = max(tp->snd_cwnd>>1,2U);
+
+	yeah->fast_count = 0;
+	yeah->reno_count = max(yeah->reno_count>>1, 2U);
+
+	return tp->snd_cwnd - reduction;
+}
+
+static struct tcp_congestion_ops tcp_yeah = {
+	.init		= tcp_yeah_init,
+	.ssthresh	= tcp_yeah_ssthresh,
+	.cong_avoid	= tcp_yeah_cong_avoid,
+	.min_cwnd	= tcp_reno_min_cwnd,
+	.rtt_sample	= tcp_vegas_rtt_calc,
+	.set_state	= tcp_vegas_state,
+	.cwnd_event	= tcp_vegas_cwnd_event,
+	.get_info	= tcp_vegas_get_info,
+	.pkts_acked	= tcp_yeah_pkts_acked,
+
+	.owner		= THIS_MODULE,
+	.name		= "yeah",
+};
+
+static int __init tcp_yeah_register(void)
+{
+	BUG_ON(sizeof(struct yeah) > ICSK_CA_PRIV_SIZE);
+	tcp_register_congestion_control(&tcp_yeah);
+	return 0;
+}
+
+static void __exit tcp_yeah_unregister(void)
+{
+	tcp_unregister_congestion_control(&tcp_yeah);
+}
+
+module_init(tcp_yeah_register);
+module_exit(tcp_yeah_unregister);
+
+MODULE_AUTHOR("Angelo P. Castellani");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("YeAH TCP");
diff --git a/net/ipv4/tcp_yeah.h b/net/ipv4/tcp_yeah.h
new file mode 100644
index 00000000000..b3255dba4e2
--- /dev/null
+++ b/net/ipv4/tcp_yeah.h
@@ -0,0 +1,134 @@
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet_diag.h>
+
+#include <net/tcp.h>
+
+/* Vegas variables */
+struct vegas {
+	u32	beg_snd_nxt;	/* right edge during last RTT */
+	u32	beg_snd_una;	/* left edge  during last RTT */
+	u32	beg_snd_cwnd;	/* saves the size of the cwnd */
+	u8	doing_vegas_now;/* if true, do vegas for this RTT */
+	u16	cntRTT;		/* # of RTTs measured within last RTT */
+	u32	minRTT;		/* min of RTTs measured within last RTT (in usec) */
+	u32	baseRTT;	/* the min of all Vegas RTT measurements seen (in usec) */
+};
+
+/* There are several situations when we must "re-start" Vegas:
+ *
+ *  o when a connection is established
+ *  o after an RTO
+ *  o after fast recovery
+ *  o when we send a packet and there is no outstanding
+ *    unacknowledged data (restarting an idle connection)
+ *
+ * In these circumstances we cannot do a Vegas calculation at the
+ * end of the first RTT, because any calculation we do is using
+ * stale info -- both the saved cwnd and congestion feedback are
+ * stale.
+ *
+ * Instead we must wait until the completion of an RTT during
+ * which we actually receive ACKs.
+ */
+static inline void vegas_enable(struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	struct vegas *vegas = inet_csk_ca(sk);
+
+	/* Begin taking Vegas samples next time we send something. */
+	vegas->doing_vegas_now = 1;
+
+	/* Set the beginning of the next send window. */
+	vegas->beg_snd_nxt = tp->snd_nxt;
+
+	vegas->cntRTT = 0;
+	vegas->minRTT = 0x7fffffff;
+}
+
+/* Stop taking Vegas samples for now. */
+static inline void vegas_disable(struct sock *sk)
+{
+	struct vegas *vegas = inet_csk_ca(sk);
+
+	vegas->doing_vegas_now = 0;
+}
+
+static void tcp_vegas_init(struct sock *sk)
+{
+	struct vegas *vegas = inet_csk_ca(sk);
+
+	vegas->baseRTT = 0x7fffffff;
+	vegas_enable(sk);
+}
+
+static void tcp_vegas_state(struct sock *sk, u8 ca_state)
+{
+
+	if (ca_state == TCP_CA_Open)
+		vegas_enable(sk);
+	else
+		vegas_disable(sk);
+}
+
+/* Do RTT sampling needed for Vegas.
+ * Basically we:
+ *   o min-filter RTT samples from within an RTT to get the current
+ *     propagation delay + queuing delay (we are min-filtering to try to
+ *     avoid the effects of delayed ACKs)
+ *   o min-filter RTT samples from a much longer window (forever for now)
+ *     to find the propagation delay (baseRTT)
+ */
+static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
+{
+	struct vegas *vegas = inet_csk_ca(sk);
+	u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
+
+	/* Filter to find propagation delay: */
+	if (vrtt < vegas->baseRTT)
+		vegas->baseRTT = vrtt;
+
+	/* Find the min RTT during the last RTT to find
+	 * the current prop. delay + queuing delay:
+	 */
+	vegas->minRTT = min(vegas->minRTT, vrtt);
+	vegas->cntRTT++;
+}
+
+/*
+ * If the connection is idle and we are restarting,
+ * then we don't want to do any Vegas calculations
+ * until we get fresh RTT samples.  So when we
+ * restart, we reset our Vegas state to a clean
+ * slate. After we get acks for this flight of
+ * packets, _then_ we can make Vegas calculations
+ * again.
+ */
+static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+{
+	if (event == CA_EVENT_CWND_RESTART ||
+	    event == CA_EVENT_TX_START)
+		tcp_vegas_init(sk);
+}
+
+/* Extract info for Tcp socket info provided via netlink. */
+static void tcp_vegas_get_info(struct sock *sk, u32 ext,
+			       struct sk_buff *skb)
+{
+	const struct vegas *ca = inet_csk_ca(sk);
+	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
+		struct tcpvegas_info *info;
+
+		info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO,
+					  sizeof(*info)));
+
+		info->tcpv_enabled = ca->doing_vegas_now;
+		info->tcpv_rttcnt = ca->cntRTT;
+		info->tcpv_rtt = ca->baseRTT;
+		info->tcpv_minrtt = ca->minRTT;
+	rtattr_failure:	;
+	}
+}
+
+
-- 
cgit v1.2.3


From 886236c1247ab5e2ad9c73f6e9a652e3ae3c8b07 Mon Sep 17 00:00:00 2001
From: John Heffner <jheffner@psc.edu>
Date: Sun, 25 Mar 2007 19:21:45 -0700
Subject: [TCP]: Add RFC3742 Limited Slow-Start, controlled by variable
 sysctl_tcp_max_ssthresh.

Signed-off-by: John Heffner <jheffner@psc.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sysctl.h     |  1 +
 include/net/tcp.h          |  1 +
 net/ipv4/sysctl_net_ipv4.c |  8 ++++++++
 net/ipv4/tcp_cong.c        | 31 ++++++++++++++++++++++---------
 4 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 9a8970bf99a..98e0fd241a2 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -438,6 +438,7 @@ enum
 	NET_CIPSOV4_RBM_STRICTVALID=121,
 	NET_TCP_AVAIL_CONG_CONTROL=122,
 	NET_TCP_ALLOWED_CONG_CONTROL=123,
+	NET_TCP_MAX_SSTHRESH=124,
 };
 
 enum {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7fd6b77519c..6d09f5085f6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -230,6 +230,7 @@ extern int sysctl_tcp_mtu_probing;
 extern int sysctl_tcp_base_mss;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
+extern int sysctl_tcp_max_ssthresh;
 
 extern atomic_t tcp_memory_allocated;
 extern atomic_t tcp_sockets_allocated;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0aa304711a9..d68effe98e8 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -803,6 +803,14 @@ ctl_table ipv4_table[] = {
 		.proc_handler   = &proc_allowed_congestion_control,
 		.strategy	= &strategy_allowed_congestion_control,
 	},
+	{
+		.ctl_name	= NET_TCP_MAX_SSTHRESH,
+		.procname	= "tcp_max_ssthresh",
+		.data		= &sysctl_tcp_max_ssthresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 	{ .ctl_name = 0 }
 };
 
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 34ae3f13483..ccd88407e0c 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -12,6 +12,8 @@
 #include <linux/list.h>
 #include <net/tcp.h>
 
+int sysctl_tcp_max_ssthresh = 0;
+
 static DEFINE_SPINLOCK(tcp_cong_list_lock);
 static LIST_HEAD(tcp_cong_list);
 
@@ -274,10 +276,13 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 
 
 /*
- * Linear increase during slow start
+ * Slow start (exponential increase) with
+ * RFC3742 Limited Slow Start (fast linear increase) support.
  */
 void tcp_slow_start(struct tcp_sock *tp)
 {
+	int cnt = 0;
+
 	if (sysctl_tcp_abc) {
 		/* RFC3465: Slow Start
 		 * TCP sender SHOULD increase cwnd by the number of
@@ -286,17 +291,25 @@ void tcp_slow_start(struct tcp_sock *tp)
 		 */
 		if (tp->bytes_acked < tp->mss_cache)
 			return;
-
-		/* We MAY increase by 2 if discovered delayed ack */
-		if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache) {
-			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-				tp->snd_cwnd++;
-		}
 	}
+
+	if (sysctl_tcp_max_ssthresh > 0 &&
+	    tp->snd_cwnd > sysctl_tcp_max_ssthresh)
+		cnt += sysctl_tcp_max_ssthresh>>1;
+	else
+		cnt += tp->snd_cwnd;
+
+	/* RFC3465: We MAY increase by 2 if discovered delayed ack */
+	if (sysctl_tcp_abc > 1 && tp->bytes_acked >= 2*tp->mss_cache)
+		cnt <<= 1;
 	tp->bytes_acked = 0;
 
-	if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-		tp->snd_cwnd++;
+	tp->snd_cwnd_cnt += cnt;
+	while (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
+		tp->snd_cwnd_cnt -= tp->snd_cwnd;
+		if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+			tp->snd_cwnd++;
+	}
 }
 EXPORT_SYMBOL_GPL(tcp_slow_start);
 
-- 
cgit v1.2.3


From cf4c6bf83d0fa070f60b1ba8124dfe0e65fbfbcc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Thu, 22 Feb 2007 01:13:58 -0800
Subject: [TCP]: struct *sock argument renamed: sp -> sk
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In general, TCP code uses "sk" for struct sock pointer.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0ba74bbe7d3..f6793b4cc66 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2293,13 +2293,13 @@ static void get_openreq4(struct sock *sk, struct request_sock *req,
 		req);
 }
 
-static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
+static void get_tcp4_sock(struct sock *sk, char *tmpbuf, int i)
 {
 	int timer_active;
 	unsigned long timer_expires;
-	struct tcp_sock *tp = tcp_sk(sp);
-	const struct inet_connection_sock *icsk = inet_csk(sp);
-	struct inet_sock *inet = inet_sk(sp);
+	struct tcp_sock *tp = tcp_sk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+	struct inet_sock *inet = inet_sk(sk);
 	__be32 dest = inet->daddr;
 	__be32 src = inet->rcv_saddr;
 	__u16 destp = ntohs(inet->dport);
@@ -2311,9 +2311,9 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
 		timer_active	= 4;
 		timer_expires	= icsk->icsk_timeout;
-	} else if (timer_pending(&sp->sk_timer)) {
+	} else if (timer_pending(&sk->sk_timer)) {
 		timer_active	= 2;
-		timer_expires	= sp->sk_timer.expires;
+		timer_expires	= sk->sk_timer.expires;
 	} else {
 		timer_active	= 0;
 		timer_expires = jiffies;
@@ -2321,17 +2321,17 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
 
 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
 			"%08X %5d %8d %lu %d %p %u %u %u %u %d",
-		i, src, srcp, dest, destp, sp->sk_state,
+		i, src, srcp, dest, destp, sk->sk_state,
 		tp->write_seq - tp->snd_una,
-		sp->sk_state == TCP_LISTEN ? sp->sk_ack_backlog :
+		sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog :
 					     (tp->rcv_nxt - tp->copied_seq),
 		timer_active,
 		jiffies_to_clock_t(timer_expires - jiffies),
 		icsk->icsk_retransmits,
-		sock_i_uid(sp),
+		sock_i_uid(sk),
 		icsk->icsk_probes_out,
-		sock_i_ino(sp),
-		atomic_read(&sp->sk_refcnt), sp,
+		sock_i_ino(sk),
+		atomic_read(&sk->sk_refcnt), sk,
 		icsk->icsk_rto,
 		icsk->icsk_ack.ato,
 		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
-- 
cgit v1.2.3


From 54287cc178cf85dbae0decec8b4dc190bff757ad Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 22 Feb 2007 03:20:44 -0800
Subject: [TCP]: Keep copied_seq, rcv_wup and rcv_next together.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I noticed in oprofile study a cache miss in tcp_rcv_established() to read
copied_seq.

ffffffff80400a80 <tcp_rcv_established>: /* tcp_rcv_established total: 4034293  
2.0400 */

 55493  0.0281 :ffffffff80400bc9:   mov    0x4c8(%r12),%eax copied_seq
543103  0.2746 :ffffffff80400bd1:   cmp    0x3e0(%r12),%eax   rcv_nxt    

if (tp->copied_seq == tp->rcv_nxt &&
        len - tcp_header_len <= tp->ucopy.len) {

In this function, the cache line 0x4c0 -> 0x500 is used only for this
reading 'copied_seq' field.

rcv_wup and copied_seq should be next to rcv_nxt field, to lower number of
active cache lines in hot paths. (tcp_rcv_established(), tcp_poll(), ...)

As you suggested, I changed tcp_create_openreq_child() so that these fields
are changed together, to avoid adding a new store buffer stall.

Patch is 64bit friendly (no new hole because of alignment constraints)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      | 4 ++--
 net/ipv4/tcp_minisocks.c | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 29d3089038a..415193e171e 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -242,6 +242,8 @@ struct tcp_sock {
  *	See RFC793 and RFC1122. The RFC writes these in capitals.
  */
  	u32	rcv_nxt;	/* What we want to receive next 	*/
+	u32	copied_seq;	/* Head of yet unread data		*/
+	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
  	u32	snd_nxt;	/* Next sequence we send		*/
 
  	u32	snd_una;	/* First byte we want an ack for	*/
@@ -307,10 +309,8 @@ struct tcp_sock {
 	struct sk_buff_head	out_of_order_queue; /* Out of order segments go here */
 
  	u32	rcv_wnd;	/* Current receiver window		*/
-	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
 	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */
 	u32	pushed_seq;	/* Last pushed seq, required to talk to windows */
-	u32	copied_seq;	/* Head of yet unread data		*/
 
 /*	SACKs data	*/
 	struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6b5c64f3c92..706932726a1 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -387,8 +387,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		/* Now setup tcp_sock */
 		newtp = tcp_sk(newsk);
 		newtp->pred_flags = 0;
-		newtp->rcv_nxt = treq->rcv_isn + 1;
-		newtp->snd_nxt = newtp->snd_una = newtp->snd_sml = treq->snt_isn + 1;
+		newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
+		newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
 
 		tcp_prequeue_init(newtp);
 
@@ -422,10 +422,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		tcp_set_ca_state(newsk, TCP_CA_Open);
 		tcp_init_xmit_timers(newsk);
 		skb_queue_head_init(&newtp->out_of_order_queue);
-		newtp->rcv_wup = treq->rcv_isn + 1;
 		newtp->write_seq = treq->snt_isn + 1;
 		newtp->pushed_seq = newtp->write_seq;
-		newtp->copied_seq = treq->rcv_isn + 1;
 
 		newtp->rx_opt.saw_tstamp = 0;
 
-- 
cgit v1.2.3


From e0ef57cc56c3c96493f9b0d6c77bb9608eeaa173 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 22 Feb 2007 22:52:59 -0800
Subject: [TCP]: Make snd_cwnd_clamp a u32.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 415193e171e..18a468dd505 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -302,7 +302,7 @@ struct tcp_sock {
  	u32	snd_ssthresh;	/* Slow start size threshold		*/
  	u32	snd_cwnd;	/* Sending congestion window		*/
  	u16	snd_cwnd_cnt;	/* Linear increase counter		*/
-	u16	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
+	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
 	u32	snd_cwnd_used;
 	u32	snd_cwnd_stamp;
 
-- 
cgit v1.2.3


From c5e7af0df5d7234afd8596560d9f570cfc6c18bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Fri, 23 Feb 2007 16:22:06 -0800
Subject: [TCP]: Correct reordering detection change (no FRTO case)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The reordering detection must work also when FRTO has not been
used at all which was the original intention of mine, just the
expression of the idea was flawed.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bb3f234668b..f6ba07f0d81 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1240,7 +1240,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	tp->left_out = tp->sacked_out + tp->lost_out;
 
 	if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss &&
-	    (tp->frto_highmark && after(tp->snd_una, tp->frto_highmark)))
+	    (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
 		tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0);
 
 #if FASTRETRANS_DEBUG > 0
-- 
cgit v1.2.3


From 3cfe3baaf07c9e40a75f9a70662de56df1c246a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Tue, 27 Feb 2007 10:09:49 -0800
Subject: [TCP]: Add two new spurious RTO responses to FRTO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New sysctl tcp_frto_response is added to select amongst these
responses:
	- Rate halving based; reuses CA_CWR state (default)
	- Very conservative; used to be the only one available (=1)
	- Undo cwr; undoes ssthresh and cwnd reductions (=2)

The response with rate halving requires a new parameter to
tcp_enter_cwr because FRTO has already reduced ssthresh and
doing a second reduction there has to be prevented. In addition,
to keep things nice on 80 cols screen, a local variable was
added.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sysctl.h     |  1 +
 include/net/tcp.h          |  3 ++-
 net/ipv4/sysctl_net_ipv4.c |  8 ++++++++
 net/ipv4/tcp_input.c       | 36 ++++++++++++++++++++++++++++++++----
 net/ipv4/tcp_output.c      |  2 +-
 5 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 98e0fd241a2..c9ccb550206 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -439,6 +439,7 @@ enum
 	NET_TCP_AVAIL_CONG_CONTROL=122,
 	NET_TCP_ALLOWED_CONG_CONTROL=123,
 	NET_TCP_MAX_SSTHRESH=124,
+	NET_TCP_FRTO_RESPONSE=125,
 };
 
 enum {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6d09f5085f6..f0c9e3400a0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -220,6 +220,7 @@ extern int sysctl_tcp_app_win;
 extern int sysctl_tcp_adv_win_scale;
 extern int sysctl_tcp_tw_reuse;
 extern int sysctl_tcp_frto;
+extern int sysctl_tcp_frto_response;
 extern int sysctl_tcp_low_latency;
 extern int sysctl_tcp_dma_copybreak;
 extern int sysctl_tcp_nometrics_save;
@@ -738,7 +739,7 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp)
 	tp->left_out = tp->sacked_out + tp->lost_out;
 }
 
-extern void tcp_enter_cwr(struct sock *sk);
+extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
 extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
 
 /* Slow start with delack produces 3 packets of burst, so that
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d68effe98e8..6817d6485df 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -646,6 +646,14 @@ ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
+	{
+		.ctl_name	= NET_TCP_FRTO_RESPONSE,
+		.procname	= "tcp_frto_response",
+		.data		= &sysctl_tcp_frto_response,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
 	{
 		.ctl_name	= NET_TCP_LOW_LATENCY,
 		.procname	= "tcp_low_latency",
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f6ba07f0d81..322e43c5646 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -86,6 +86,7 @@ int sysctl_tcp_stdurg __read_mostly;
 int sysctl_tcp_rfc1337 __read_mostly;
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 int sysctl_tcp_frto __read_mostly;
+int sysctl_tcp_frto_response __read_mostly;
 int sysctl_tcp_nometrics_save __read_mostly;
 
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
@@ -762,15 +763,17 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 }
 
 /* Set slow start threshold and cwnd not falling to slow start */
-void tcp_enter_cwr(struct sock *sk)
+void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 
 	tp->prior_ssthresh = 0;
 	tp->bytes_acked = 0;
 	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
 		tp->undo_marker = 0;
-		tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
+		if (set_ssthresh)
+			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
 		tp->snd_cwnd = min(tp->snd_cwnd,
 				   tcp_packets_in_flight(tp) + 1U);
 		tp->snd_cwnd_cnt = 0;
@@ -2003,7 +2006,7 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
 		tp->retrans_stamp = 0;
 
 	if (flag&FLAG_ECE)
-		tcp_enter_cwr(sk);
+		tcp_enter_cwr(sk, 1);
 
 	if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
 		int state = TCP_CA_Open;
@@ -2579,6 +2582,21 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
 	tcp_moderate_cwnd(tp);
 }
 
+/* A conservative spurious RTO response algorithm: reduce cwnd using
+ * rate halving and continue in congestion avoidance.
+ */
+static void tcp_ratehalving_spur_to_response(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	tcp_enter_cwr(sk, 0);
+	tp->high_seq = tp->frto_highmark; 	/* Smoother w/o this? - ij */
+}
+
+static void tcp_undo_spur_to_response(struct sock *sk)
+{
+	tcp_undo_cwr(sk, 1);
+}
+
 /* F-RTO spurious RTO detection algorithm (RFC4138)
  *
  * F-RTO affects during two new ACKs following RTO (well, almost, see inline
@@ -2661,7 +2679,17 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 		tp->frto_counter = 2;
 		return 1;
 	} else /* frto_counter == 2 */ {
-		tcp_conservative_spur_to_response(tp);
+		switch (sysctl_tcp_frto_response) {
+		case 2:
+			tcp_undo_spur_to_response(sk);
+			break;
+		case 1:
+			tcp_conservative_spur_to_response(tp);
+			break;
+		default:
+			tcp_ratehalving_spur_to_response(sk);
+			break;
+		};
 		tp->frto_counter = 0;
 	}
 	return 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3c24881f2a6..d19b2f3b70f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -545,7 +545,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (likely(err <= 0))
 		return err;
 
-	tcp_enter_cwr(sk);
+	tcp_enter_cwr(sk, 1);
 
 	return net_xmit_eval(err);
 
-- 
cgit v1.2.3


From 89808060b7a71376cc2ba8092d43b2010da465b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Tue, 27 Feb 2007 10:10:55 -0800
Subject: [TCP] Sysctl documentation: tcp_frto_response
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In addition, fixed minor things in tcp_frto sysctl.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 719b4290731..054c515bd72 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -179,15 +179,32 @@ tcp_fin_timeout - INTEGER
 	because they eat maximum 1.5K of memory, but they tend
 	to live longer.	Cf. tcp_max_orphans.
 
-tcp_frto - BOOLEAN
+tcp_frto - INTEGER
 	Enables F-RTO, an enhanced recovery algorithm for TCP retransmission
 	timeouts.  It is particularly beneficial in wireless environments
 	where packet loss is typically due to random radio interference
 	rather than intermediate router congestion. If set to 1, basic
-	version is enabled. 2 enables SACK enhanced FRTO, which is
+	version is enabled. 2 enables SACK enhanced F-RTO, which is
 	EXPERIMENTAL. The basic version can be used also when SACK is
 	enabled for a flow through tcp_sack sysctl.
 
+tcp_frto_response - INTEGER
+	When F-RTO has detected that a TCP retransmission timeout was
+	spurious (i.e, the timeout would have been avoided had TCP set a
+	longer retransmission timeout), TCP has several options what to do
+	next. Possible values are:
+		0 Rate halving based; a smooth and conservative response,
+		  results in halved cwnd and ssthresh after one RTT
+		1 Very conservative response; not recommended because even
+		  though being valid, it interacts poorly with the rest of
+		  Linux TCP, halves cwnd and ssthresh immediately
+		2 Aggressive response; undoes congestion control measures
+		  that are now known to be unnecessary (ignoring the
+		  possibility of a lost retransmission that would require
+		  TCP to be more cautious), cwnd and ssthresh are restored
+		  to the values prior timeout
+	Default: 0 (rate halving based)
+
 tcp_keepalive_time - INTEGER
 	How often TCP sends out keepalive messages when keepalive is enabled.
 	Default: 2hours.
-- 
cgit v1.2.3


From e01f9d7793be82e6c252efbd52c399d3eb65abe4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Fri, 2 Mar 2007 13:27:25 -0800
Subject: [TCP]: Complete icsk-to-local-variable change (in tcp_enter_cwr)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A local variable for icsk was created but this change was
missing. Spotted by Jarek Poplawski.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 322e43c5646..cb715eadf8f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -770,7 +770,7 @@ void tcp_enter_cwr(struct sock *sk, const int set_ssthresh)
 
 	tp->prior_ssthresh = 0;
 	tp->bytes_acked = 0;
-	if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
+	if (icsk->icsk_ca_state < TCP_CA_CWR) {
 		tp->undo_marker = 0;
 		if (set_ssthresh)
 			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
-- 
cgit v1.2.3


From e317f6f69cb95527799d308a9421b7dc1252989a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Fri, 2 Mar 2007 13:34:19 -0800
Subject: [TCP]: FRTO undo response falls back to ratehalving one if ECEd
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Undoing ssthresh is disabled in fastretrans_alert whenever
FLAG_ECE is set by clearing prior_ssthresh. The clearing does
not protect FRTO because FRTO operates before fastretrans_alert.
Moving the clearing of prior_ssthresh earlier seems to be a
suboptimal solution to the FRTO case because then FLAG_ECE will
cause a second ssthresh reduction in try_to_open (the first
occurred when FRTO was entered). So instead, FRTO falls back
immediately to the rate halving response, which switches TCP to
CA_CWR state preventing the latter reduction of ssthresh.

If the first ECE arrived before the ACK after which FRTO is able
to decide RTO as spurious, prior_ssthresh is already cleared.
Thus no undoing for ssthresh occurs. Besides, FLAG_ECE should be
set also in the following ACKs resulting in rate halving response
that sees TCP is already in CA_CWR, which again prevents an extra
ssthresh reduction on that round-trip.

If the first ECE arrived before RTO, ssthresh has already been
adapted and prior_ssthresh remains cleared on entry because TCP
is in CA_CWR (the same applies also to a case where FRTO is
entered more than once and ECE comes in the middle).

High_seq must not be touched after tcp_enter_cwr because CWR
round-trip calculation depends on it.

I believe that after this patch, FRTO should be ECN-safe and
even able to take advantage of synergy benefits.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cb715eadf8f..d894bbcc1d2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2587,14 +2587,15 @@ static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
  */
 static void tcp_ratehalving_spur_to_response(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	tcp_enter_cwr(sk, 0);
-	tp->high_seq = tp->frto_highmark; 	/* Smoother w/o this? - ij */
 }
 
-static void tcp_undo_spur_to_response(struct sock *sk)
+static void tcp_undo_spur_to_response(struct sock *sk, int flag)
 {
-	tcp_undo_cwr(sk, 1);
+	if (flag&FLAG_ECE)
+		tcp_ratehalving_spur_to_response(sk);
+	else
+		tcp_undo_cwr(sk, 1);
 }
 
 /* F-RTO spurious RTO detection algorithm (RFC4138)
@@ -2681,7 +2682,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 	} else /* frto_counter == 2 */ {
 		switch (sysctl_tcp_frto_response) {
 		case 2:
-			tcp_undo_spur_to_response(sk);
+			tcp_undo_spur_to_response(sk, flag);
 			break;
 		case 1:
 			tcp_conservative_spur_to_response(tp);
-- 
cgit v1.2.3


From fa438ccfdfd3f6db02c13b61b21454eb81cd6a13 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 4 Mar 2007 16:05:44 -0800
Subject: [NET]: Keep sk_backlog near sk_lock

sk_backlog is a critical field of struct sock. (known famous words)

It is (ab)used in hot paths, in particular in release_sock(), tcp_recvmsg(),
tcp_v4_rcv(), sk_receive_skb().

It really makes sense to place it next to sk_lock, because sk_backlog is only
used after sk_lock locked (and thus memory cache line in L1 cache). This
should reduce cache misses and sk_lock acquisition time.

(In theory, we could only move the head pointer near sk_lock, and leaving tail
far away, because 'tail' is normally not so hot, but keep it simple :) )

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 18 +++++++++---------
 net/core/sock.c    |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 2c7d60ca354..a3366c3c837 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -202,6 +202,15 @@ struct sock {
 	unsigned short		sk_type;
 	int			sk_rcvbuf;
 	socket_lock_t		sk_lock;
+	/*
+	 * The backlog queue is special, it is always used with
+	 * the per-socket spinlock held and requires low latency
+	 * access. Therefore we special case it's implementation.
+	 */
+	struct {
+		struct sk_buff *head;
+		struct sk_buff *tail;
+	} sk_backlog;
 	wait_queue_head_t	*sk_sleep;
 	struct dst_entry	*sk_dst_cache;
 	struct xfrm_policy	*sk_policy[2];
@@ -221,15 +230,6 @@ struct sock {
 	int			sk_rcvlowat;
 	unsigned long 		sk_flags;
 	unsigned long	        sk_lingertime;
-	/*
-	 * The backlog queue is special, it is always used with
-	 * the per-socket spinlock held and requires low latency
-	 * access. Therefore we special case it's implementation.
-	 */
-	struct {
-		struct sk_buff *head;
-		struct sk_buff *tail;
-	} sk_backlog;
 	struct sk_buff_head	sk_error_queue;
 	struct proto		*sk_prot_creator;
 	rwlock_t		sk_callback_lock;
diff --git a/net/core/sock.c b/net/core/sock.c
index 27c4f62382b..6d35d5775ba 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -904,6 +904,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		sk_node_init(&newsk->sk_node);
 		sock_lock_init(newsk);
 		bh_lock_sock(newsk);
+		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
 
 		atomic_set(&newsk->sk_rmem_alloc, 0);
 		atomic_set(&newsk->sk_wmem_alloc, 0);
@@ -923,7 +924,6 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		newsk->sk_wmem_queued	= 0;
 		newsk->sk_forward_alloc = 0;
 		newsk->sk_send_head	= NULL;
-		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
 
 		sock_reset_flag(newsk, SOCK_DONE);
-- 
cgit v1.2.3


From 25c4e53a4c9bfe45be52821f54ec5ce957519db2 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Sun, 4 Mar 2007 16:06:47 -0800
Subject: [PKTGEN]: use pr_debug

Remove private debug macro and replace with standard version

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 4b01496dc33..a6bdcb0d43d 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -164,9 +164,6 @@
 
 #define VERSION  "pktgen v2.68: Packet Generator for packet performance testing.\n"
 
-/* #define PG_DEBUG(a) a */
-#define PG_DEBUG(a)
-
 /* The buckets are exponential in 'width' */
 #define LAT_BUCKETS_MAX 32
 #define IP_NAME_SZ 32
@@ -1857,8 +1854,7 @@ static int pktgen_mark_device(const char *ifname)
 	int ret = 0;
 
 	mutex_lock(&pktgen_thread_lock);
-	PG_DEBUG(printk("pktgen: pktgen_mark_device marking %s for removal\n",
-			ifname));
+	pr_debug("pktgen: pktgen_mark_device marking %s for removal\n", ifname);
 
 	while (1) {
 
@@ -1867,8 +1863,8 @@ static int pktgen_mark_device(const char *ifname)
 			break;	/* success */
 
 		mutex_unlock(&pktgen_thread_lock);
-		PG_DEBUG(printk("pktgen: pktgen_mark_device waiting for %s "
-				"to disappear....\n", ifname));
+		pr_debug("pktgen: pktgen_mark_device waiting for %s "
+				"to disappear....\n", ifname);
 		schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try));
 		mutex_lock(&pktgen_thread_lock);
 
@@ -2848,7 +2844,7 @@ static void pktgen_run(struct pktgen_thread *t)
 	struct pktgen_dev *pkt_dev;
 	int started = 0;
 
-	PG_DEBUG(printk("pktgen: entering pktgen_run. %p\n", t));
+	pr_debug("pktgen: entering pktgen_run. %p\n", t);
 
 	if_lock(t);
 	list_for_each_entry(pkt_dev, &t->if_list, list) {
@@ -2880,7 +2876,7 @@ static void pktgen_stop_all_threads_ifs(void)
 {
 	struct pktgen_thread *t;
 
-	PG_DEBUG(printk("pktgen: entering pktgen_stop_all_threads_ifs.\n"));
+	pr_debug("pktgen: entering pktgen_stop_all_threads_ifs.\n");
 
 	mutex_lock(&pktgen_thread_lock);
 
@@ -2948,7 +2944,7 @@ static void pktgen_run_all_threads(void)
 {
 	struct pktgen_thread *t;
 
-	PG_DEBUG(printk("pktgen: entering pktgen_run_all_threads.\n"));
+	pr_debug("pktgen: entering pktgen_run_all_threads.\n");
 
 	mutex_lock(&pktgen_thread_lock);
 
@@ -3040,7 +3036,7 @@ static void pktgen_stop(struct pktgen_thread *t)
 {
 	struct pktgen_dev *pkt_dev;
 
-	PG_DEBUG(printk("pktgen: entering pktgen_stop\n"));
+	pr_debug("pktgen: entering pktgen_stop\n");
 
 	if_lock(t);
 
@@ -3064,7 +3060,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
 	struct list_head *q, *n;
 	struct pktgen_dev *cur;
 
-	PG_DEBUG(printk("pktgen: entering pktgen_rem_one_if\n"));
+	pr_debug("pktgen: entering pktgen_rem_one_if\n");
 
 	if_lock(t);
 
@@ -3093,7 +3089,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
 
 	/* Remove all devices, free mem */
 
-	PG_DEBUG(printk("pktgen: entering pktgen_rem_all_ifs\n"));
+	pr_debug("pktgen: entering pktgen_rem_all_ifs\n");
 	if_lock(t);
 
 	list_for_each_safe(q, n, &t->if_list) {
@@ -3276,7 +3272,7 @@ static int pktgen_thread_worker(void *arg)
 
 	t->pid = current->pid;
 
-	PG_DEBUG(printk("pktgen: starting pktgen/%d:  pid=%d\n", cpu, current->pid));
+	pr_debug("pktgen: starting pktgen/%d:  pid=%d\n", cpu, current->pid);
 
 	max_before_softirq = t->max_before_softirq;
 
@@ -3339,13 +3335,13 @@ static int pktgen_thread_worker(void *arg)
 		set_current_state(TASK_INTERRUPTIBLE);
 	}
 
-	PG_DEBUG(printk("pktgen: %s stopping all device\n", t->tsk->comm));
+	pr_debug("pktgen: %s stopping all device\n", t->tsk->comm);
 	pktgen_stop(t);
 
-	PG_DEBUG(printk("pktgen: %s removing all device\n", t->tsk->comm));
+	pr_debug("pktgen: %s removing all device\n", t->tsk->comm);
 	pktgen_rem_all_ifs(t);
 
-	PG_DEBUG(printk("pktgen: %s removing thread.\n", t->tsk->comm));
+	pr_debug("pktgen: %s removing thread.\n", t->tsk->comm);
 	pktgen_rem_thread(t);
 
 	return 0;
@@ -3364,7 +3360,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
 		}
 
 	if_unlock(t);
-	PG_DEBUG(printk("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev));
+	pr_debug("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev);
 	return pkt_dev;
 }
 
@@ -3533,7 +3529,7 @@ static int pktgen_remove_device(struct pktgen_thread *t,
 				struct pktgen_dev *pkt_dev)
 {
 
-	PG_DEBUG(printk("pktgen: remove_device pkt_dev=%p\n", pkt_dev));
+	pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev);
 
 	if (pkt_dev->running) {
 		printk("pktgen:WARNING: trying to remove a running interface, stopping it now.\n");
-- 
cgit v1.2.3


From 5fa6fc76f55c5c42fff52ae1d57a685b9373fcdc Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Sun, 4 Mar 2007 16:07:28 -0800
Subject: [PKTGEN]: use random32

Can use random32() now.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 52 +++++++++++++++++++---------------------------------
 1 file changed, 19 insertions(+), 33 deletions(-)

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index a6bdcb0d43d..3b2556d01ad 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -465,17 +465,6 @@ static inline __u64 pg_div64(__u64 n, __u64 base)
 	return tmp;
 }
 
-static inline u32 pktgen_random(void)
-{
-#if 0
-	__u32 n;
-	get_random_bytes(&n, 4);
-	return n;
-#else
-	return net_random();
-#endif
-}
-
 static inline __u64 getCurMs(void)
 {
 	struct timeval tv;
@@ -2092,7 +2081,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 	int flow = 0;
 
 	if (pkt_dev->cflows) {
-		flow = pktgen_random() % pkt_dev->cflows;
+		flow = random32() % pkt_dev->cflows;
 
 		if (pkt_dev->flows[flow].count > pkt_dev->lflow)
 			pkt_dev->flows[flow].count = 0;
@@ -2104,7 +2093,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 		__u32 tmp;
 
 		if (pkt_dev->flags & F_MACSRC_RND)
-			mc = pktgen_random() % (pkt_dev->src_mac_count);
+			mc = random32() % pkt_dev->src_mac_count;
 		else {
 			mc = pkt_dev->cur_src_mac_offset++;
 			if (pkt_dev->cur_src_mac_offset >
@@ -2130,7 +2119,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 		__u32 tmp;
 
 		if (pkt_dev->flags & F_MACDST_RND)
-			mc = pktgen_random() % (pkt_dev->dst_mac_count);
+			mc = random32() % pkt_dev->dst_mac_count;
 
 		else {
 			mc = pkt_dev->cur_dst_mac_offset++;
@@ -2157,24 +2146,23 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 		for(i = 0; i < pkt_dev->nr_labels; i++)
 			if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM)
 				pkt_dev->labels[i] = MPLS_STACK_BOTTOM |
-					     ((__force __be32)pktgen_random() &
+					     ((__force __be32)random32() &
 						      htonl(0x000fffff));
 	}
 
 	if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) {
-		pkt_dev->vlan_id = pktgen_random() % 4096;
+		pkt_dev->vlan_id = random32() & (4096-1);
 	}
 
 	if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) {
-		pkt_dev->svlan_id = pktgen_random() % 4096;
+		pkt_dev->svlan_id = random32() & (4096 - 1);
 	}
 
 	if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) {
 		if (pkt_dev->flags & F_UDPSRC_RND)
-			pkt_dev->cur_udp_src =
-			    ((pktgen_random() %
-			      (pkt_dev->udp_src_max - pkt_dev->udp_src_min)) +
-			     pkt_dev->udp_src_min);
+			pkt_dev->cur_udp_src = random32() %
+				(pkt_dev->udp_src_max - pkt_dev->udp_src_min)
+				+ pkt_dev->udp_src_min;
 
 		else {
 			pkt_dev->cur_udp_src++;
@@ -2185,10 +2173,9 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 
 	if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) {
 		if (pkt_dev->flags & F_UDPDST_RND) {
-			pkt_dev->cur_udp_dst =
-			    ((pktgen_random() %
-			      (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)) +
-			     pkt_dev->udp_dst_min);
+			pkt_dev->cur_udp_dst = random32() %
+				(pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)
+				+ pkt_dev->udp_dst_min;
 		} else {
 			pkt_dev->cur_udp_dst++;
 			if (pkt_dev->cur_udp_dst >= pkt_dev->udp_dst_max)
@@ -2203,7 +2190,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 							       saddr_max))) {
 			__u32 t;
 			if (pkt_dev->flags & F_IPSRC_RND)
-				t = ((pktgen_random() % (imx - imn)) + imn);
+				t = random32() % (imx - imn) + imn;
 			else {
 				t = ntohl(pkt_dev->cur_saddr);
 				t++;
@@ -2224,14 +2211,13 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 				__be32 s;
 				if (pkt_dev->flags & F_IPDST_RND) {
 
-					t = pktgen_random() % (imx - imn) + imn;
+					t = random32() % (imx - imn) + imn;
 					s = htonl(t);
 
 					while (LOOPBACK(s) || MULTICAST(s)
 					       || BADCLASS(s) || ZERONET(s)
 					       || LOCAL_MCAST(s)) {
-						t = (pktgen_random() %
-						      (imx - imn)) + imn;
+						t = random32() % (imx - imn) + imn;
 						s = htonl(t);
 					}
 					pkt_dev->cur_daddr = s;
@@ -2263,7 +2249,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 
 			for (i = 0; i < 4; i++) {
 				pkt_dev->cur_in6_daddr.s6_addr32[i] =
-				    (((__force __be32)pktgen_random() |
+				    (((__force __be32)random32() |
 				      pkt_dev->min_in6_daddr.s6_addr32[i]) &
 				     pkt_dev->max_in6_daddr.s6_addr32[i]);
 			}
@@ -2273,9 +2259,9 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 	if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) {
 		__u32 t;
 		if (pkt_dev->flags & F_TXSIZE_RND) {
-			t = ((pktgen_random() %
-			      (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size))
-			     + pkt_dev->min_pkt_size);
+			t = random32() %
+				(pkt_dev->max_pkt_size - pkt_dev->min_pkt_size)
+				+ pkt_dev->min_pkt_size;
 		} else {
 			t = pkt_dev->cur_pkt_size + 1;
 			if (t > pkt_dev->max_pkt_size)
-- 
cgit v1.2.3


From d5f1ce9a5e80fb315c86b036a89b1237fdf11938 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Sun, 4 Mar 2007 16:08:08 -0800
Subject: [PKTGEN]: don't use __constant_htonl()

The existing htonl() macro is smart enough to do the same code as
using __constant_htonl() and it looks cleaner.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 3b2556d01ad..edf46fbb27d 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -168,7 +168,7 @@
 #define LAT_BUCKETS_MAX 32
 #define IP_NAME_SZ 32
 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
-#define MPLS_STACK_BOTTOM __constant_htonl(0x00000100)
+#define MPLS_STACK_BOTTOM htonl(0x00000100)
 
 /* Device flag bits */
 #define F_IPSRC_RND   (1<<0)	/* IP-Src Random  */
@@ -2298,7 +2298,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	int datalen, iplen;
 	struct iphdr *iph;
 	struct pktgen_hdr *pgh = NULL;
-	__be16 protocol = __constant_htons(ETH_P_IP);
+	__be16 protocol = htons(ETH_P_IP);
 	__be32 *mpls;
 	__be16 *vlan_tci = NULL;                 /* Encapsulates priority and VLAN ID */
 	__be16 *vlan_encapsulated_proto = NULL;  /* packet type ID field (or len) for VLAN tag */
@@ -2307,10 +2307,10 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 
 
 	if (pkt_dev->nr_labels)
-		protocol = __constant_htons(ETH_P_MPLS_UC);
+		protocol = htons(ETH_P_MPLS_UC);
 
 	if (pkt_dev->vlan_id != 0xffff)
-		protocol = __constant_htons(ETH_P_8021Q);
+		protocol = htons(ETH_P_8021Q);
 
 	/* Update any of the values, used when we're incrementing various
 	 * fields.
@@ -2342,14 +2342,14 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 					       pkt_dev->svlan_cfi,
 					       pkt_dev->svlan_p);
 			svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
-			*svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q);
+			*svlan_encapsulated_proto = htons(ETH_P_8021Q);
 		}
 		vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
 		*vlan_tci = build_tci(pkt_dev->vlan_id,
 				      pkt_dev->vlan_cfi,
 				      pkt_dev->vlan_p);
 		vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
-		*vlan_encapsulated_proto = __constant_htons(ETH_P_IP);
+		*vlan_encapsulated_proto = htons(ETH_P_IP);
 	}
 
 	iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr));
@@ -2636,7 +2636,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	int datalen;
 	struct ipv6hdr *iph;
 	struct pktgen_hdr *pgh = NULL;
-	__be16 protocol = __constant_htons(ETH_P_IPV6);
+	__be16 protocol = htons(ETH_P_IPV6);
 	__be32 *mpls;
 	__be16 *vlan_tci = NULL;                 /* Encapsulates priority and VLAN ID */
 	__be16 *vlan_encapsulated_proto = NULL;  /* packet type ID field (or len) for VLAN tag */
@@ -2644,10 +2644,10 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	__be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
 
 	if (pkt_dev->nr_labels)
-		protocol = __constant_htons(ETH_P_MPLS_UC);
+		protocol = htons(ETH_P_MPLS_UC);
 
 	if (pkt_dev->vlan_id != 0xffff)
-		protocol = __constant_htons(ETH_P_8021Q);
+		protocol = htons(ETH_P_8021Q);
 
 	/* Update any of the values, used when we're incrementing various
 	 * fields.
@@ -2678,14 +2678,14 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 					       pkt_dev->svlan_cfi,
 					       pkt_dev->svlan_p);
 			svlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
-			*svlan_encapsulated_proto = __constant_htons(ETH_P_8021Q);
+			*svlan_encapsulated_proto = htons(ETH_P_8021Q);
 		}
 		vlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
 		*vlan_tci = build_tci(pkt_dev->vlan_id,
 				      pkt_dev->vlan_cfi,
 				      pkt_dev->vlan_p);
 		vlan_encapsulated_proto = (__be16 *)skb_put(skb, sizeof(__be16));
-		*vlan_encapsulated_proto = __constant_htons(ETH_P_IPV6);
+		*vlan_encapsulated_proto = htons(ETH_P_IPV6);
 	}
 
 	iph = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr));
@@ -2711,7 +2711,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	udph->len = htons(datalen + sizeof(struct udphdr));
 	udph->check = 0;	/* No checksum */
 
-	*(__be32 *) iph = __constant_htonl(0x60000000);	/* Version + flow */
+	*(__be32 *) iph = htonl(0x60000000);	/* Version + flow */
 
 	if (pkt_dev->traffic_class) {
 		/* Version + traffic class + flow (0) */
-- 
cgit v1.2.3


From 39df232f1a9ba48d41c68ee7d4046756e709cf91 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Sun, 4 Mar 2007 16:11:51 -0800
Subject: [PKTGEN]: fix device name handling

Since devices can change name and other wierdness, don't hold onto
a copy of device name, instead use pointer to output device.

Fix a couple of leaks in error handling path as well.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 137 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 70 insertions(+), 67 deletions(-)

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index edf46fbb27d..895739fdfac 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -211,15 +211,11 @@ struct flow_state {
 };
 
 struct pktgen_dev {
-
 	/*
 	 * Try to keep frequent/infrequent used vars. separated.
 	 */
-
-	char ifname[IFNAMSIZ];
-	char result[512];
-
-	struct pktgen_thread *pg_thread;	/* the owner */
+	struct proc_dir_entry *entry;	/* proc file */
+	struct pktgen_thread *pg_thread;/* the owner */
 	struct list_head list;		/* Used for chaining in the thread's run-queue */
 
 	int running;		/* if this changes to false, the test will stop */
@@ -346,6 +342,8 @@ struct pktgen_dev {
 	unsigned cflows;	/* Concurrent flows (config) */
 	unsigned lflow;		/* Flow length  (config) */
 	unsigned nflows;	/* accumulated flows (stats) */
+
+	char result[512];
 };
 
 struct pktgen_hdr {
@@ -498,7 +496,7 @@ static void pktgen_stop_all_threads_ifs(void);
 static int pktgen_stop_device(struct pktgen_dev *pkt_dev);
 static void pktgen_stop(struct pktgen_thread *t);
 static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
-static int pktgen_mark_device(const char *ifname);
+
 static unsigned int scan_ip6(const char *s, char ip[16]);
 static unsigned int fmt_ip6(char *s, const char ip[16]);
 
@@ -592,7 +590,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 		   "     frags: %d  delay: %u  clone_skb: %d  ifname: %s\n",
 		   pkt_dev->nfrags,
 		   1000 * pkt_dev->delay_us + pkt_dev->delay_ns,
-		   pkt_dev->clone_skb, pkt_dev->ifname);
+		   pkt_dev->clone_skb, pkt_dev->odev->name);
 
 	seq_printf(seq, "     flows: %u flowlen: %u\n", pkt_dev->cflows,
 		   pkt_dev->lflow);
@@ -1683,13 +1681,13 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
 	if_lock(t);
 	list_for_each_entry(pkt_dev, &t->if_list, list)
 		if (pkt_dev->running)
-			seq_printf(seq, "%s ", pkt_dev->ifname);
+			seq_printf(seq, "%s ", pkt_dev->odev->name);
 
 	seq_printf(seq, "\nStopped: ");
 
 	list_for_each_entry(pkt_dev, &t->if_list, list)
 		if (!pkt_dev->running)
-			seq_printf(seq, "%s ", pkt_dev->ifname);
+			seq_printf(seq, "%s ", pkt_dev->odev->name);
 
 	if (t->result[0])
 		seq_printf(seq, "\nResult: %s\n", t->result);
@@ -1835,12 +1833,11 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove)
 /*
  * mark a device for removal
  */
-static int pktgen_mark_device(const char *ifname)
+static void pktgen_mark_device(const char *ifname)
 {
 	struct pktgen_dev *pkt_dev = NULL;
 	const int max_tries = 10, msec_per_try = 125;
 	int i = 0;
-	int ret = 0;
 
 	mutex_lock(&pktgen_thread_lock);
 	pr_debug("pktgen: pktgen_mark_device marking %s for removal\n", ifname);
@@ -1861,32 +1858,49 @@ static int pktgen_mark_device(const char *ifname)
 			printk("pktgen_mark_device: timed out after waiting "
 			       "%d msec for device %s to be removed\n",
 			       msec_per_try * i, ifname);
-			ret = 1;
 			break;
 		}
 
 	}
 
 	mutex_unlock(&pktgen_thread_lock);
+}
 
-	return ret;
+static void pktgen_change_name(struct net_device *dev)
+{
+	struct pktgen_thread *t;
+
+	list_for_each_entry(t, &pktgen_threads, th_list) {
+		struct pktgen_dev *pkt_dev;
+
+		list_for_each_entry(pkt_dev, &t->if_list, list) {
+			if (pkt_dev->odev != dev)
+				continue;
+
+			remove_proc_entry(pkt_dev->entry->name, pg_proc_dir);
+
+			pkt_dev->entry = create_proc_entry(dev->name, 0600,
+							   pg_proc_dir);
+			if (!pkt_dev->entry)
+				printk(KERN_ERR "pktgen: can't move proc "
+				       " entry for '%s'\n", dev->name);
+			break;
+		}
+	}
 }
 
 static int pktgen_device_event(struct notifier_block *unused,
 			       unsigned long event, void *ptr)
 {
-	struct net_device *dev = (struct net_device *)(ptr);
+	struct net_device *dev = ptr;
 
 	/* It is OK that we do not hold the group lock right now,
 	 * as we run under the RTNL lock.
 	 */
 
 	switch (event) {
-	case NETDEV_CHANGEADDR:
-	case NETDEV_GOING_DOWN:
-	case NETDEV_DOWN:
-	case NETDEV_UP:
-		/* Ignore for now */
+	case NETDEV_CHANGENAME:
+		pktgen_change_name(dev);
 		break;
 
 	case NETDEV_UNREGISTER:
@@ -1899,41 +1913,36 @@ static int pktgen_device_event(struct notifier_block *unused,
 
 /* Associate pktgen_dev with a device. */
 
-static struct net_device *pktgen_setup_dev(struct pktgen_dev *pkt_dev)
+static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
 {
 	struct net_device *odev;
+	int err;
 
 	/* Clean old setups */
-
 	if (pkt_dev->odev) {
 		dev_put(pkt_dev->odev);
 		pkt_dev->odev = NULL;
 	}
 
-	odev = dev_get_by_name(pkt_dev->ifname);
-
+	odev = dev_get_by_name(ifname);
 	if (!odev) {
-		printk("pktgen: no such netdevice: \"%s\"\n", pkt_dev->ifname);
-		goto out;
+		printk("pktgen: no such netdevice: \"%s\"\n", ifname);
+		return -ENODEV;
 	}
+
 	if (odev->type != ARPHRD_ETHER) {
-		printk("pktgen: not an ethernet device: \"%s\"\n",
-		       pkt_dev->ifname);
-		goto out_put;
-	}
-	if (!netif_running(odev)) {
-		printk("pktgen: device is down: \"%s\"\n", pkt_dev->ifname);
-		goto out_put;
+		printk("pktgen: not an ethernet device: \"%s\"\n", ifname);
+		err = -EINVAL;
+	} else if (!netif_running(odev)) {
+		printk("pktgen: device is down: \"%s\"\n", ifname);
+		err = -ENETDOWN;
+	} else {
+		pkt_dev->odev = odev;
+		return 0;
 	}
-	pkt_dev->odev = odev;
 
-	return pkt_dev->odev;
-
-out_put:
 	dev_put(odev);
-out:
-	return NULL;
-
+	return err;
 }
 
 /* Read pkt_dev from the interface and set up internal pktgen_dev
@@ -1941,10 +1950,6 @@ out:
  */
 static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 {
-	/* Try once more, just in case it works now. */
-	if (!pkt_dev->odev)
-		pktgen_setup_dev(pkt_dev);
-
 	if (!pkt_dev->odev) {
 		printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n");
 		sprintf(pkt_dev->result,
@@ -2988,7 +2993,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
 
 	if (!pkt_dev->running) {
 		printk("pktgen: interface: %s is already stopped\n",
-		       pkt_dev->ifname);
+		       pkt_dev->odev->name);
 		return -EINVAL;
 	}
 
@@ -3340,7 +3345,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
 	if_lock(t);
 
 	list_for_each_entry(p, &t->if_list, list)
-		if (strncmp(p->ifname, ifname, IFNAMSIZ) == 0) {
+		if (strncmp(p->odev->name, ifname, IFNAMSIZ) == 0) {
 			pkt_dev = p;
 			break;
 		}
@@ -3381,7 +3386,7 @@ out:
 static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 {
 	struct pktgen_dev *pkt_dev;
-	struct proc_dir_entry *pe;
+	int err;
 
 	/* We don't allow a device to be on several threads */
 
@@ -3423,29 +3428,28 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 	pkt_dev->svlan_cfi = 0;
 	pkt_dev->svlan_id = 0xffff;
 
-	strncpy(pkt_dev->ifname, ifname, IFNAMSIZ);
-
-	if (!pktgen_setup_dev(pkt_dev)) {
-		printk("pktgen: ERROR: pktgen_setup_dev failed.\n");
-		if (pkt_dev->flows)
-			vfree(pkt_dev->flows);
-		kfree(pkt_dev);
-		return -ENODEV;
-	}
+	err = pktgen_setup_dev(pkt_dev, ifname);
+	if (err)
+		goto out1;
 
-	pe = create_proc_entry(ifname, 0600, pg_proc_dir);
-	if (!pe) {
+	pkt_dev->entry = create_proc_entry(ifname, 0600, pg_proc_dir);
+	if (!pkt_dev->entry) {
 		printk("pktgen: cannot create %s/%s procfs entry.\n",
 		       PG_PROC_DIR, ifname);
-		if (pkt_dev->flows)
-			vfree(pkt_dev->flows);
-		kfree(pkt_dev);
-		return -EINVAL;
+		err = -EINVAL;
+		goto out2;
 	}
-	pe->proc_fops = &pktgen_if_fops;
-	pe->data = pkt_dev;
+	pkt_dev->entry->proc_fops = &pktgen_if_fops;
+	pkt_dev->entry->data = pkt_dev;
 
 	return add_dev_to_thread(t, pkt_dev);
+out2:
+	dev_put(pkt_dev->odev);
+out1:
+	if (pkt_dev->flows)
+		vfree(pkt_dev->flows);
+	kfree(pkt_dev);
+	return err;
 }
 
 static int __init pktgen_create_thread(int cpu)
@@ -3533,9 +3537,8 @@ static int pktgen_remove_device(struct pktgen_thread *t,
 
 	_rem_dev_from_if_list(t, pkt_dev);
 
-	/* Clean up proc file system */
-
-	remove_proc_entry(pkt_dev->ifname, pg_proc_dir);
+	if (pkt_dev->entry)
+		remove_proc_entry(pkt_dev->entry->name, pg_proc_dir);
 
 	if (pkt_dev->flows)
 		vfree(pkt_dev->flows);
-- 
cgit v1.2.3


From 9d729f72dca9406025bcfa9c1f660d71d9ef0ff5 Mon Sep 17 00:00:00 2001
From: James Morris <jmorris@namei.org>
Date: Sun, 4 Mar 2007 16:12:44 -0800
Subject: [NET]: Convert xtime.tv_sec to get_seconds()

Where appropriate, convert references to xtime.tv_sec to the
get_seconds() helper function.

Signed-off-by: James Morris <jmorris@namei.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h        |  4 ++--
 net/ipv4/route.c         |  2 +-
 net/ipv4/tcp_input.c     |  6 +++---
 net/ipv4/tcp_ipv4.c      | 10 +++++-----
 net/ipv4/tcp_minisocks.c |  8 ++++----
 net/ipv6/xfrm6_output.c  |  2 +-
 net/rxrpc/main.c         |  2 +-
 net/xfrm/xfrm_policy.c   | 12 ++++++------
 net/xfrm/xfrm_state.c    |  6 +++---
 9 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f0c9e3400a0..181c0600af1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1014,7 +1014,7 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int
 {
 	if ((s32)(rx_opt->rcv_tsval - rx_opt->ts_recent) >= 0)
 		return 0;
-	if (xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)
+	if (get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)
 		return 0;
 
 	/* RST segments are not recommended to carry timestamp,
@@ -1029,7 +1029,7 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int
 
 	   However, we can relax time bounds for RST segments to MSL.
 	 */
-	if (rst && xtime.tv_sec >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
+	if (rst && get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_MSL)
 		return 0;
 	return 1;
 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 37e0d4d5cf9..0b3d7bf40f4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2683,7 +2683,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 		id = rt->peer->ip_id_count;
 		if (rt->peer->tcp_ts_stamp) {
 			ts = rt->peer->tcp_ts;
-			tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp;
+			tsage = get_seconds() - rt->peer->tcp_ts_stamp;
 		}
 	}
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d894bbcc1d2..d0a3630f41a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2933,7 +2933,7 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
 static inline void tcp_store_ts_recent(struct tcp_sock *tp)
 {
 	tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
-	tp->rx_opt.ts_recent_stamp = xtime.tv_sec;
+	tp->rx_opt.ts_recent_stamp = get_seconds();
 }
 
 static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
@@ -2947,7 +2947,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
 		 */
 
 		if((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
-		   xtime.tv_sec >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
+		   get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
 			tcp_store_ts_recent(tp);
 	}
 }
@@ -2999,7 +2999,7 @@ static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
-		xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
+		get_seconds() < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS &&
 		!tcp_disordered_ack(sk, skb));
 }
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f6793b4cc66..addac1110f9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -149,7 +149,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 	 */
 	if (tcptw->tw_ts_recent_stamp &&
 	    (twp == NULL || (sysctl_tcp_tw_reuse &&
-			     xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
+			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
 		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
 		if (tp->write_seq == 0)
 			tp->write_seq = 1;
@@ -224,7 +224,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		 * when trying new connection.
 		 */
 		if (peer != NULL &&
-		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
+		    peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) {
 			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
 			tp->rx_opt.ts_recent = peer->tcp_ts;
 		}
@@ -1351,7 +1351,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
 		    peer->v4daddr == saddr) {
-			if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
+			if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
 			    (s32)(peer->tcp_ts - req->ts_recent) >
 							TCP_PAWS_WINDOW) {
 				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
@@ -1770,7 +1770,7 @@ int tcp_v4_remember_stamp(struct sock *sk)
 
 	if (peer) {
 		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
-		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
+		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
 		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
 			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
 			peer->tcp_ts = tp->rx_opt.ts_recent;
@@ -1791,7 +1791,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
 		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 
 		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
-		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
+		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() &&
 		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
 			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
 			peer->tcp_ts	   = tcptw->tw_ts_recent;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 706932726a1..ac4ce48a659 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -149,7 +149,7 @@ kill_with_rst:
 		tw->tw_substate	  = TCP_TIME_WAIT;
 		tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		if (tmp_opt.saw_tstamp) {
-			tcptw->tw_ts_recent_stamp = xtime.tv_sec;
+			tcptw->tw_ts_recent_stamp = get_seconds();
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
 		}
 
@@ -208,7 +208,7 @@ kill:
 
 		if (tmp_opt.saw_tstamp) {
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
-			tcptw->tw_ts_recent_stamp = xtime.tv_sec;
+			tcptw->tw_ts_recent_stamp = get_seconds();
 		}
 
 		inet_twsk_put(tw);
@@ -458,7 +458,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 
 		if (newtp->rx_opt.tstamp_ok) {
 			newtp->rx_opt.ts_recent = req->ts_recent;
-			newtp->rx_opt.ts_recent_stamp = xtime.tv_sec;
+			newtp->rx_opt.ts_recent_stamp = get_seconds();
 			newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
 		} else {
 			newtp->rx_opt.ts_recent_stamp = 0;
@@ -504,7 +504,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 			 * it can be estimated (approximately)
 			 * from another data.
 			 */
-			tmp_opt.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
+			tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
 			paws_reject = tcp_paws_check(&tmp_opt, th->rst);
 		}
 	}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index d6d786b89d2..8e4170f9a0d 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -76,7 +76,7 @@ static int xfrm6_output_one(struct sk_buff *skb)
 		x->curlft.bytes += skb->len;
 		x->curlft.packets++;
 		if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION)
-			x->lastused = (u64)xtime.tv_sec;
+			x->lastused = get_seconds();
 
 		spin_unlock_bh(&x->lock);
 
diff --git a/net/rxrpc/main.c b/net/rxrpc/main.c
index baec1f7fd8b..cead31b5bdf 100644
--- a/net/rxrpc/main.c
+++ b/net/rxrpc/main.c
@@ -37,7 +37,7 @@ static int __init rxrpc_initialise(void)
 	int ret;
 
 	/* my epoch value */
-	rxrpc_epoch = htonl(xtime.tv_sec);
+	rxrpc_epoch = htonl(get_seconds());
 
 	/* register the /proc interface */
 #ifdef CONFIG_PROC_FS
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 785c3e39f06..19425755455 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -268,7 +268,7 @@ static inline unsigned long make_jiffies(long secs)
 static void xfrm_policy_timer(unsigned long data)
 {
 	struct xfrm_policy *xp = (struct xfrm_policy*)data;
-	unsigned long now = (unsigned long)xtime.tv_sec;
+	unsigned long now = get_seconds();
 	long next = LONG_MAX;
 	int warn = 0;
 	int dir;
@@ -690,7 +690,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	}
 	policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
 	hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
-	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
+	policy->curlft.add_time = get_seconds();
 	policy->curlft.use_time = 0;
 	if (!mod_timer(&policy->timer, jiffies + HZ))
 		xfrm_pol_hold(policy);
@@ -1133,7 +1133,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 	old_pol = sk->sk_policy[dir];
 	sk->sk_policy[dir] = pol;
 	if (pol) {
-		pol->curlft.add_time = (unsigned long)xtime.tv_sec;
+		pol->curlft.add_time = get_seconds();
 		pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
 		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
 	}
@@ -1386,7 +1386,7 @@ restart:
 		return 0;
 
 	family = dst_orig->ops->family;
-	policy->curlft.use_time = (unsigned long)xtime.tv_sec;
+	policy->curlft.use_time = get_seconds();
 	pols[0] = policy;
 	npols ++;
 	xfrm_nr += pols[0]->xfrm_nr;
@@ -1682,7 +1682,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		return 1;
 	}
 
-	pol->curlft.use_time = (unsigned long)xtime.tv_sec;
+	pol->curlft.use_time = get_seconds();
 
 	pols[0] = pol;
 	npols ++;
@@ -1694,7 +1694,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		if (pols[1]) {
 			if (IS_ERR(pols[1]))
 				return 0;
-			pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec;
+			pols[1]->curlft.use_time = get_seconds();
 			npols ++;
 		}
 	}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index e3a0bcfa5df..63a20e81816 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -233,7 +233,7 @@ static inline unsigned long make_jiffies(long secs)
 static void xfrm_timer_handler(unsigned long data)
 {
 	struct xfrm_state *x = (struct xfrm_state*)data;
-	unsigned long now = (unsigned long)xtime.tv_sec;
+	unsigned long now = get_seconds();
 	long next = LONG_MAX;
 	int warn = 0;
 	int err = 0;
@@ -326,7 +326,7 @@ struct xfrm_state *xfrm_state_alloc(void)
 		init_timer(&x->rtimer);
 		x->rtimer.function = xfrm_replay_timer_handler;
 		x->rtimer.data     = (unsigned long)x;
-		x->curlft.add_time = (unsigned long)xtime.tv_sec;
+		x->curlft.add_time = get_seconds();
 		x->lft.soft_byte_limit = XFRM_INF;
 		x->lft.soft_packet_limit = XFRM_INF;
 		x->lft.hard_byte_limit = XFRM_INF;
@@ -1051,7 +1051,7 @@ EXPORT_SYMBOL(xfrm_state_update);
 int xfrm_state_check_expire(struct xfrm_state *x)
 {
 	if (!x->curlft.use_time)
-		x->curlft.use_time = (unsigned long)xtime.tv_sec;
+		x->curlft.use_time = get_seconds();
 
 	if (x->km.state != XFRM_STATE_VALID)
 		return -EINVAL;
-- 
cgit v1.2.3


From 3927f2e8f9afa3424bb51ca81f7abac01ffd0005 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Sun, 25 Mar 2007 19:54:23 -0700
Subject: [NET]: div64_64 consolidate (rev3)

Here is the current version of the 64 bit divide common code.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/asm-arm/div64.h      |  3 +++
 include/asm-generic/div64.h  |  7 +++++++
 include/asm-i386/div64.h     |  4 ++++
 include/asm-m68k/div64.h     |  3 +++
 include/asm-mips/div64.h     | 11 ++++++++++-
 include/asm-um/div64.h       |  1 +
 include/asm-xtensa/div64.h   |  6 ++++++
 lib/Makefile                 |  5 +++--
 lib/div64.c                  | 22 ++++++++++++++++++++++
 net/ipv4/tcp_cubic.c         | 23 -----------------------
 net/ipv4/tcp_yeah.c          | 21 ---------------------
 net/ipv4/tcp_yeah.h          |  1 +
 net/netfilter/xt_connbytes.c | 16 ----------------
 13 files changed, 60 insertions(+), 63 deletions(-)

diff --git a/include/asm-arm/div64.h b/include/asm-arm/div64.h
index 37e0a96e878..0b5f881c3d8 100644
--- a/include/asm-arm/div64.h
+++ b/include/asm-arm/div64.h
@@ -2,6 +2,7 @@
 #define __ASM_ARM_DIV64
 
 #include <asm/system.h>
+#include <linux/types.h>
 
 /*
  * The semantics of do_div() are:
@@ -223,4 +224,6 @@
 
 #endif
 
+extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
+
 #endif
diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h
index 8f4e3193342..a4a49370793 100644
--- a/include/asm-generic/div64.h
+++ b/include/asm-generic/div64.h
@@ -30,6 +30,11 @@
 	__rem;							\
  })
 
+static inline uint64_t div64_64(uint64_t dividend, uint64_t divisor)
+{
+	return dividend / divisor;
+}
+
 #elif BITS_PER_LONG == 32
 
 extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
@@ -49,6 +54,8 @@ extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
 	__rem;						\
  })
 
+extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
+
 #else /* BITS_PER_LONG == ?? */
 
 # error do_div() does not yet support the C64
diff --git a/include/asm-i386/div64.h b/include/asm-i386/div64.h
index 75c67c785bb..438e980068b 100644
--- a/include/asm-i386/div64.h
+++ b/include/asm-i386/div64.h
@@ -1,6 +1,8 @@
 #ifndef __I386_DIV64
 #define __I386_DIV64
 
+#include <linux/types.h>
+
 /*
  * do_div() is NOT a C function. It wants to return
  * two values (the quotient and the remainder), but
@@ -45,4 +47,6 @@ div_ll_X_l_rem(long long divs, long div, long *rem)
 	return dum2;
 
 }
+
+extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
 #endif
diff --git a/include/asm-m68k/div64.h b/include/asm-m68k/div64.h
index 9f65de1a248..33caad1628d 100644
--- a/include/asm-m68k/div64.h
+++ b/include/asm-m68k/div64.h
@@ -1,6 +1,8 @@
 #ifndef _M68K_DIV64_H
 #define _M68K_DIV64_H
 
+#include <linux/types.h>
+
 /* n = n / base; return rem; */
 
 #define do_div(n, base) ({					\
@@ -23,4 +25,5 @@
 	__rem;							\
 })
 
+extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
 #endif /* _M68K_DIV64_H */
diff --git a/include/asm-mips/div64.h b/include/asm-mips/div64.h
index d107832de1b..66189f5f639 100644
--- a/include/asm-mips/div64.h
+++ b/include/asm-mips/div64.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2000, 2004  Maciej W. Rozycki
- * Copyright (C) 2003 Ralf Baechle
+ * Copyright (C) 2003, 07 Ralf Baechle (ralf@linux-mips.org)
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -9,6 +9,8 @@
 #ifndef _ASM_DIV64_H
 #define _ASM_DIV64_H
 
+#include <linux/types.h>
+
 #if (_MIPS_SZLONG == 32)
 
 #include <asm/compiler.h>
@@ -78,6 +80,8 @@
 	__quot = __quot << 32 | __low; \
 	(n) = __quot; \
 	__mod; })
+
+extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
 #endif /* (_MIPS_SZLONG == 32) */
 
 #if (_MIPS_SZLONG == 64)
@@ -101,6 +105,11 @@
 	(n) = __quot; \
 	__mod; })
 
+static inline uint64_t div64_64(uint64_t dividend, uint64_t divisor)
+{
+	return dividend / divisor;
+}
+
 #endif /* (_MIPS_SZLONG == 64) */
 
 #endif /* _ASM_DIV64_H */
diff --git a/include/asm-um/div64.h b/include/asm-um/div64.h
index 1e17f7409ca..7b73b2cd5b3 100644
--- a/include/asm-um/div64.h
+++ b/include/asm-um/div64.h
@@ -3,4 +3,5 @@
 
 #include "asm/arch/div64.h"
 
+extern uint64_t div64_64(uint64_t dividend, uint64_t divisor);
 #endif
diff --git a/include/asm-xtensa/div64.h b/include/asm-xtensa/div64.h
index c4a10577638..20965e3af1d 100644
--- a/include/asm-xtensa/div64.h
+++ b/include/asm-xtensa/div64.h
@@ -11,9 +11,15 @@
 #ifndef _XTENSA_DIV64_H
 #define _XTENSA_DIV64_H
 
+#include <linux/types.h>
+
 #define do_div(n,base) ({ \
 	int __res = n % ((unsigned int) base); \
 	n /= (unsigned int) base; \
 	__res; })
 
+static inline uint64_t div64_64(uint64_t dividend, uint64_t divisor)
+{
+	return dividend / divisor;
+}
 #endif
diff --git a/lib/Makefile b/lib/Makefile
index 992a39ef9ff..ae57f357fec 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -4,7 +4,7 @@
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o \
-	 idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \
+	 idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o
 
 lib-$(CONFIG_MMU) += ioremap.o
@@ -12,7 +12,8 @@ lib-$(CONFIG_SMP) += cpumask.o
 
 lib-y	+= kobject.o kref.o kobject_uevent.o klist.o
 
-obj-y += sort.o parser.o halfmd4.o debug_locks.o random32.o bust_spinlocks.o
+obj-y += div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
+	 bust_spinlocks.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/div64.c b/lib/div64.c
index 365719f8483..c3d7655cdfb 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -58,4 +58,26 @@ uint32_t __div64_32(uint64_t *n, uint32_t base)
 
 EXPORT_SYMBOL(__div64_32);
 
+/* 64bit divisor, dividend and result. dynamic precision */
+uint64_t div64_64(uint64_t dividend, uint64_t divisor)
+{
+	uint32_t d = divisor;
+
+	if (divisor > 0xffffffffULL) {
+		unsigned int shift = fls(divisor >> 32);
+
+		d = divisor >> shift;
+		dividend >>= shift;
+	}
+
+	/* avoid 64 bit division if possible */
+	if (dividend >> 32)
+		do_div(dividend, d);
+	else
+		dividend = (uint32_t) dividend / d;
+
+	return dividend;
+}
+EXPORT_SYMBOL(div64_64);
+
 #endif /* BITS_PER_LONG == 32 */
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 9a582fb4ef9..6f08adbda54 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -51,8 +51,6 @@ MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_
 module_param(tcp_friendliness, int, 0644);
 MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
 
-#include <asm/div64.h>
-
 /* BIC TCP Parameters */
 struct bictcp {
 	u32	cnt;		/* increase cwnd by 1 after ACKs */
@@ -93,27 +91,6 @@ static void bictcp_init(struct sock *sk)
 		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
 }
 
-/* 64bit divisor, dividend and result. dynamic precision */
-static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
-{
-	u_int32_t d = divisor;
-
-	if (divisor > 0xffffffffULL) {
-		unsigned int shift = fls(divisor >> 32);
-
-		d = divisor >> shift;
-		dividend >>= shift;
-	}
-
-	/* avoid 64 bit division if possible */
-	if (dividend >> 32)
-		do_div(dividend, d);
-	else
-		dividend = (uint32_t) dividend / d;
-
-	return dividend;
-}
-
 /*
  * calculate the cubic root of x using Newton-Raphson
  */
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 815e020e98f..18355a2608e 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -73,27 +73,6 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked)
 		yeah->pkts_acked = pkts_acked;
 }
 
-/* 64bit divisor, dividend and result. dynamic precision */
-static inline u64 div64_64(u64 dividend, u64 divisor)
-{
-	u32 d = divisor;
-
-	if (divisor > 0xffffffffULL) {
-		unsigned int shift = fls(divisor >> 32);
-
-		d = divisor >> shift;
-		dividend >>= shift;
-	}
-
-	/* avoid 64 bit division if possible */
-	if (dividend >> 32)
-		do_div(dividend, d);
-	else
-		dividend = (u32) dividend / d;
-
-	return dividend;
-}
-
 static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
 				 u32 seq_rtt, u32 in_flight, int flag)
 {
diff --git a/net/ipv4/tcp_yeah.h b/net/ipv4/tcp_yeah.h
index b3255dba4e2..a62d82038fd 100644
--- a/net/ipv4/tcp_yeah.h
+++ b/net/ipv4/tcp_yeah.h
@@ -2,6 +2,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/inet_diag.h>
+#include <asm/div64.h>
 
 #include <net/tcp.h>
 
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 5e32dfa2668..302043bc41b 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -24,22 +24,6 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection");
 MODULE_ALIAS("ipt_connbytes");
 
-/* 64bit divisor, dividend and result. dynamic precision */
-static u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
-{
-	u_int32_t d = divisor;
-
-	if (divisor > 0xffffffffULL) {
-		unsigned int shift = fls(divisor >> 32);
-
-		d = divisor >> shift;
-		dividend >>= shift;
-	}
-
-	do_div(dividend, d);
-	return dividend;
-}
-
 static int
 match(const struct sk_buff *skb,
       const struct net_device *in,
-- 
cgit v1.2.3


From b7aa0bf70c4afb9e38be25f5c0922498d0f8684c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 19 Apr 2007 16:16:32 -0700
Subject: [NET]: convert network timestamps to ktime_t

We currently use a special structure (struct skb_timeval) and plain
'struct timeval' to store packet timestamps in sk_buffs and struct
sock.

This has some drawbacks :
- Fixed resolution of micro second.
- Waste of space on 64bit platforms where sizeof(struct timeval)=16

I suggest using ktime_t that is a nice abstraction of high resolution
time services, currently capable of nanosecond resolution.

As sizeof(ktime_t) is 8 bytes, using ktime_t in 'struct sock' permits
a 8 byte shrink of this structure on 64bit architectures. Some other
structures also benefit from this size reduction (struct ipq in
ipv4/ip_fragment.c, struct frag_queue in ipv6/reassembly.c, ...)

Once this ktime infrastructure adopted, we can more easily provide
nanosecond resolution on top of it. (ioctl SIOCGSTAMPNS and/or
SO_TIMESTAMPNS/SCM_TIMESTAMPNS)

Note : this patch includes a bug correction in
compat_sock_get_timestamp() where a "err = 0;" was missing (so this
syscall returned -ENOENT instead of 0)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
CC: Stephen Hemminger <shemminger@linux-foundation.org>
CC: John find <linux.kernel@free.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h                  | 26 +++++---------------------
 include/net/sock.h                      | 18 +++++++++---------
 kernel/time.c                           |  1 +
 net/bridge/netfilter/ebt_ulog.c         |  6 ++++--
 net/compat.c                            | 15 ++++++++++-----
 net/core/dev.c                          | 19 ++++---------------
 net/core/sock.c                         | 16 +++++++++-------
 net/econet/af_econet.c                  |  2 +-
 net/ipv4/ip_fragment.c                  |  8 ++++----
 net/ipv4/netfilter/ip_queue.c           |  6 ++++--
 net/ipv4/netfilter/ipt_ULOG.c           |  8 +++++---
 net/ipv6/exthdrs.c                      |  2 +-
 net/ipv6/netfilter/ip6_queue.c          |  6 ++++--
 net/ipv6/netfilter/nf_conntrack_reasm.c |  6 +++---
 net/ipv6/reassembly.c                   |  6 +++---
 net/ipx/af_ipx.c                        |  4 ++--
 net/netfilter/nfnetlink_log.c           |  8 ++++----
 net/netfilter/nfnetlink_queue.c         |  8 ++++----
 net/packet/af_packet.c                  |  8 +++++---
 net/sunrpc/svcsock.c                    | 10 +++-------
 20 files changed, 85 insertions(+), 98 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5992f65b418..f9441b5f8d1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -27,6 +27,7 @@
 #include <net/checksum.h>
 #include <linux/rcupdate.h>
 #include <linux/dmaengine.h>
+#include <linux/hrtimer.h>
 
 #define HAVE_ALLOC_SKB		/* For the drivers to know */
 #define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
@@ -156,11 +157,6 @@ struct skb_shared_info {
 #define SKB_DATAREF_SHIFT 16
 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1)
 
-struct skb_timeval {
-	u32	off_sec;
-	u32	off_usec;
-};
-
 
 enum {
 	SKB_FCLONE_UNAVAILABLE,
@@ -233,7 +229,7 @@ struct sk_buff {
 	struct sk_buff		*prev;
 
 	struct sock		*sk;
-	struct skb_timeval	tstamp;
+	ktime_t			tstamp;
 	struct net_device	*dev;
 	int			iif;
 	/* 4 byte hole on 64 bit*/
@@ -1365,26 +1361,14 @@ extern void skb_add_mtu(int mtu);
  */
 static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval *stamp)
 {
-	stamp->tv_sec  = skb->tstamp.off_sec;
-	stamp->tv_usec = skb->tstamp.off_usec;
+	*stamp = ktime_to_timeval(skb->tstamp);
 }
 
-/**
- * 	skb_set_timestamp - set timestamp of a skb
- *	@skb: skb to set stamp of
- *	@stamp: pointer to struct timeval to get stamp from
- *
- *	Timestamps are stored in the skb as offsets to a base timestamp.
- *	This function converts a struct timeval to an offset and stores
- *	it in the skb.
- */
-static inline void skb_set_timestamp(struct sk_buff *skb, const struct timeval *stamp)
+static inline void __net_timestamp(struct sk_buff *skb)
 {
-	skb->tstamp.off_sec  = stamp->tv_sec;
-	skb->tstamp.off_usec = stamp->tv_usec;
+	skb->tstamp = ktime_get_real();
 }
 
-extern void __net_timestamp(struct sk_buff *skb);
 
 extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
 
diff --git a/include/net/sock.h b/include/net/sock.h
index a3366c3c837..9583639090d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -244,7 +244,7 @@ struct sock {
 	struct sk_filter      	*sk_filter;
 	void			*sk_protinfo;
 	struct timer_list	sk_timer;
-	struct timeval		sk_stamp;
+	ktime_t			sk_stamp;
 	struct socket		*sk_socket;
 	void			*sk_user_data;
 	struct page		*sk_sndmsg_page;
@@ -1307,19 +1307,19 @@ static inline int sock_intr_errno(long timeo)
 static __inline__ void
 sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 {
-	struct timeval stamp;
+	ktime_t kt = skb->tstamp;
 
-	skb_get_timestamp(skb, &stamp);
 	if (sock_flag(sk, SOCK_RCVTSTAMP)) {
+		struct timeval tv;
 		/* Race occurred between timestamp enabling and packet
 		   receiving.  Fill in the current time for now. */
-		if (stamp.tv_sec == 0)
-			do_gettimeofday(&stamp);
-		skb_set_timestamp(skb, &stamp);
-		put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval),
-			 &stamp);
+		if (kt.tv64 == 0)
+			kt = ktime_get_real();
+		skb->tstamp = kt;
+		tv = ktime_to_timeval(kt);
+		put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(tv), &tv);
 	} else
-		sk->sk_stamp = stamp;
+		sk->sk_stamp = kt;
 }
 
 /**
diff --git a/kernel/time.c b/kernel/time.c
index 2f47888e46c..a1439f421d0 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -469,6 +469,7 @@ struct timeval ns_to_timeval(const s64 nsec)
 
 	return tv;
 }
+EXPORT_SYMBOL(ns_to_timeval);
 
 /*
  * Convert jiffies to milliseconds and back.
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 8e15cc47f6c..259f5c370f3 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -130,6 +130,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 	unsigned int group = uloginfo->nlgroup;
 	ebt_ulog_buff_t *ub = &ulog_buffers[group];
 	spinlock_t *lock = &ub->lock;
+	ktime_t kt;
 
 	if ((uloginfo->cprange == 0) ||
 	    (uloginfo->cprange > skb->len + ETH_HLEN))
@@ -164,9 +165,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 
 	/* Fill in the ulog data */
 	pm->version = EBT_ULOG_VERSION;
-	do_gettimeofday(&pm->stamp);
+	kt = ktime_get_real();
+	pm->stamp = ktime_to_timeval(kt);
 	if (ub->qlen == 1)
-		skb_set_timestamp(ub->skb, &pm->stamp);
+		ub->skb->tstamp = kt;
 	pm->data_len = copy_len;
 	pm->mark = skb->mark;
 	pm->hook = hooknr;
diff --git a/net/compat.c b/net/compat.c
index 1f32866d09b..17c2710b2b9 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -545,15 +545,20 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 	struct compat_timeval __user *ctv =
 			(struct compat_timeval __user*) userstamp;
 	int err = -ENOENT;
+	struct timeval tv;
 
 	if (!sock_flag(sk, SOCK_TIMESTAMP))
 		sock_enable_timestamp(sk);
-	if (sk->sk_stamp.tv_sec == -1)
+	tv = ktime_to_timeval(sk->sk_stamp);
+	if (tv.tv_sec == -1)
 		return err;
-	if (sk->sk_stamp.tv_sec == 0)
-		do_gettimeofday(&sk->sk_stamp);
-	if (put_user(sk->sk_stamp.tv_sec, &ctv->tv_sec) ||
-			put_user(sk->sk_stamp.tv_usec, &ctv->tv_usec))
+	if (tv.tv_sec == 0) {
+		sk->sk_stamp = ktime_get_real();
+		tv = ktime_to_timeval(sk->sk_stamp);
+	}
+	err = 0;
+	if (put_user(tv.tv_sec, &ctv->tv_sec) ||
+			put_user(tv.tv_usec, &ctv->tv_usec))
 		err = -EFAULT;
 	return err;
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 4dc93cc4d5b..582db646cc5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1031,23 +1031,12 @@ void net_disable_timestamp(void)
 	atomic_dec(&netstamp_needed);
 }
 
-void __net_timestamp(struct sk_buff *skb)
-{
-	struct timeval tv;
-
-	do_gettimeofday(&tv);
-	skb_set_timestamp(skb, &tv);
-}
-EXPORT_SYMBOL(__net_timestamp);
-
 static inline void net_timestamp(struct sk_buff *skb)
 {
 	if (atomic_read(&netstamp_needed))
 		__net_timestamp(skb);
-	else {
-		skb->tstamp.off_sec = 0;
-		skb->tstamp.off_usec = 0;
-	}
+	else
+		skb->tstamp.tv64 = 0;
 }
 
 /*
@@ -1577,7 +1566,7 @@ int netif_rx(struct sk_buff *skb)
 	if (netpoll_rx(skb))
 		return NET_RX_DROP;
 
-	if (!skb->tstamp.off_sec)
+	if (!skb->tstamp.tv64)
 		net_timestamp(skb);
 
 	/*
@@ -1769,7 +1758,7 @@ int netif_receive_skb(struct sk_buff *skb)
 	if (skb->dev->poll && netpoll_rx(skb))
 		return NET_RX_DROP;
 
-	if (!skb->tstamp.off_sec)
+	if (!skb->tstamp.tv64)
 		net_timestamp(skb);
 
 	if (!skb->iif)
diff --git a/net/core/sock.c b/net/core/sock.c
index 6d35d5775ba..6ddb3664b99 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1512,8 +1512,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
 	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;
 
-	sk->sk_stamp.tv_sec     = -1L;
-	sk->sk_stamp.tv_usec    = -1L;
+	sk->sk_stamp = ktime_set(-1L, -1L);
 
 	atomic_set(&sk->sk_refcnt, 1);
 }
@@ -1554,14 +1553,17 @@ EXPORT_SYMBOL(release_sock);
 
 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 {
+	struct timeval tv;
 	if (!sock_flag(sk, SOCK_TIMESTAMP))
 		sock_enable_timestamp(sk);
-	if (sk->sk_stamp.tv_sec == -1)
+	tv = ktime_to_timeval(sk->sk_stamp);
+	if (tv.tv_sec == -1)
 		return -ENOENT;
-	if (sk->sk_stamp.tv_sec == 0)
-		do_gettimeofday(&sk->sk_stamp);
-	return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
-		-EFAULT : 0;
+	if (tv.tv_sec == 0) {
+		sk->sk_stamp = ktime_get_real();
+		tv = ktime_to_timeval(sk->sk_stamp);
+	}
+	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
 }
 EXPORT_SYMBOL(sock_get_timestamp);
 
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index bc12e36263f..f573eddc603 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -162,7 +162,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
 	if (err)
 		goto out_free;
-	skb_get_timestamp(skb, &sk->sk_stamp);
+	sk->sk_stamp = skb->tstamp;
 
 	if (msg->msg_name)
 		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b6f05538037..e10be7d7752 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -92,7 +92,7 @@ struct ipq {
 	spinlock_t	lock;
 	atomic_t	refcnt;
 	struct timer_list timer;	/* when will this queue expire?		*/
-	struct timeval	stamp;
+	ktime_t		stamp;
 	int             iif;
 	unsigned int    rid;
 	struct inet_peer *peer;
@@ -592,7 +592,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	if (skb->dev)
 		qp->iif = skb->dev->ifindex;
 	skb->dev = NULL;
-	skb_get_timestamp(skb, &qp->stamp);
+	qp->stamp = skb->tstamp;
 	qp->meat += skb->len;
 	atomic_add(skb->truesize, &ip_frag_mem);
 	if (offset == 0)
@@ -674,7 +674,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
 
 	head->next = NULL;
 	head->dev = dev;
-	skb_set_timestamp(head, &qp->stamp);
+	head->tstamp = qp->stamp;
 
 	iph = head->nh.iph;
 	iph->frag_off = 0;
@@ -734,7 +734,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
 	return NULL;
 }
 
-void ipfrag_init(void)
+void __init ipfrag_init(void)
 {
 	ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
 				 (jiffies ^ (jiffies >> 6)));
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index a14798a850d..5842f1aa973 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -197,6 +197,7 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 	struct sk_buff *skb;
 	struct ipq_packet_msg *pmsg;
 	struct nlmsghdr *nlh;
+	struct timeval tv;
 
 	read_lock_bh(&queue_lock);
 
@@ -241,8 +242,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 
 	pmsg->packet_id       = (unsigned long )entry;
 	pmsg->data_len        = data_len;
-	pmsg->timestamp_sec   = entry->skb->tstamp.off_sec;
-	pmsg->timestamp_usec  = entry->skb->tstamp.off_usec;
+	tv = ktime_to_timeval(entry->skb->tstamp);
+	pmsg->timestamp_sec   = tv.tv_sec;
+	pmsg->timestamp_usec  = tv.tv_usec;
 	pmsg->mark            = entry->skb->mark;
 	pmsg->hook            = entry->info->hook;
 	pmsg->hw_protocol     = entry->skb->protocol;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 9acc018766f..9718b666a38 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -187,6 +187,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
 	ulog_packet_msg_t *pm;
 	size_t size, copy_len;
 	struct nlmsghdr *nlh;
+	struct timeval tv;
 
 	/* ffs == find first bit set, necessary because userspace
 	 * is already shifting groupnumber, but we need unshifted.
@@ -232,13 +233,14 @@ static void ipt_ulog_packet(unsigned int hooknum,
 	pm = NLMSG_DATA(nlh);
 
 	/* We might not have a timestamp, get one */
-	if (skb->tstamp.off_sec == 0)
+	if (skb->tstamp.tv64 == 0)
 		__net_timestamp((struct sk_buff *)skb);
 
 	/* copy hook, prefix, timestamp, payload, etc. */
 	pm->data_len = copy_len;
-	put_unaligned(skb->tstamp.off_sec, &pm->timestamp_sec);
-	put_unaligned(skb->tstamp.off_usec, &pm->timestamp_usec);
+	tv = ktime_to_timeval(skb->tstamp);
+	put_unaligned(tv.tv_sec, &pm->timestamp_sec);
+	put_unaligned(tv.tv_usec, &pm->timestamp_usec);
 	put_unaligned(skb->mark, &pm->mark);
 	pm->hook = hooknum;
 	if (prefix != NULL)
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index fb39604c3d0..a963a31e5fb 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -255,7 +255,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 	ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
 	ipv6_addr_copy(&hao->addr, &tmp_addr);
 
-	if (skb->tstamp.off_sec == 0)
+	if (skb->tstamp.tv64 == 0)
 		__net_timestamp(skb);
 
 	return 1;
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index fdb30a5916e..66a2c413525 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -195,6 +195,7 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 	struct sk_buff *skb;
 	struct ipq_packet_msg *pmsg;
 	struct nlmsghdr *nlh;
+	struct timeval tv;
 
 	read_lock_bh(&queue_lock);
 
@@ -239,8 +240,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 
 	pmsg->packet_id       = (unsigned long )entry;
 	pmsg->data_len        = data_len;
-	pmsg->timestamp_sec   = entry->skb->tstamp.off_sec;
-	pmsg->timestamp_usec  = entry->skb->tstamp.off_usec;
+	tv = ktime_to_timeval(entry->skb->tstamp);
+	pmsg->timestamp_sec   = tv.tv_sec;
+	pmsg->timestamp_usec  = tv.tv_usec;
 	pmsg->mark            = entry->skb->mark;
 	pmsg->hook            = entry->info->hook;
 	pmsg->hw_protocol     = entry->skb->protocol;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 15ab1e3e8b5..c311b9a12ca 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -82,7 +82,7 @@ struct nf_ct_frag6_queue
 	struct sk_buff		*fragments;
 	int			len;
 	int			meat;
-	struct timeval		stamp;
+	ktime_t			stamp;
 	unsigned int		csum;
 	__u8			last_in;	/* has first/last segment arrived? */
 #define COMPLETE		4
@@ -542,7 +542,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 		fq->fragments = skb;
 
 	skb->dev = NULL;
-	skb_get_timestamp(skb, &fq->stamp);
+	fq->stamp = skb->tstamp;
 	fq->meat += skb->len;
 	atomic_add(skb->truesize, &nf_ct_frag6_mem);
 
@@ -648,7 +648,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 
 	head->next = NULL;
 	head->dev = dev;
-	skb_set_timestamp(head, &fq->stamp);
+	head->tstamp = fq->stamp;
 	head->nh.ipv6h->payload_len = htons(payload_len);
 
 	/* Yes, and fold redundant checksum back. 8) */
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 7034c54e501..1dde449379f 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -88,7 +88,7 @@ struct frag_queue
 	int			len;
 	int			meat;
 	int			iif;
-	struct timeval		stamp;
+	ktime_t			stamp;
 	unsigned int		csum;
 	__u8			last_in;	/* has first/last segment arrived? */
 #define COMPLETE		4
@@ -562,7 +562,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	if (skb->dev)
 		fq->iif = skb->dev->ifindex;
 	skb->dev = NULL;
-	skb_get_timestamp(skb, &fq->stamp);
+	fq->stamp = skb->tstamp;
 	fq->meat += skb->len;
 	atomic_add(skb->truesize, &ip6_frag_mem);
 
@@ -663,7 +663,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 
 	head->next = NULL;
 	head->dev = dev;
-	skb_set_timestamp(head, &fq->stamp);
+	head->tstamp = fq->stamp;
 	head->nh.ipv6h->payload_len = htons(payload_len);
 	IP6CB(head)->nhoff = nhoff;
 
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index cac35a77f06..6c6c0a3a0ab 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1807,8 +1807,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
 				     copied);
 	if (rc)
 		goto out_free;
-	if (skb->tstamp.off_sec)
-		skb_get_timestamp(skb, &sk->sk_stamp);
+	if (skb->tstamp.tv64)
+		sk->sk_stamp = skb->tstamp;
 
 	msg->msg_namelen = sizeof(*sipx);
 
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 5cb30ebba0f..5eeebd2efa7 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -509,11 +509,11 @@ __build_packet_message(struct nfulnl_instance *inst,
 		NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw);
 	}
 
-	if (skb->tstamp.off_sec) {
+	if (skb->tstamp.tv64) {
 		struct nfulnl_msg_packet_timestamp ts;
-
-		ts.sec = cpu_to_be64(skb->tstamp.off_sec);
-		ts.usec = cpu_to_be64(skb->tstamp.off_usec);
+		struct timeval tv = ktime_to_timeval(skb->tstamp);
+		ts.sec = cpu_to_be64(tv.tv_sec);
+		ts.usec = cpu_to_be64(tv.tv_usec);
 
 		NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts);
 	}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index d9ce4a71d0f..cfbee39f61d 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -495,11 +495,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 		NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
 	}
 
-	if (entskb->tstamp.off_sec) {
+	if (entskb->tstamp.tv64) {
 		struct nfqnl_msg_packet_timestamp ts;
-
-		ts.sec = cpu_to_be64(entskb->tstamp.off_sec);
-		ts.usec = cpu_to_be64(entskb->tstamp.off_usec);
+		struct timeval tv = ktime_to_timeval(entskb->tstamp);
+		ts.sec = cpu_to_be64(tv.tv_sec);
+		ts.usec = cpu_to_be64(tv.tv_usec);
 
 		NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts);
 	}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 28d47e8f287..f9866a8456a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -582,6 +582,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
 	unsigned short macoff, netoff;
 	struct sk_buff *copy_skb = NULL;
+	struct timeval tv;
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto drop;
@@ -656,12 +657,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	h->tp_snaplen = snaplen;
 	h->tp_mac = macoff;
 	h->tp_net = netoff;
-	if (skb->tstamp.off_sec == 0) {
+	if (skb->tstamp.tv64 == 0) {
 		__net_timestamp(skb);
 		sock_enable_timestamp(sk);
 	}
-	h->tp_sec = skb->tstamp.off_sec;
-	h->tp_usec = skb->tstamp.off_usec;
+	tv = ktime_to_timeval(skb->tstamp);
+	h->tp_sec = tv.tv_sec;
+	h->tp_usec = tv.tv_usec;
 
 	sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
 	sll->sll_halen = 0;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 2772fee9388..22f61aee482 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -798,16 +798,12 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 		dprintk("svc: recvfrom returned error %d\n", -err);
 	}
 	rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
-	if (skb->tstamp.off_sec == 0) {
-		struct timeval tv;
-
-		tv.tv_sec = xtime.tv_sec;
-		tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC;
-		skb_set_timestamp(skb, &tv);
+	if (skb->tstamp.tv64 == 0) {
+		skb->tstamp = ktime_get_real();
 		/* Don't enable netstamp, sunrpc doesn't
 		   need that much accuracy */
 	}
-	skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp);
+	svsk->sk_sk->sk_stamp = skb->tstamp;
 	set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
 
 	/*
-- 
cgit v1.2.3


From b558ff799977a4eda8b3823d1cf6c1c33becb671 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 6 Mar 2007 17:02:35 -0800
Subject: [NETLINK]: Mirror UDP MSG_TRUNC semantics.

If the user passes MSG_TRUNC in via msg_flags, return
the full packet size not the truncated size.

Idea from Herbert Xu and Thomas Graf.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c48b0f49f00..5890210d773 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1242,6 +1242,9 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 
 	scm_recv(sock, msg, siocb->scm, flags);
 
+	if (flags & MSG_TRUNC)
+		copied = skb->len;
+
 out:
 	netlink_rcv_wake(sk);
 	return err ? : copied;
-- 
cgit v1.2.3


From 7159039a128fa0a73ca7b532f6e1d30d9885277f Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Thu, 22 Feb 2007 22:05:40 +0900
Subject: [IPV6]: Decentralize EXPORT_SYMBOLs.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/Makefile        |  2 +-
 net/ipv6/addrconf.c      |  9 +++++++++
 net/ipv6/af_inet6.c      | 12 ++++++++++++
 net/ipv6/exthdrs.c       |  2 ++
 net/ipv6/icmp.c          |  5 +++++
 net/ipv6/ip6_output.c    |  2 ++
 net/ipv6/ipv6_sockglue.c |  3 +++
 net/ipv6/ipv6_syms.c     | 36 ------------------------------------
 net/ipv6/ndisc.c         |  2 ++
 net/ipv6/protocol.c      |  4 ++++
 net/ipv6/route.c         |  3 +++
 net/ipv6/xfrm6_input.c   |  4 ++++
 net/ipv6/xfrm6_output.c  |  2 ++
 13 files changed, 49 insertions(+), 37 deletions(-)
 delete mode 100644 net/ipv6/ipv6_syms.c

diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index d460017bb35..e9478688c3d 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -8,7 +8,7 @@ ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
 		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
 		raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
 		exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
-		ip6_flowlabel.o ipv6_syms.o inet6_connection_sock.o
+		ip6_flowlabel.o inet6_connection_sock.o
 
 ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
 	xfrm6_output.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 452a82ce479..e035896657b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -271,6 +271,8 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
 	call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
 }
 
+EXPORT_SYMBOL(in6_dev_finish_destroy);
+
 static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 {
 	struct inet6_dev *ndev;
@@ -1107,6 +1109,7 @@ int ipv6_get_saddr(struct dst_entry *dst,
 	return ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, daddr, saddr);
 }
 
+EXPORT_SYMBOL(ipv6_get_saddr);
 
 int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
 {
@@ -1161,6 +1164,8 @@ int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict)
 	return ifp != NULL;
 }
 
+EXPORT_SYMBOL(ipv6_chk_addr);
+
 static
 int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev)
 {
@@ -4021,11 +4026,15 @@ int register_inet6addr_notifier(struct notifier_block *nb)
 	return atomic_notifier_chain_register(&inet6addr_chain, nb);
 }
 
+EXPORT_SYMBOL(register_inet6addr_notifier);
+
 int unregister_inet6addr_notifier(struct notifier_block *nb)
 {
 	return atomic_notifier_chain_unregister(&inet6addr_chain,nb);
 }
 
+EXPORT_SYMBOL(unregister_inet6addr_notifier);
+
 /*
  *	Init / cleanup code
  */
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 5cac14a5c77..fed3758181e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -349,6 +349,8 @@ out:
 	return err;
 }
 
+EXPORT_SYMBOL(inet6_bind);
+
 int inet6_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
@@ -365,6 +367,8 @@ int inet6_release(struct socket *sock)
 	return inet_release(sock);
 }
 
+EXPORT_SYMBOL(inet6_release);
+
 int inet6_destroy_sock(struct sock *sk)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -428,6 +432,8 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 	return(0);
 }
 
+EXPORT_SYMBOL(inet6_getname);
+
 int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
 	struct sock *sk = sock->sk;
@@ -457,6 +463,8 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	return(0);
 }
 
+EXPORT_SYMBOL(inet6_ioctl);
+
 const struct proto_ops inet6_stream_ops = {
 	.family		   = PF_INET6,
 	.owner		   = THIS_MODULE,
@@ -603,6 +611,8 @@ out_illegal:
 	goto out;
 }
 
+EXPORT_SYMBOL(inet6_register_protosw);
+
 void
 inet6_unregister_protosw(struct inet_protosw *p)
 {
@@ -619,6 +629,8 @@ inet6_unregister_protosw(struct inet_protosw *p)
 	}
 }
 
+EXPORT_SYMBOL(inet6_unregister_protosw);
+
 int inet6_sk_rebuild_header(struct sock *sk)
 {
 	int err;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index a963a31e5fb..bcc4820baa7 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -810,6 +810,8 @@ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
 		ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
 }
 
+EXPORT_SYMBOL(ipv6_push_nfrag_opts);
+
 void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
 {
 	if (opt->dst1opt)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index edfe98bf64c..a91dfbce843 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -68,6 +68,7 @@
 #include <asm/system.h>
 
 DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
+EXPORT_SYMBOL(icmpv6_statistics);
 
 /*
  *	The ICMP socket(s). This is the most convenient way to flow control
@@ -466,6 +467,8 @@ out:
 	icmpv6_xmit_unlock();
 }
 
+EXPORT_SYMBOL(icmpv6_send);
+
 static void icmpv6_echo_reply(struct sk_buff *skb)
 {
 	struct sock *sk;
@@ -865,6 +868,8 @@ int icmpv6_err_convert(int type, int code, int *err)
 	return fatal;
 }
 
+EXPORT_SYMBOL(icmpv6_err_convert);
+
 #ifdef CONFIG_SYSCTL
 ctl_table ipv6_icmp_table[] = {
 	{
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 305516921aa..0d60fbc59d8 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -239,6 +239,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	return -EMSGSIZE;
 }
 
+EXPORT_SYMBOL(ip6_xmit);
+
 /*
  *	To avoid extra problems ND packets are send through this
  *	routine. It's code duplication but I really want to avoid
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index f5f9582a8d3..1cb7a084f0c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -761,6 +761,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
 	return err;
 }
 
+EXPORT_SYMBOL(ipv6_setsockopt);
 
 #ifdef CONFIG_COMPAT
 int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
@@ -1066,6 +1067,8 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname,
 	return err;
 }
 
+EXPORT_SYMBOL(ipv6_getsockopt);
+
 #ifdef CONFIG_COMPAT
 int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
 			   char __user *optval, int __user *optlen)
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
deleted file mode 100644
index e12e3d4fcce..00000000000
--- a/net/ipv6/ipv6_syms.c
+++ /dev/null
@@ -1,36 +0,0 @@
-
-#include <linux/module.h>
-#include <net/protocol.h>
-#include <net/ipv6.h>
-#include <net/addrconf.h>
-#include <net/ip6_route.h>
-#include <net/xfrm.h>
-
-EXPORT_SYMBOL(icmpv6_send);
-EXPORT_SYMBOL(icmpv6_statistics);
-EXPORT_SYMBOL(icmpv6_err_convert);
-EXPORT_SYMBOL(ndisc_mc_map);
-EXPORT_SYMBOL(register_inet6addr_notifier);
-EXPORT_SYMBOL(unregister_inet6addr_notifier);
-EXPORT_SYMBOL(ip6_route_output);
-EXPORT_SYMBOL(ipv6_setsockopt);
-EXPORT_SYMBOL(ipv6_getsockopt);
-EXPORT_SYMBOL(inet6_register_protosw);
-EXPORT_SYMBOL(inet6_unregister_protosw);
-EXPORT_SYMBOL(inet6_add_protocol);
-EXPORT_SYMBOL(inet6_del_protocol);
-EXPORT_SYMBOL(ip6_xmit);
-EXPORT_SYMBOL(inet6_release);
-EXPORT_SYMBOL(inet6_bind);
-EXPORT_SYMBOL(inet6_getname);
-EXPORT_SYMBOL(inet6_ioctl);
-EXPORT_SYMBOL(ipv6_get_saddr);
-EXPORT_SYMBOL(ipv6_chk_addr);
-EXPORT_SYMBOL(in6_dev_finish_destroy);
-#ifdef CONFIG_XFRM
-EXPORT_SYMBOL(xfrm6_rcv);
-EXPORT_SYMBOL(xfrm6_input_addr);
-EXPORT_SYMBOL(xfrm6_find_1stfragopt);
-#endif
-EXPORT_SYMBOL(rt6_lookup);
-EXPORT_SYMBOL(ipv6_push_nfrag_opts);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 121f31c283f..886c5be1490 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -319,6 +319,8 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d
 	return -EINVAL;
 }
 
+EXPORT_SYMBOL(ndisc_mc_map);
+
 static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
 {
 	const u32 *p32 = pkey;
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index ef43bd57bae..f929f47b925 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -60,6 +60,8 @@ int inet6_add_protocol(struct inet6_protocol *prot, unsigned char protocol)
 	return ret;
 }
 
+EXPORT_SYMBOL(inet6_add_protocol);
+
 /*
  *	Remove a protocol from the hash tables.
  */
@@ -83,3 +85,5 @@ int inet6_del_protocol(struct inet6_protocol *prot, unsigned char protocol)
 
 	return ret;
 }
+
+EXPORT_SYMBOL(inet6_del_protocol);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index aebb4e2d5ae..cc08cc48e9e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -575,6 +575,8 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
 	return NULL;
 }
 
+EXPORT_SYMBOL(rt6_lookup);
+
 /* ip6_ins_rt is called with FREE table->tb6_lock.
    It takes new route entry, the addition fails by any reason the
    route is freed. In any case, if caller does not hold it, it may
@@ -829,6 +831,7 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
 	return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
 }
 
+EXPORT_SYMBOL(ip6_route_output);
 
 /*
  *	Destination cache support functions
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 31f651f9509..33a1b920043 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -140,6 +140,8 @@ int xfrm6_rcv(struct sk_buff **pskb)
 	return xfrm6_rcv_spi(*pskb, 0);
 }
 
+EXPORT_SYMBOL(xfrm6_rcv);
+
 int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 		     xfrm_address_t *saddr, u8 proto)
 {
@@ -247,3 +249,5 @@ drop:
 		xfrm_state_put(xfrm_vec_one);
 	return -1;
 }
+
+EXPORT_SYMBOL(xfrm6_input_addr);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 8e4170f9a0d..c52e9d6c75e 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -23,6 +23,8 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
 	return ip6_find_1stfragopt(skb, prevhdr);
 }
 
+EXPORT_SYMBOL(xfrm6_find_1stfragopt);
+
 static int xfrm6_tunnel_check_size(struct sk_buff *skb)
 {
 	int mtu, ret = 0;
-- 
cgit v1.2.3


From e490d1d85cf5e191791979e5f260d32eb4f703a8 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Tue, 31 Oct 2006 23:11:25 +0900
Subject: [IPV6] IP6TUNNEL: Split out generic routine in ip6ip6_err().

This enables to add IPv4/IPv6 specific error handling later,

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 53 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 39 insertions(+), 14 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 08d944223ec..6022fc5e557 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -372,16 +372,16 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
 }
 
 /**
- * ip6ip6_err - tunnel error handler
+ * ip6_tnl_err - tunnel error handler
  *
  * Description:
- *   ip6ip6_err() should handle errors in the tunnel according
+ *   ip6_tnl_err() should handle errors in the tunnel according
  *   to the specifications in RFC 2473.
  **/
 
 static int
-ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-	   int type, int code, int offset, __be32 info)
+ip6_tnl_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+	    int *type, int *code, int *msg, __be32 *info, int offset)
 {
 	struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
 	struct ip6_tnl *t;
@@ -402,7 +402,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 	err = 0;
 
-	switch (type) {
+	switch (*type) {
 		__u32 teli;
 		struct ipv6_tlv_tnl_enc_lim *tel;
 		__u32 mtu;
@@ -414,7 +414,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		rel_msg = 1;
 		break;
 	case ICMPV6_TIME_EXCEED:
-		if (code == ICMPV6_EXC_HOPLIMIT) {
+		if ((*code) == ICMPV6_EXC_HOPLIMIT) {
 			if (net_ratelimit())
 				printk(KERN_WARNING
 				       "%s: Too small hop limit or "
@@ -425,10 +425,10 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		break;
 	case ICMPV6_PARAMPROB:
 		teli = 0;
-		if (code == ICMPV6_HDR_FIELD)
+		if ((*code) == ICMPV6_HDR_FIELD)
 			teli = parse_tlv_tnl_enc_lim(skb, skb->data);
 
-		if (teli && teli == ntohl(info) - 2) {
+		if (teli && teli == ntohl(*info) - 2) {
 			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
 			if (tel->encap_limit == 0) {
 				if (net_ratelimit())
@@ -445,7 +445,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		}
 		break;
 	case ICMPV6_PKT_TOOBIG:
-		mtu = ntohl(info) - offset;
+		mtu = ntohl(*info) - offset;
 		if (mtu < IPV6_MIN_MTU)
 			mtu = IPV6_MIN_MTU;
 		t->dev->mtu = mtu;
@@ -458,12 +458,38 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		}
 		break;
 	}
-	if (rel_msg &&  pskb_may_pull(skb, offset + sizeof (*ipv6h))) {
+
+	*type = rel_type;
+	*code = rel_code;
+	*info = rel_info;
+	*msg = rel_msg;
+
+out:
+	read_unlock(&ip6ip6_lock);
+	return err;
+}
+
+static int
+ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+	   int type, int code, int offset, __u32 info)
+{
+	int rel_msg = 0;
+	int rel_type = type;
+	int rel_code = code;
+	__u32 rel_info = info;
+	int err;
+
+	err = ip6_tnl_err(skb, opt, &rel_type, &rel_code, &rel_msg, &rel_info,
+			  offset);
+	if (err < 0)
+		return err;
+
+	if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
 		struct rt6_info *rt;
 		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
 		if (!skb2)
-			goto out;
+			return 0;
 
 		dst_release(skb2->dst);
 		skb2->dst = NULL;
@@ -483,9 +509,8 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 
 		kfree_skb(skb2);
 	}
-out:
-	read_unlock(&ip6ip6_lock);
-	return err;
+
+	return 0;
 }
 
 static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph,
-- 
cgit v1.2.3


From 8359925be8bb5960f614e3f25454f3ef7cc9df65 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Fri, 3 Nov 2006 09:39:14 +0900
Subject: [IPV6] IP6TUNNEL: Split out generic routine in ip6ip6_rcv().

This enables to add IPv4/IPv6 specific handling later,

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6022fc5e557..985d106dff6 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -513,14 +513,17 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	return 0;
 }
 
-static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph,
-					  struct sk_buff *skb)
+static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
+					struct ipv6hdr *ipv6h,
+					struct sk_buff *skb)
 {
-	struct ipv6hdr *inner_iph = skb->nh.ipv6h;
+	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+		ipv6_copy_dscp(ipv6h, skb->nh.ipv6h);
 
-	if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
-		IP6_ECN_set_ce(inner_iph);
+	if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
+		IP6_ECN_set_ce(skb->nh.ipv6h);
 }
+
 static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
 {
 	struct ip6_tnl_parm *p = &t->parms;
@@ -546,12 +549,16 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
 /**
  * ip6ip6_rcv - decapsulate IPv6 packet and retransmit it locally
  *   @skb: received socket buffer
+ *   @protocol: ethernet protocol ID
+ *   @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
  *
  * Return: 0
  **/
 
-static int
-ip6ip6_rcv(struct sk_buff *skb)
+static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
+		       void (*dscp_ecn_decapsulate)(struct ip6_tnl *t,
+						    struct ipv6hdr *ipv6h,
+						    struct sk_buff *skb))
 {
 	struct ipv6hdr *ipv6h;
 	struct ip6_tnl *t;
@@ -574,16 +581,16 @@ ip6ip6_rcv(struct sk_buff *skb)
 		secpath_reset(skb);
 		skb->mac.raw = skb->nh.raw;
 		skb->nh.raw = skb->data;
-		skb->protocol = htons(ETH_P_IPV6);
+		skb->protocol = htons(protocol);
 		skb->pkt_type = PACKET_HOST;
 		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
 		skb->dev = t->dev;
 		dst_release(skb->dst);
 		skb->dst = NULL;
 		nf_reset(skb);
-		if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
-			ipv6_copy_dscp(ipv6h, skb->nh.ipv6h);
-		ip6ip6_ecn_decapsulate(ipv6h, skb);
+
+		dscp_ecn_decapsulate(t, ipv6h, skb);
+
 		t->stat.rx_packets++;
 		t->stat.rx_bytes += skb->len;
 		netif_rx(skb);
@@ -598,6 +605,11 @@ discard:
 	return 0;
 }
 
+static int ip6ip6_rcv(struct sk_buff *skb)
+{
+	return ip6_tnl_rcv(skb, ETH_P_IPV6, ip6ip6_dscp_ecn_decapsulate);
+}
+
 struct ipv6_tel_txoption {
 	struct ipv6_txoptions ops;
 	__u8 dst_opt[8];
-- 
cgit v1.2.3


From 61ec2aec28ba8de09f76a558a5d6d3893b1d2e47 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sun, 5 Nov 2006 22:56:45 +0900
Subject: [IPV6] IP6TUNNEL: Split out generic routine in ip6ip6_xmit().

This enables to add IPv4/IPv6 specific handling later,

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 143 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 98 insertions(+), 45 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 985d106dff6..4546bb923a2 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -678,9 +678,13 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
 	return ret;
 }
 /**
- * ip6ip6_tnl_xmit - encapsulate packet and send
+ * ip6_tnl_xmit2 - encapsulate packet and send
  *   @skb: the outgoing socket buffer
  *   @dev: the outgoing tunnel device
+ *   @dsfield: dscp code for outer header
+ *   @fl: flow of tunneled packet
+ *   @encap_limit: encapsulation limit
+ *   @pmtu: Path MTU is stored if packet is too big
  *
  * Description:
  *   Build new header and do some sanity checks on the packet before sending
@@ -688,62 +692,35 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
  *
  * Return:
  *   0
+ *   -1 fail
+ *   %-EMSGSIZE message too big. return mtu in this case.
  **/
 
-static int
-ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+static int ip6_tnl_xmit2(struct sk_buff *skb,
+			 struct net_device *dev,
+			 __u8 dsfield,
+			 struct flowi *fl,
+			 int encap_limit,
+			 __u32 *pmtu)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
 	struct net_device_stats *stats = &t->stat;
 	struct ipv6hdr *ipv6h = skb->nh.ipv6h;
-	int encap_limit = -1;
 	struct ipv6_tel_txoption opt;
-	__u16 offset;
-	struct flowi fl;
 	struct dst_entry *dst;
 	struct net_device *tdev;
 	int mtu;
 	int max_headroom = sizeof(struct ipv6hdr);
 	u8 proto;
-	int err;
+	int err = -1;
 	int pkt_len;
-	int dsfield;
-
-	if (t->recursion++) {
-		stats->collisions++;
-		goto tx_err;
-	}
-	if (skb->protocol != htons(ETH_P_IPV6) ||
-	    !ip6_tnl_xmit_ctl(t) || ip6ip6_tnl_addr_conflict(t, ipv6h))
-		goto tx_err;
-
-	if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
-		struct ipv6_tlv_tnl_enc_lim *tel;
-		tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->nh.raw[offset];
-		if (tel->encap_limit == 0) {
-			icmpv6_send(skb, ICMPV6_PARAMPROB,
-				    ICMPV6_HDR_FIELD, offset + 2, skb->dev);
-			goto tx_err;
-		}
-		encap_limit = tel->encap_limit - 1;
-	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
-		encap_limit = t->parms.encap_limit;
-
-	memcpy(&fl, &t->fl, sizeof (fl));
-	proto = fl.proto;
-
-	dsfield = ipv6_get_dsfield(ipv6h);
-	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
-		fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
-	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
-		fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
 
 	if ((dst = ip6_tnl_dst_check(t)) != NULL)
 		dst_hold(dst);
 	else {
-		dst = ip6_route_output(NULL, &fl);
+		dst = ip6_route_output(NULL, fl);
 
-		if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0)
+		if (dst->error || xfrm_lookup(&dst, fl, NULL, 0) < 0)
 			goto tx_err_link_failure;
 	}
 
@@ -767,7 +744,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (skb->dst)
 		skb->dst->ops->update_pmtu(skb->dst, mtu);
 	if (skb->len > mtu) {
-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+		*pmtu = mtu;
+		err = -EMSGSIZE;
 		goto tx_err_dst_release;
 	}
 
@@ -793,20 +771,21 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	skb->h.raw = skb->nh.raw;
 
+	proto = fl->proto;
 	if (encap_limit >= 0) {
 		init_tel_txopt(&opt, encap_limit);
 		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
 	}
 	skb->nh.raw = skb_push(skb, sizeof(struct ipv6hdr));
 	ipv6h = skb->nh.ipv6h;
-	*(__be32*)ipv6h = fl.fl6_flowlabel | htonl(0x60000000);
+	*(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
 	dsfield = INET_ECN_encapsulate(0, dsfield);
 	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
 	ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 	ipv6h->hop_limit = t->parms.hop_limit;
 	ipv6h->nexthdr = proto;
-	ipv6_addr_copy(&ipv6h->saddr, &fl.fl6_src);
-	ipv6_addr_copy(&ipv6h->daddr, &fl.fl6_dst);
+	ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src);
+	ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst);
 	nf_reset(skb);
 	pkt_len = skb->len;
 	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
@@ -820,13 +799,87 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 		stats->tx_aborted_errors++;
 	}
 	ip6_tnl_dst_store(t, dst);
-	t->recursion--;
 	return 0;
 tx_err_link_failure:
 	stats->tx_carrier_errors++;
 	dst_link_failure(skb);
 tx_err_dst_release:
 	dst_release(dst);
+	return err;
+}
+
+static inline int
+ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct ipv6hdr *ipv6h = skb->nh.ipv6h;
+	int encap_limit = -1;
+	__u16 offset;
+	struct flowi fl;
+	__u8 dsfield;
+	__u32 mtu;
+	int err;
+
+	if (!ip6_tnl_xmit_ctl(t) || ip6ip6_tnl_addr_conflict(t, ipv6h))
+		return -1;
+
+	if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
+		struct ipv6_tlv_tnl_enc_lim *tel;
+		tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->nh.raw[offset];
+		if (tel->encap_limit == 0) {
+			icmpv6_send(skb, ICMPV6_PARAMPROB,
+				    ICMPV6_HDR_FIELD, offset + 2, skb->dev);
+			return -1;
+		}
+		encap_limit = tel->encap_limit - 1;
+	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		encap_limit = t->parms.encap_limit;
+
+	memcpy(&fl, &t->fl, sizeof (fl));
+	fl.proto = IPPROTO_IPV6;
+
+	dsfield = ipv6_get_dsfield(ipv6h);
+	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
+		fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
+		fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK);
+
+	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
+	if (err != 0) {
+		if (err == -EMSGSIZE)
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct net_device_stats *stats = &t->stat;
+	int ret;
+
+	if (t->recursion++) {
+		t->stat.collisions++;
+		goto tx_err;
+	}
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IPV6):
+		ret = ip6ip6_tnl_xmit(skb, dev);
+		break;
+	default:
+		goto tx_err;
+	}
+
+	if (ret < 0)
+		goto tx_err;
+
+	t->recursion--;
+	return 0;
+
 tx_err:
 	stats->tx_errors++;
 	stats->tx_dropped++;
@@ -1088,7 +1141,7 @@ static void ip6ip6_tnl_dev_setup(struct net_device *dev)
 	SET_MODULE_OWNER(dev);
 	dev->uninit = ip6ip6_tnl_dev_uninit;
 	dev->destructor = free_netdev;
-	dev->hard_start_xmit = ip6ip6_tnl_xmit;
+	dev->hard_start_xmit = ip6_tnl_xmit;
 	dev->get_stats = ip6ip6_tnl_get_stats;
 	dev->do_ioctl = ip6ip6_tnl_ioctl;
 	dev->change_mtu = ip6ip6_tnl_change_mtu;
-- 
cgit v1.2.3


From c4d3efafcc933fd2ffd169d7dc4f980393a13796 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Thu, 15 Feb 2007 00:43:16 +0900
Subject: [IPV6] IP6TUNNEL: Add support to IPv4 over IPv6 tunnel.

Some notes
- Protocol number IPPROTO_IPIP is used for IPv4 over IPv6 packets.
- If IP6_TNL_F_USE_ORIG_TCLASS is set, TOS in IPv4 header is copied to
  Traffic Class in outer IPv6 header on xmit.
- IP6_TNL_F_USE_ORIG_FLOWLABEL is ignored on xmit of IPv4 packets, because
  IPv4 header does not have flow label.
- Kernel sends ICMP error if IPv4 packet is too big on xmit, even if
  DF flag is not set.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 185 insertions(+), 5 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4546bb923a2..a6541495dea 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1,14 +1,15 @@
 /*
- *	IPv6 over IPv6 tunnel device
+ *	IPv6 tunneling device
  *	Linux INET6 implementation
  *
  *	Authors:
  *	Ville Nuorvala		<vnuorval@tcs.hut.fi>
+ *	Yasuyuki Kozakai	<kozakai@linux-ipv6.org>
  *
  *	$Id$
  *
  *      Based on:
- *      linux/net/ipv6/sit.c
+ *      linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
  *
  *      RFC 2473
  *
@@ -24,6 +25,7 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/sockios.h>
+#include <linux/icmp.h>
 #include <linux/if.h>
 #include <linux/in.h>
 #include <linux/ip.h>
@@ -41,6 +43,7 @@
 #include <asm/uaccess.h>
 #include <asm/atomic.h>
 
+#include <net/icmp.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
@@ -51,7 +54,7 @@
 #include <net/inet_ecn.h>
 
 MODULE_AUTHOR("Ville Nuorvala");
-MODULE_DESCRIPTION("IPv6-in-IPv6 tunnel");
+MODULE_DESCRIPTION("IPv6 tunneling device");
 MODULE_LICENSE("GPL");
 
 #define IPV6_TLV_TEL_DST_SIZE 8
@@ -63,6 +66,7 @@ MODULE_LICENSE("GPL");
 #endif
 
 #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
+#define IPV6_TCLASS_SHIFT 20
 
 #define HASH_SIZE  32
 
@@ -469,6 +473,104 @@ out:
 	return err;
 }
 
+static int
+ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+	   int type, int code, int offset, __u32 info)
+{
+	int rel_msg = 0;
+	int rel_type = type;
+	int rel_code = code;
+	__u32 rel_info = info;
+	int err;
+	struct sk_buff *skb2;
+	struct iphdr *eiph;
+	struct flowi fl;
+	struct rtable *rt;
+
+	err = ip6_tnl_err(skb, opt, &rel_type, &rel_code, &rel_msg, &rel_info,
+			  offset);
+	if (err < 0)
+		return err;
+
+	if (rel_msg == 0)
+		return 0;
+
+	switch (rel_type) {
+	case ICMPV6_DEST_UNREACH:
+		if (rel_code != ICMPV6_ADDR_UNREACH)
+			return 0;
+		rel_type = ICMP_DEST_UNREACH;
+		rel_code = ICMP_HOST_UNREACH;
+		break;
+	case ICMPV6_PKT_TOOBIG:
+		if (rel_code != 0)
+			return 0;
+		rel_type = ICMP_DEST_UNREACH;
+		rel_code = ICMP_FRAG_NEEDED;
+		break;
+	default:
+		return 0;
+	}
+
+	if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
+		return 0;
+
+	skb2 = skb_clone(skb, GFP_ATOMIC);
+	if (!skb2)
+		return 0;
+
+	dst_release(skb2->dst);
+	skb2->dst = NULL;
+	skb_pull(skb2, offset);
+	skb2->nh.raw = skb2->data;
+	eiph = skb2->nh.iph;
+
+	/* Try to guess incoming interface */
+	memset(&fl, 0, sizeof(fl));
+	fl.fl4_dst = eiph->saddr;
+	fl.fl4_tos = RT_TOS(eiph->tos);
+	fl.proto = IPPROTO_IPIP;
+	if (ip_route_output_key(&rt, &fl))
+		goto out;
+
+	skb2->dev = rt->u.dst.dev;
+
+	/* route "incoming" packet */
+	if (rt->rt_flags & RTCF_LOCAL) {
+		ip_rt_put(rt);
+		rt = NULL;
+		fl.fl4_dst = eiph->daddr;
+		fl.fl4_src = eiph->saddr;
+		fl.fl4_tos = eiph->tos;
+		if (ip_route_output_key(&rt, &fl) ||
+		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
+			ip_rt_put(rt);
+			goto out;
+		}
+	} else {
+		ip_rt_put(rt);
+		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
+				   skb2->dev) ||
+		    skb2->dst->dev->type != ARPHRD_TUNNEL)
+			goto out;
+	}
+
+	/* change mtu on this route */
+	if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
+		if (rel_info > dst_mtu(skb2->dst))
+			goto out;
+
+		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
+		rel_info = htonl(rel_info);
+	}
+
+	icmp_send(skb2, rel_type, rel_code, rel_info);
+
+out:
+	kfree_skb(skb2);
+	return 0;
+}
+
 static int
 ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	   int type, int code, int offset, __u32 info)
@@ -513,6 +615,19 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	return 0;
 }
 
+static void ip4ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
+					struct ipv6hdr *ipv6h,
+					struct sk_buff *skb)
+{
+	__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
+
+	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+		ipv4_change_dsfield(skb->nh.iph, INET_ECN_MASK, dsfield);
+
+	if (INET_ECN_is_ce(dsfield))
+		IP_ECN_set_ce(skb->nh.iph);
+}
+
 static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
 					struct ipv6hdr *ipv6h,
 					struct sk_buff *skb)
@@ -605,6 +720,11 @@ discard:
 	return 0;
 }
 
+static int ip4ip6_rcv(struct sk_buff *skb)
+{
+	return ip6_tnl_rcv(skb, ETH_P_IP, ip4ip6_dscp_ecn_decapsulate);
+}
+
 static int ip6ip6_rcv(struct sk_buff *skb)
 {
 	return ip6_tnl_rcv(skb, ETH_P_IPV6, ip6ip6_dscp_ecn_decapsulate);
@@ -691,7 +811,7 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
  *   it.
  *
  * Return:
- *   0
+ *   0 on success
  *   -1 fail
  *   %-EMSGSIZE message too big. return mtu in this case.
  **/
@@ -808,6 +928,44 @@ tx_err_dst_release:
 	return err;
 }
 
+static inline int
+ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip6_tnl *t = netdev_priv(dev);
+	struct iphdr  *iph = skb->nh.iph;
+	int encap_limit = -1;
+	struct flowi fl;
+	__u8 dsfield;
+	__u32 mtu;
+	int err;
+
+	if (!ip6_tnl_xmit_ctl(t))
+		return -1;
+
+	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+		encap_limit = t->parms.encap_limit;
+
+	memcpy(&fl, &t->fl, sizeof (fl));
+	fl.proto = IPPROTO_IPIP;
+
+	dsfield = ipv4_get_dsfield(iph);
+
+	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
+		fl.fl6_flowlabel |= ntohl(((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+					  & IPV6_TCLASS_MASK);
+
+	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
+	if (err != 0) {
+		/* XXX: send ICMP error even if DF is not set. */
+		if (err == -EMSGSIZE)
+			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+				  htonl(mtu));
+		return -1;
+	}
+
+	return 0;
+}
+
 static inline int
 ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 {
@@ -867,6 +1025,9 @@ ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		ret = ip4ip6_tnl_xmit(skb, dev);
+		break;
 	case __constant_htons(ETH_P_IPV6):
 		ret = ip6ip6_tnl_xmit(skb, dev);
 		break;
@@ -1199,6 +1360,12 @@ ip6ip6_fb_tnl_dev_init(struct net_device *dev)
 	return 0;
 }
 
+static struct xfrm6_tunnel ip4ip6_handler = {
+	.handler	= ip4ip6_rcv,
+	.err_handler	= ip4ip6_err,
+	.priority	=	1,
+};
+
 static struct xfrm6_tunnel ip6ip6_handler = {
 	.handler	= ip6ip6_rcv,
 	.err_handler	= ip6ip6_err,
@@ -1215,9 +1382,16 @@ static int __init ip6_tunnel_init(void)
 {
 	int  err;
 
+	if (xfrm6_tunnel_register(&ip4ip6_handler, AF_INET)) {
+		printk(KERN_ERR "ip4ip6 init: can't register tunnel\n");
+		err = -EAGAIN;
+		goto out;
+	}
+
 	if (xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6)) {
 		printk(KERN_ERR "ip6ip6 init: can't register tunnel\n");
-		return -EAGAIN;
+		err = -EAGAIN;
+		goto unreg_ip4ip6;
 	}
 	ip6ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
 					 ip6ip6_tnl_dev_setup);
@@ -1235,6 +1409,9 @@ static int __init ip6_tunnel_init(void)
 	return 0;
 fail:
 	xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
+unreg_ip4ip6:
+	xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
+out:
 	return err;
 }
 
@@ -1258,6 +1435,9 @@ static void __exit ip6ip6_destroy_tunnels(void)
 
 static void __exit ip6_tunnel_cleanup(void)
 {
+	if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
+		printk(KERN_INFO "ip4ip6 close: can't deregister tunnel\n");
+
 	if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
 		printk(KERN_INFO "ip6ip6 close: can't deregister tunnel\n");
 
-- 
cgit v1.2.3


From 3144581cb0b4b1ef897470195128cc1c8dc037b6 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Sat, 10 Feb 2007 00:30:33 +0900
Subject: [IPV6] IP6TUNNEL: Rename functions ip6ip6_* to ip6_tnl_*.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 192 +++++++++++++++++++++++++-------------------------
 1 file changed, 96 insertions(+), 96 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a6541495dea..70684e0d31f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -74,12 +74,12 @@ MODULE_LICENSE("GPL");
 		     (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
 		    (HASH_SIZE - 1))
 
-static int ip6ip6_fb_tnl_dev_init(struct net_device *dev);
-static int ip6ip6_tnl_dev_init(struct net_device *dev);
-static void ip6ip6_tnl_dev_setup(struct net_device *dev);
+static int ip6_fb_tnl_dev_init(struct net_device *dev);
+static int ip6_tnl_dev_init(struct net_device *dev);
+static void ip6_tnl_dev_setup(struct net_device *dev);
 
 /* the IPv6 tunnel fallback device */
-static struct net_device *ip6ip6_fb_tnl_dev;
+static struct net_device *ip6_fb_tnl_dev;
 
 
 /* lists for storing tunnels in use */
@@ -88,7 +88,7 @@ static struct ip6_tnl *tnls_wc[1];
 static struct ip6_tnl **tnls[2] = { tnls_wc, tnls_r_l };
 
 /* lock for the tunnel lists */
-static DEFINE_RWLOCK(ip6ip6_lock);
+static DEFINE_RWLOCK(ip6_tnl_lock);
 
 static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
 {
@@ -119,7 +119,7 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
 }
 
 /**
- * ip6ip6_tnl_lookup - fetch tunnel matching the end-point addresses
+ * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
  *   @remote: the address of the tunnel exit-point
  *   @local: the address of the tunnel entry-point
  *
@@ -130,7 +130,7 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
  **/
 
 static struct ip6_tnl *
-ip6ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
+ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
 {
 	unsigned h0 = HASH(remote);
 	unsigned h1 = HASH(local);
@@ -149,18 +149,18 @@ ip6ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
 }
 
 /**
- * ip6ip6_bucket - get head of list matching given tunnel parameters
+ * ip6_tnl_bucket - get head of list matching given tunnel parameters
  *   @p: parameters containing tunnel end-points
  *
  * Description:
- *   ip6ip6_bucket() returns the head of the list matching the
+ *   ip6_tnl_bucket() returns the head of the list matching the
  *   &struct in6_addr entries laddr and raddr in @p.
  *
  * Return: head of IPv6 tunnel list
  **/
 
 static struct ip6_tnl **
-ip6ip6_bucket(struct ip6_tnl_parm *p)
+ip6_tnl_bucket(struct ip6_tnl_parm *p)
 {
 	struct in6_addr *remote = &p->raddr;
 	struct in6_addr *local = &p->laddr;
@@ -175,36 +175,36 @@ ip6ip6_bucket(struct ip6_tnl_parm *p)
 }
 
 /**
- * ip6ip6_tnl_link - add tunnel to hash table
+ * ip6_tnl_link - add tunnel to hash table
  *   @t: tunnel to be added
  **/
 
 static void
-ip6ip6_tnl_link(struct ip6_tnl *t)
+ip6_tnl_link(struct ip6_tnl *t)
 {
-	struct ip6_tnl **tp = ip6ip6_bucket(&t->parms);
+	struct ip6_tnl **tp = ip6_tnl_bucket(&t->parms);
 
 	t->next = *tp;
-	write_lock_bh(&ip6ip6_lock);
+	write_lock_bh(&ip6_tnl_lock);
 	*tp = t;
-	write_unlock_bh(&ip6ip6_lock);
+	write_unlock_bh(&ip6_tnl_lock);
 }
 
 /**
- * ip6ip6_tnl_unlink - remove tunnel from hash table
+ * ip6_tnl_unlink - remove tunnel from hash table
  *   @t: tunnel to be removed
  **/
 
 static void
-ip6ip6_tnl_unlink(struct ip6_tnl *t)
+ip6_tnl_unlink(struct ip6_tnl *t)
 {
 	struct ip6_tnl **tp;
 
-	for (tp = ip6ip6_bucket(&t->parms); *tp; tp = &(*tp)->next) {
+	for (tp = ip6_tnl_bucket(&t->parms); *tp; tp = &(*tp)->next) {
 		if (t == *tp) {
-			write_lock_bh(&ip6ip6_lock);
+			write_lock_bh(&ip6_tnl_lock);
 			*tp = t->next;
-			write_unlock_bh(&ip6ip6_lock);
+			write_unlock_bh(&ip6_tnl_lock);
 			break;
 		}
 	}
@@ -241,12 +241,12 @@ static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p)
 		if (i == IP6_TNL_MAX)
 			goto failed;
 	}
-	dev = alloc_netdev(sizeof (*t), name, ip6ip6_tnl_dev_setup);
+	dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
 	if (dev == NULL)
 		goto failed;
 
 	t = netdev_priv(dev);
-	dev->init = ip6ip6_tnl_dev_init;
+	dev->init = ip6_tnl_dev_init;
 	t->parms = *p;
 
 	if ((err = register_netdevice(dev)) < 0) {
@@ -254,19 +254,19 @@ static struct ip6_tnl *ip6_tnl_create(struct ip6_tnl_parm *p)
 		goto failed;
 	}
 	dev_hold(dev);
-	ip6ip6_tnl_link(t);
+	ip6_tnl_link(t);
 	return t;
 failed:
 	return NULL;
 }
 
 /**
- * ip6ip6_tnl_locate - find or create tunnel matching given parameters
+ * ip6_tnl_locate - find or create tunnel matching given parameters
  *   @p: tunnel parameters
  *   @create: != 0 if allowed to create new tunnel if no match found
  *
  * Description:
- *   ip6ip6_tnl_locate() first tries to locate an existing tunnel
+ *   ip6_tnl_locate() first tries to locate an existing tunnel
  *   based on @parms. If this is unsuccessful, but @create is set a new
  *   tunnel device is created and registered for use.
  *
@@ -274,13 +274,13 @@ failed:
  *   matching tunnel or NULL
  **/
 
-static struct ip6_tnl *ip6ip6_tnl_locate(struct ip6_tnl_parm *p, int create)
+static struct ip6_tnl *ip6_tnl_locate(struct ip6_tnl_parm *p, int create)
 {
 	struct in6_addr *remote = &p->raddr;
 	struct in6_addr *local = &p->laddr;
 	struct ip6_tnl *t;
 
-	for (t = *ip6ip6_bucket(p); t; t = t->next) {
+	for (t = *ip6_tnl_bucket(p); t; t = t->next) {
 		if (ipv6_addr_equal(local, &t->parms.laddr) &&
 		    ipv6_addr_equal(remote, &t->parms.raddr))
 			return t;
@@ -291,24 +291,24 @@ static struct ip6_tnl *ip6ip6_tnl_locate(struct ip6_tnl_parm *p, int create)
 }
 
 /**
- * ip6ip6_tnl_dev_uninit - tunnel device uninitializer
+ * ip6_tnl_dev_uninit - tunnel device uninitializer
  *   @dev: the device to be destroyed
  *
  * Description:
- *   ip6ip6_tnl_dev_uninit() removes tunnel from its list
+ *   ip6_tnl_dev_uninit() removes tunnel from its list
  **/
 
 static void
-ip6ip6_tnl_dev_uninit(struct net_device *dev)
+ip6_tnl_dev_uninit(struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
 
-	if (dev == ip6ip6_fb_tnl_dev) {
-		write_lock_bh(&ip6ip6_lock);
+	if (dev == ip6_fb_tnl_dev) {
+		write_lock_bh(&ip6_tnl_lock);
 		tnls_wc[0] = NULL;
-		write_unlock_bh(&ip6ip6_lock);
+		write_unlock_bh(&ip6_tnl_lock);
 	} else {
-		ip6ip6_tnl_unlink(t);
+		ip6_tnl_unlink(t);
 	}
 	ip6_tnl_dst_reset(t);
 	dev_put(dev);
@@ -400,8 +400,8 @@ ip6_tnl_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	   in trouble since we might need the source address for further
 	   processing of the error. */
 
-	read_lock(&ip6ip6_lock);
-	if ((t = ip6ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
+	read_lock(&ip6_tnl_lock);
+	if ((t = ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
 		goto out;
 
 	err = 0;
@@ -469,7 +469,7 @@ ip6_tnl_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	*msg = rel_msg;
 
 out:
-	read_unlock(&ip6ip6_lock);
+	read_unlock(&ip6_tnl_lock);
 	return err;
 }
 
@@ -662,7 +662,7 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
 }
 
 /**
- * ip6ip6_rcv - decapsulate IPv6 packet and retransmit it locally
+ * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
  *   @skb: received socket buffer
  *   @protocol: ethernet protocol ID
  *   @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
@@ -680,17 +680,17 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
 
 	ipv6h = skb->nh.ipv6h;
 
-	read_lock(&ip6ip6_lock);
+	read_lock(&ip6_tnl_lock);
 
-	if ((t = ip6ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
+	if ((t = ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
 		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-			read_unlock(&ip6ip6_lock);
+			read_unlock(&ip6_tnl_lock);
 			goto discard;
 		}
 
 		if (!ip6_tnl_rcv_ctl(t)) {
 			t->stat.rx_dropped++;
-			read_unlock(&ip6ip6_lock);
+			read_unlock(&ip6_tnl_lock);
 			goto discard;
 		}
 		secpath_reset(skb);
@@ -709,10 +709,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
 		t->stat.rx_packets++;
 		t->stat.rx_bytes += skb->len;
 		netif_rx(skb);
-		read_unlock(&ip6ip6_lock);
+		read_unlock(&ip6_tnl_lock);
 		return 0;
 	}
-	read_unlock(&ip6ip6_lock);
+	read_unlock(&ip6_tnl_lock);
 	return 1;
 
 discard:
@@ -750,7 +750,7 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
 }
 
 /**
- * ip6ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
+ * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
  *   @t: the outgoing tunnel device
  *   @hdr: IPv6 header from the incoming packet
  *
@@ -764,7 +764,7 @@ static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
  **/
 
 static inline int
-ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
+ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
 {
 	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
 }
@@ -978,7 +978,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	__u32 mtu;
 	int err;
 
-	if (!ip6_tnl_xmit_ctl(t) || ip6ip6_tnl_addr_conflict(t, ipv6h))
+	if (!ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
 		return -1;
 
 	if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
@@ -1068,7 +1068,7 @@ static void ip6_tnl_set_cap(struct ip6_tnl *t)
 	}
 }
 
-static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
+static void ip6_tnl_link_config(struct ip6_tnl *t)
 {
 	struct net_device *dev = t->dev;
 	struct ip6_tnl_parm *p = &t->parms;
@@ -1121,17 +1121,17 @@ static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
 }
 
 /**
- * ip6ip6_tnl_change - update the tunnel parameters
+ * ip6_tnl_change - update the tunnel parameters
  *   @t: tunnel to be changed
  *   @p: tunnel configuration parameters
  *   @active: != 0 if tunnel is ready for use
  *
  * Description:
- *   ip6ip6_tnl_change() updates the tunnel parameters
+ *   ip6_tnl_change() updates the tunnel parameters
  **/
 
 static int
-ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
+ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
 {
 	ipv6_addr_copy(&t->parms.laddr, &p->laddr);
 	ipv6_addr_copy(&t->parms.raddr, &p->raddr);
@@ -1141,18 +1141,18 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
 	t->parms.flowinfo = p->flowinfo;
 	t->parms.link = p->link;
 	ip6_tnl_dst_reset(t);
-	ip6ip6_tnl_link_config(t);
+	ip6_tnl_link_config(t);
 	return 0;
 }
 
 /**
- * ip6ip6_tnl_ioctl - configure ipv6 tunnels from userspace
+ * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
  *   @dev: virtual device associated with tunnel
  *   @ifr: parameters passed from userspace
  *   @cmd: command to be performed
  *
  * Description:
- *   ip6ip6_tnl_ioctl() is used for managing IPv6 tunnels
+ *   ip6_tnl_ioctl() is used for managing IPv6 tunnels
  *   from userspace.
  *
  *   The possible commands are the following:
@@ -1174,7 +1174,7 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
  **/
 
 static int
-ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	int err = 0;
 	struct ip6_tnl_parm p;
@@ -1182,12 +1182,12 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 	switch (cmd) {
 	case SIOCGETTUNNEL:
-		if (dev == ip6ip6_fb_tnl_dev) {
+		if (dev == ip6_fb_tnl_dev) {
 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
 				err = -EFAULT;
 				break;
 			}
-			t = ip6ip6_tnl_locate(&p, 0);
+			t = ip6_tnl_locate(&p, 0);
 		}
 		if (t == NULL)
 			t = netdev_priv(dev);
@@ -1207,8 +1207,8 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		err = -EINVAL;
 		if (p.proto != IPPROTO_IPV6)
 			break;
-		t = ip6ip6_tnl_locate(&p, cmd == SIOCADDTUNNEL);
-		if (dev != ip6ip6_fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
+		t = ip6_tnl_locate(&p, cmd == SIOCADDTUNNEL);
+		if (dev != ip6_fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
 			if (t != NULL) {
 				if (t->dev != dev) {
 					err = -EEXIST;
@@ -1217,9 +1217,9 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			} else
 				t = netdev_priv(dev);
 
-			ip6ip6_tnl_unlink(t);
-			err = ip6ip6_tnl_change(t, &p);
-			ip6ip6_tnl_link(t);
+			ip6_tnl_unlink(t);
+			err = ip6_tnl_change(t, &p);
+			ip6_tnl_link(t);
 			netdev_state_change(dev);
 		}
 		if (t) {
@@ -1235,15 +1235,15 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		if (!capable(CAP_NET_ADMIN))
 			break;
 
-		if (dev == ip6ip6_fb_tnl_dev) {
+		if (dev == ip6_fb_tnl_dev) {
 			err = -EFAULT;
 			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
 				break;
 			err = -ENOENT;
-			if ((t = ip6ip6_tnl_locate(&p, 0)) == NULL)
+			if ((t = ip6_tnl_locate(&p, 0)) == NULL)
 				break;
 			err = -EPERM;
-			if (t->dev == ip6ip6_fb_tnl_dev)
+			if (t->dev == ip6_fb_tnl_dev)
 				break;
 			dev = t->dev;
 		}
@@ -1257,20 +1257,20 @@ ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 }
 
 /**
- * ip6ip6_tnl_get_stats - return the stats for tunnel device
+ * ip6_tnl_get_stats - return the stats for tunnel device
  *   @dev: virtual device associated with tunnel
  *
  * Return: stats for device
  **/
 
 static struct net_device_stats *
-ip6ip6_tnl_get_stats(struct net_device *dev)
+ip6_tnl_get_stats(struct net_device *dev)
 {
 	return &(((struct ip6_tnl *)netdev_priv(dev))->stat);
 }
 
 /**
- * ip6ip6_tnl_change_mtu - change mtu manually for tunnel device
+ * ip6_tnl_change_mtu - change mtu manually for tunnel device
  *   @dev: virtual device associated with tunnel
  *   @new_mtu: the new mtu
  *
@@ -1280,7 +1280,7 @@ ip6ip6_tnl_get_stats(struct net_device *dev)
  **/
 
 static int
-ip6ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
+ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
 {
 	if (new_mtu < IPV6_MIN_MTU) {
 		return -EINVAL;
@@ -1290,22 +1290,22 @@ ip6ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
 }
 
 /**
- * ip6ip6_tnl_dev_setup - setup virtual tunnel device
+ * ip6_tnl_dev_setup - setup virtual tunnel device
  *   @dev: virtual device associated with tunnel
  *
  * Description:
  *   Initialize function pointers and device parameters
  **/
 
-static void ip6ip6_tnl_dev_setup(struct net_device *dev)
+static void ip6_tnl_dev_setup(struct net_device *dev)
 {
 	SET_MODULE_OWNER(dev);
-	dev->uninit = ip6ip6_tnl_dev_uninit;
+	dev->uninit = ip6_tnl_dev_uninit;
 	dev->destructor = free_netdev;
 	dev->hard_start_xmit = ip6_tnl_xmit;
-	dev->get_stats = ip6ip6_tnl_get_stats;
-	dev->do_ioctl = ip6ip6_tnl_ioctl;
-	dev->change_mtu = ip6ip6_tnl_change_mtu;
+	dev->get_stats = ip6_tnl_get_stats;
+	dev->do_ioctl = ip6_tnl_ioctl;
+	dev->change_mtu = ip6_tnl_change_mtu;
 
 	dev->type = ARPHRD_TUNNEL6;
 	dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
@@ -1316,12 +1316,12 @@ static void ip6ip6_tnl_dev_setup(struct net_device *dev)
 
 
 /**
- * ip6ip6_tnl_dev_init_gen - general initializer for all tunnel devices
+ * ip6_tnl_dev_init_gen - general initializer for all tunnel devices
  *   @dev: virtual device associated with tunnel
  **/
 
 static inline void
-ip6ip6_tnl_dev_init_gen(struct net_device *dev)
+ip6_tnl_dev_init_gen(struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
 	t->fl.proto = IPPROTO_IPV6;
@@ -1330,31 +1330,31 @@ ip6ip6_tnl_dev_init_gen(struct net_device *dev)
 }
 
 /**
- * ip6ip6_tnl_dev_init - initializer for all non fallback tunnel devices
+ * ip6_tnl_dev_init - initializer for all non fallback tunnel devices
  *   @dev: virtual device associated with tunnel
  **/
 
 static int
-ip6ip6_tnl_dev_init(struct net_device *dev)
+ip6_tnl_dev_init(struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
-	ip6ip6_tnl_dev_init_gen(dev);
-	ip6ip6_tnl_link_config(t);
+	ip6_tnl_dev_init_gen(dev);
+	ip6_tnl_link_config(t);
 	return 0;
 }
 
 /**
- * ip6ip6_fb_tnl_dev_init - initializer for fallback tunnel device
+ * ip6_fb_tnl_dev_init - initializer for fallback tunnel device
  *   @dev: fallback device
  *
  * Return: 0
  **/
 
 static int
-ip6ip6_fb_tnl_dev_init(struct net_device *dev)
+ip6_fb_tnl_dev_init(struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
-	ip6ip6_tnl_dev_init_gen(dev);
+	ip6_tnl_dev_init_gen(dev);
 	dev_hold(dev);
 	tnls_wc[0] = t;
 	return 0;
@@ -1383,27 +1383,27 @@ static int __init ip6_tunnel_init(void)
 	int  err;
 
 	if (xfrm6_tunnel_register(&ip4ip6_handler, AF_INET)) {
-		printk(KERN_ERR "ip4ip6 init: can't register tunnel\n");
+		printk(KERN_ERR "ip6_tunnel init: can't register ip4ip6\n");
 		err = -EAGAIN;
 		goto out;
 	}
 
 	if (xfrm6_tunnel_register(&ip6ip6_handler, AF_INET6)) {
-		printk(KERN_ERR "ip6ip6 init: can't register tunnel\n");
+		printk(KERN_ERR "ip6_tunnel init: can't register ip6ip6\n");
 		err = -EAGAIN;
 		goto unreg_ip4ip6;
 	}
-	ip6ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
-					 ip6ip6_tnl_dev_setup);
+	ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
+				      ip6_tnl_dev_setup);
 
-	if (!ip6ip6_fb_tnl_dev) {
+	if (!ip6_fb_tnl_dev) {
 		err = -ENOMEM;
 		goto fail;
 	}
-	ip6ip6_fb_tnl_dev->init = ip6ip6_fb_tnl_dev_init;
+	ip6_fb_tnl_dev->init = ip6_fb_tnl_dev_init;
 
-	if ((err = register_netdev(ip6ip6_fb_tnl_dev))) {
-		free_netdev(ip6ip6_fb_tnl_dev);
+	if ((err = register_netdev(ip6_fb_tnl_dev))) {
+		free_netdev(ip6_fb_tnl_dev);
 		goto fail;
 	}
 	return 0;
@@ -1415,7 +1415,7 @@ out:
 	return err;
 }
 
-static void __exit ip6ip6_destroy_tunnels(void)
+static void __exit ip6_tnl_destroy_tunnels(void)
 {
 	int h;
 	struct ip6_tnl *t;
@@ -1436,13 +1436,13 @@ static void __exit ip6ip6_destroy_tunnels(void)
 static void __exit ip6_tunnel_cleanup(void)
 {
 	if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET))
-		printk(KERN_INFO "ip4ip6 close: can't deregister tunnel\n");
+		printk(KERN_INFO "ip6_tunnel close: can't deregister ip4ip6\n");
 
 	if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
-		printk(KERN_INFO "ip6ip6 close: can't deregister tunnel\n");
+		printk(KERN_INFO "ip6_tunnel close: can't deregister ip6ip6\n");
 
 	rtnl_lock();
-	ip6ip6_destroy_tunnels();
+	ip6_tnl_destroy_tunnels();
 	rtnl_unlock();
 }
 
-- 
cgit v1.2.3


From 502b093569e48db264831be7966e1c447de2f52f Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Thu, 30 Nov 2006 14:43:28 +0900
Subject: [IPV6] IP6TUNNEL: Enable to control the handled inner protocol.

ip6_tunnel before supporting IPv4/IPv6 tunnel allows only IPPROTO_IPV6
in configurations from userland. This allows userland to set IPPROTO_IPIP
and 0(wildcard). ip6_tunnel only handles allowed inner protocols.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 70684e0d31f..d8c84d8d7cf 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -384,7 +384,7 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
  **/
 
 static int
-ip6_tnl_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
 	    int *type, int *code, int *msg, __be32 *info, int offset)
 {
 	struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
@@ -404,6 +404,9 @@ ip6_tnl_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if ((t = ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
 		goto out;
 
+	if (t->parms.proto != ipproto && t->parms.proto != 0)
+		goto out;
+
 	err = 0;
 
 	switch (*type) {
@@ -487,8 +490,8 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	struct flowi fl;
 	struct rtable *rt;
 
-	err = ip6_tnl_err(skb, opt, &rel_type, &rel_code, &rel_msg, &rel_info,
-			  offset);
+	err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
+			  &rel_msg, &rel_info, offset);
 	if (err < 0)
 		return err;
 
@@ -581,8 +584,8 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	__u32 rel_info = info;
 	int err;
 
-	err = ip6_tnl_err(skb, opt, &rel_type, &rel_code, &rel_msg, &rel_info,
-			  offset);
+	err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
+			  &rel_msg, &rel_info, offset);
 	if (err < 0)
 		return err;
 
@@ -671,6 +674,7 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
  **/
 
 static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
+		       __u8 ipproto,
 		       void (*dscp_ecn_decapsulate)(struct ip6_tnl *t,
 						    struct ipv6hdr *ipv6h,
 						    struct sk_buff *skb))
@@ -683,6 +687,11 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
 	read_lock(&ip6_tnl_lock);
 
 	if ((t = ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
+		if (t->parms.proto != ipproto && t->parms.proto != 0) {
+			read_unlock(&ip6_tnl_lock);
+			goto discard;
+		}
+
 		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 			read_unlock(&ip6_tnl_lock);
 			goto discard;
@@ -722,12 +731,14 @@ discard:
 
 static int ip4ip6_rcv(struct sk_buff *skb)
 {
-	return ip6_tnl_rcv(skb, ETH_P_IP, ip4ip6_dscp_ecn_decapsulate);
+	return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
+			   ip4ip6_dscp_ecn_decapsulate);
 }
 
 static int ip6ip6_rcv(struct sk_buff *skb)
 {
-	return ip6_tnl_rcv(skb, ETH_P_IPV6, ip6ip6_dscp_ecn_decapsulate);
+	return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
+			   ip6ip6_dscp_ecn_decapsulate);
 }
 
 struct ipv6_tel_txoption {
@@ -939,7 +950,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	__u32 mtu;
 	int err;
 
-	if (!ip6_tnl_xmit_ctl(t))
+	if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) ||
+	    !ip6_tnl_xmit_ctl(t))
 		return -1;
 
 	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
@@ -978,7 +990,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	__u32 mtu;
 	int err;
 
-	if (!ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
+	if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
+	    !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
 		return -1;
 
 	if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
@@ -1140,6 +1153,7 @@ ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
 	t->parms.encap_limit = p->encap_limit;
 	t->parms.flowinfo = p->flowinfo;
 	t->parms.link = p->link;
+	t->parms.proto = p->proto;
 	ip6_tnl_dst_reset(t);
 	ip6_tnl_link_config(t);
 	return 0;
@@ -1205,7 +1219,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p)))
 			break;
 		err = -EINVAL;
-		if (p.proto != IPPROTO_IPV6)
+		if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
+		    p.proto != 0)
 			break;
 		t = ip6_tnl_locate(&p, cmd == SIOCADDTUNNEL);
 		if (dev != ip6_fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
@@ -1324,7 +1339,6 @@ static inline void
 ip6_tnl_dev_init_gen(struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
-	t->fl.proto = IPPROTO_IPV6;
 	t->dev = dev;
 	strcpy(t->parms.name, dev->name);
 }
@@ -1355,6 +1369,7 @@ ip6_fb_tnl_dev_init(struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
 	ip6_tnl_dev_init_gen(dev);
+	t->parms.proto = IPPROTO_IPV6;
 	dev_hold(dev);
 	tnls_wc[0] = t;
 	return 0;
-- 
cgit v1.2.3


From 95c385b4d5a71b8ad552aecaa968ea46d7da2f6a Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Wed, 25 Apr 2007 17:08:10 -0700
Subject: [IPV6] ADDRCONF: Optimistic Duplicate Address Detection (RFC 4429)
 Support.

Nominally an autoconfigured IPv6 address is added to an interface in the
Tentative state (as per RFC 2462).  Addresses in this state remain in this
state while the Duplicate Address Detection process operates on them to
determine their uniqueness on the network.  During this period, these
tentative addresses may not be used for communication, increasing the time
before a node may be able to communicate on a network.  Using Optimistic
Duplicate Address Detection, autoconfigured addresses may be used
immediately for communication on the network, as long as certain rules are
followed to avoid conflicts with other nodes during the Duplicate Address
Detection process.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_addr.h |   1 +
 include/linux/ipv6.h    |   4 ++
 include/net/addrconf.h  |   4 +-
 net/ipv6/Kconfig        |  10 +++++
 net/ipv6/addrconf.c     | 106 +++++++++++++++++++++++++++++++++++++++++-------
 net/ipv6/ip6_output.c   |  35 ++++++++++++++++
 net/ipv6/mcast.c        |   4 +-
 net/ipv6/ndisc.c        |  84 +++++++++++++++++++++++++++-----------
 8 files changed, 207 insertions(+), 41 deletions(-)

diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h
index d557e4ce9b6..43f3bedaafd 100644
--- a/include/linux/if_addr.h
+++ b/include/linux/if_addr.h
@@ -39,6 +39,7 @@ enum
 #define IFA_F_TEMPORARY		IFA_F_SECONDARY
 
 #define	IFA_F_NODAD		0x02
+#define IFA_F_OPTIMISTIC	0x04
 #define	IFA_F_HOMEADDRESS	0x10
 #define IFA_F_DEPRECATED	0x20
 #define IFA_F_TENTATIVE		0x40
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 713eb5eaa81..e046b22a222 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -178,6 +178,9 @@ struct ipv6_devconf {
 #endif
 	__s32		proxy_ndp;
 	__s32		accept_source_route;
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	__s32		optimistic_dad;
+#endif
 	void		*sysctl;
 };
 
@@ -208,6 +211,7 @@ enum {
 	DEVCONF_PROXY_NDP,
 	__DEVCONF_OPTIMISTIC_DAD,
 	DEVCONF_ACCEPT_SOURCE_ROUTE,
+	DEVCONF_OPTIMISTIC_DAD,
 	DEVCONF_MAX
 };
 
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 88df8fc814e..f3531d0bcd0 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -73,7 +73,9 @@ extern int			ipv6_get_saddr(struct dst_entry *dst,
 extern int			ipv6_dev_get_saddr(struct net_device *dev, 
 					       struct in6_addr *daddr,
 					       struct in6_addr *saddr);
-extern int			ipv6_get_lladdr(struct net_device *dev, struct in6_addr *);
+extern int			ipv6_get_lladdr(struct net_device *dev,
+						struct in6_addr *addr,
+						unsigned char banned_flags);
 extern int			ipv6_rcv_saddr_equal(const struct sock *sk, 
 						      const struct sock *sk2);
 extern void			addrconf_join_solict(struct net_device *dev,
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 79682efb14b..8e5d54f23b4 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -57,6 +57,16 @@ config IPV6_ROUTE_INFO
 
 	  If unsure, say N.
 
+config IPV6_OPTIMISTIC_DAD
+	bool "IPv6: Enable RFC 4429 Optimistic DAD (EXPERIMENTAL)"
+	depends on IPV6 && EXPERIMENTAL
+	---help---
+	  This is experimental support for optimistic Duplicate
+	  Address Detection.  It allows for autoconfigured addresses
+	  to be used more quickly.
+
+	  If unsure, say N.
+
 config INET6_AH
 	tristate "IPv6: AH transformation"
 	depends on IPV6
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e035896657b..38274c20eaa 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -530,6 +530,16 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 
 	ifa->rt = rt;
 
+	/*
+	 * part one of RFC 4429, section 3.3
+	 * We should not configure an address as
+	 * optimistic if we do not yet know the link
+	 * layer address of our nexhop router
+	 */
+
+	if (rt->rt6i_nexthop == NULL)
+		ifa->flags &= ~IFA_F_OPTIMISTIC;
+
 	ifa->idev = idev;
 	in6_dev_hold(idev);
 	/* For caller */
@@ -706,6 +716,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
 	int tmp_plen;
 	int ret = 0;
 	int max_addresses;
+	u32 addr_flags;
 
 	write_lock(&idev->lock);
 	if (ift) {
@@ -763,10 +774,17 @@ retry:
 	spin_unlock_bh(&ifp->lock);
 
 	write_unlock(&idev->lock);
+
+	addr_flags = IFA_F_TEMPORARY;
+	/* set in addrconf_prefix_rcv() */
+	if (ifp->flags & IFA_F_OPTIMISTIC)
+		addr_flags |= IFA_F_OPTIMISTIC;
+
 	ift = !max_addresses ||
 	      ipv6_count_addresses(idev) < max_addresses ?
 		ipv6_add_addr(idev, &addr, tmp_plen,
-			      ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, IFA_F_TEMPORARY) : NULL;
+			      ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
+			      addr_flags) : NULL;
 	if (!ift || IS_ERR(ift)) {
 		in6_ifa_put(ifp);
 		in6_dev_put(idev);
@@ -898,13 +916,14 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 			 * - Tentative Address (RFC2462 section 5.4)
 			 *  - A tentative address is not considered
 			 *    "assigned to an interface" in the traditional
-			 *    sense.
+			 *    sense, unless it is also flagged as optimistic.
 			 * - Candidate Source Address (section 4)
 			 *  - In any case, anycast addresses, multicast
 			 *    addresses, and the unspecified address MUST
 			 *    NOT be included in a candidate set.
 			 */
-			if (ifa->flags & IFA_F_TENTATIVE)
+			if ((ifa->flags & IFA_F_TENTATIVE) &&
+			    (!(ifa->flags & IFA_F_OPTIMISTIC)))
 				continue;
 			if (unlikely(score.addr_type == IPV6_ADDR_ANY ||
 				     score.addr_type & IPV6_ADDR_MULTICAST)) {
@@ -963,15 +982,17 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
 				}
 			}
 
-			/* Rule 3: Avoid deprecated address */
+			/* Rule 3: Avoid deprecated and optimistic addresses */
 			if (hiscore.rule < 3) {
 				if (ipv6_saddr_preferred(hiscore.addr_type) ||
-				    !(ifa_result->flags & IFA_F_DEPRECATED))
+				   (((ifa_result->flags &
+				    (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0)))
 					hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED;
 				hiscore.rule++;
 			}
 			if (ipv6_saddr_preferred(score.addr_type) ||
-			    !(ifa->flags & IFA_F_DEPRECATED)) {
+			   (((ifa_result->flags &
+			    (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) {
 				score.attrs |= IPV6_SADDR_SCORE_PREFERRED;
 				if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) {
 					score.rule = 3;
@@ -1111,7 +1132,8 @@ int ipv6_get_saddr(struct dst_entry *dst,
 
 EXPORT_SYMBOL(ipv6_get_saddr);
 
-int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
+int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
+		    unsigned char banned_flags)
 {
 	struct inet6_dev *idev;
 	int err = -EADDRNOTAVAIL;
@@ -1122,7 +1144,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
 
 		read_lock_bh(&idev->lock);
 		for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
-			if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
+			if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) {
 				ipv6_addr_copy(addr, &ifp->addr);
 				err = 0;
 				break;
@@ -1674,6 +1696,13 @@ ok:
 
 		if (ifp == NULL && valid_lft) {
 			int max_addresses = in6_dev->cnf.max_addresses;
+			u32 addr_flags = 0;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+			if (in6_dev->cnf.optimistic_dad &&
+			    !ipv6_devconf.forwarding)
+				addr_flags = IFA_F_OPTIMISTIC;
+#endif
 
 			/* Do not allow to create too much of autoconfigured
 			 * addresses; this would be too easy way to crash kernel.
@@ -1681,7 +1710,8 @@ ok:
 			if (!max_addresses ||
 			    ipv6_count_addresses(in6_dev) < max_addresses)
 				ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
-						    addr_type&IPV6_ADDR_SCOPE_MASK, 0);
+						    addr_type&IPV6_ADDR_SCOPE_MASK,
+						    addr_flags);
 
 			if (!ifp || IS_ERR(ifp)) {
 				in6_dev_put(in6_dev);
@@ -1889,6 +1919,11 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen,
 
 		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
 				      jiffies_to_clock_t(valid_lft * HZ), flags);
+		/*
+		 * Note that section 3.1 of RFC 4429 indicates
+		 * that the Optimistic flag should not be set for
+		 * manually configured addresses
+		 */
 		addrconf_dad_start(ifp, 0);
 		in6_ifa_put(ifp);
 		addrconf_verify(0);
@@ -2065,8 +2100,16 @@ static void init_loopback(struct net_device *dev)
 static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
 {
 	struct inet6_ifaddr * ifp;
+	u32 addr_flags = IFA_F_PERMANENT;
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	if (idev->cnf.optimistic_dad &&
+	    !ipv6_devconf.forwarding)
+		addr_flags |= IFA_F_OPTIMISTIC;
+#endif
 
-	ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, IFA_F_PERMANENT);
+
+	ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags);
 	if (!IS_ERR(ifp)) {
 		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
 		addrconf_dad_start(ifp, 0);
@@ -2134,7 +2177,7 @@ ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
 {
 	struct in6_addr lladdr;
 
-	if (!ipv6_get_lladdr(link_dev, &lladdr)) {
+	if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) {
 		addrconf_add_linklocal(idev, &lladdr);
 		return 0;
 	}
@@ -2479,7 +2522,11 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
 	unsigned long rand_num;
 	struct inet6_dev *idev = ifp->idev;
 
-	rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+	if (ifp->flags & IFA_F_OPTIMISTIC)
+		rand_num = 0;
+	else
+		rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);
+
 	ifp->probes = idev->cnf.dad_transmits;
 	addrconf_mod_timer(ifp, AC_DAD, rand_num);
 }
@@ -2501,7 +2548,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
 	    !(ifp->flags&IFA_F_TENTATIVE) ||
 	    ifp->flags & IFA_F_NODAD) {
-		ifp->flags &= ~IFA_F_TENTATIVE;
+		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
 		spin_unlock_bh(&ifp->lock);
 		read_unlock_bh(&idev->lock);
 
@@ -2521,6 +2568,14 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 		addrconf_dad_stop(ifp);
 		return;
 	}
+
+	/*
+	 * Optimistic nodes can start receiving
+	 * Frames right away
+	 */
+	if(ifp->flags & IFA_F_OPTIMISTIC)
+		ip6_ins_rt(ifp->rt);
+
 	addrconf_dad_kick(ifp);
 	spin_unlock_bh(&ifp->lock);
 out:
@@ -2545,7 +2600,7 @@ static void addrconf_dad_timer(unsigned long data)
 		 * DAD was successful
 		 */
 
-		ifp->flags &= ~IFA_F_TENTATIVE;
+		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
 		spin_unlock_bh(&ifp->lock);
 		read_unlock_bh(&idev->lock);
 
@@ -3364,6 +3419,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 #endif
 	array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
 	array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
+#endif
 }
 
 static inline size_t inet6_if_nlmsg_size(void)
@@ -3578,7 +3636,14 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 
 	switch (event) {
 	case RTM_NEWADDR:
-		ip6_ins_rt(ifp->rt);
+		/*
+		 * If the address was optimistic
+		 * we inserted the route at the start of
+		 * our DAD process, so we don't need
+		 * to do it again
+		 */
+		if (!(ifp->rt->rt6i_node))
+			ip6_ins_rt(ifp->rt);
 		if (ifp->idev->cnf.forwarding)
 			addrconf_join_anycast(ifp);
 		break;
@@ -3899,6 +3964,17 @@ static struct addrconf_sysctl_table
 			.mode		=	0644,
 			.proc_handler	=	&proc_dointvec,
 		},
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+		{
+			.ctl_name	=	CTL_UNNUMBERED,
+			.procname       =       "optimistic_dad",
+			.data           =       &ipv6_devconf.optimistic_dad,
+			.maxlen         =       sizeof(int),
+			.mode           =       0644,
+			.proc_handler   =       &proc_dointvec,
+
+		},
+#endif
 		{
 			.ctl_name	=	0,	/* sentinel */
 		}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0d60fbc59d8..7e25043d826 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -863,6 +863,41 @@ static int ip6_dst_lookup_tail(struct sock *sk,
 			goto out_err_release;
 	}
 
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+		/*
+		 * Here if the dst entry we've looked up
+		 * has a neighbour entry that is in the INCOMPLETE
+		 * state and the src address from the flow is
+		 * marked as OPTIMISTIC, we release the found
+		 * dst entry and replace it instead with the
+		 * dst entry of the nexthop router
+		 */
+		if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
+			struct inet6_ifaddr *ifp;
+			struct flowi fl_gw;
+			int redirect;
+
+			ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);
+
+			redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
+			if (ifp)
+				in6_ifa_put(ifp);
+
+			if (redirect) {
+				/*
+				 * We need to get the dst entry for the
+				 * default router instead
+				 */
+				dst_release(*dst);
+				memcpy(&fl_gw, fl, sizeof(struct flowi));
+				memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
+				*dst = ip6_route_output(sk, &fl_gw);
+				if ((err = (*dst)->error))
+					goto out_err_release;
+			}
+		}
+#endif
+
 	return 0;
 
 out_err_release:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index a8d6625ec78..924e24907c3 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1411,7 +1411,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
-	if (ipv6_get_lladdr(dev, &addr_buf)) {
+	if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
 		/* <draft-ietf-magma-mld-source-05.txt>:
 		 * use unspecified address as the source address
 		 * when a valid link-local address is not available.
@@ -1791,7 +1791,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
-	if (ipv6_get_lladdr(dev, &addr_buf)) {
+	if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
 		/* <draft-ietf-magma-mld-source-05.txt>:
 		 * use unspecified address as the source address
 		 * when a valid link-local address is not available.
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 886c5be1490..b79b0004231 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -449,6 +449,8 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 	ifp = ipv6_get_ifaddr(solicited_addr, dev, 1);
 	if (ifp) {
 		src_addr = solicited_addr;
+		if (ifp->flags & IFA_F_OPTIMISTIC)
+			override = 0;
 		in6_ifa_put(ifp);
 	} else {
 		if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr))
@@ -544,7 +546,8 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
 	int send_llinfo;
 
 	if (saddr == NULL) {
-		if (ipv6_get_lladdr(dev, &addr_buf))
+		if (ipv6_get_lladdr(dev, &addr_buf,
+				   (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
 			return;
 		saddr = &addr_buf;
 	}
@@ -624,9 +627,33 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
 	struct sk_buff *skb;
 	struct icmp6hdr *hdr;
 	__u8 * opt;
+	struct inet6_ifaddr *ifp;
+	int send_sllao = dev->addr_len;
 	int len;
 	int err;
 
+
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+	/*
+	 * According to section 2.2 of RFC 4429, we must not
+	 * send router solicitations with a sllao from
+	 * optimistic addresses, but we may send the solicitation
+	 * if we don't include the sllao.  So here we check
+	 * if our address is optimistic, and if so, we
+	 * supress the inclusion of the sllao.
+	 */
+	if (send_sllao) {
+		ifp = ipv6_get_ifaddr(saddr, dev, 1);
+		if (ifp) {
+			if (ifp->flags & IFA_F_OPTIMISTIC)  {
+				send_sllao=0;
+				in6_ifa_put(ifp);
+			}
+		} else {
+			send_sllao = 0;
+		}
+	}
+#endif
 	ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr,
 			dev->ifindex);
 
@@ -639,7 +666,7 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
 		return;
 
 	len = sizeof(struct icmp6hdr);
-	if (dev->addr_len)
+	if (send_sllao)
 		len += ndisc_opt_addr_space(dev);
 
 	skb = sock_alloc_send_skb(sk,
@@ -666,7 +693,7 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
 
 	opt = (u8*) (hdr + 1);
 
-	if (dev->addr_len)
+	if (send_sllao)
 		ndisc_fill_addr_option(opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
 				       dev->addr_len, dev->type);
 
@@ -798,28 +825,39 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 	inc = ipv6_addr_is_multicast(daddr);
 
 	if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1)) != NULL) {
-		if (ifp->flags & IFA_F_TENTATIVE) {
-			/* Address is tentative. If the source
-			   is unspecified address, it is someone
-			   does DAD, otherwise we ignore solicitations
-			   until DAD timer expires.
-			 */
-			if (!dad)
+
+		if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
+			if (dad) {
+				if (dev->type == ARPHRD_IEEE802_TR) {
+					unsigned char *sadr = skb->mac.raw;
+					if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
+					    sadr[9] == dev->dev_addr[1] &&
+					    sadr[10] == dev->dev_addr[2] &&
+					    sadr[11] == dev->dev_addr[3] &&
+					    sadr[12] == dev->dev_addr[4] &&
+					    sadr[13] == dev->dev_addr[5]) {
+						/* looped-back to us */
+						goto out;
+					}
+				}
+
+				/*
+				 * We are colliding with another node
+				 * who is doing DAD
+				 * so fail our DAD process
+				 */
+				addrconf_dad_failure(ifp);
 				goto out;
-			if (dev->type == ARPHRD_IEEE802_TR) {
-				unsigned char *sadr = skb->mac.raw;
-				if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
-				    sadr[9] == dev->dev_addr[1] &&
-				    sadr[10] == dev->dev_addr[2] &&
-				    sadr[11] == dev->dev_addr[3] &&
-				    sadr[12] == dev->dev_addr[4] &&
-				    sadr[13] == dev->dev_addr[5]) {
-					/* looped-back to us */
+			} else {
+				/*
+				 * This is not a dad solicitation.
+				 * If we are an optimistic node,
+				 * we should respond.
+				 * Otherwise, we should ignore it.
+				 */
+				if (!(ifp->flags & IFA_F_OPTIMISTIC))
 					goto out;
-				}
 			}
-			addrconf_dad_failure(ifp);
-			return;
 		}
 
 		idev = ifp->idev;
@@ -1408,7 +1446,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 
 	dev = skb->dev;
 
-	if (ipv6_get_lladdr(dev, &saddr_buf)) {
+	if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
 		ND_PRINTK2(KERN_WARNING
 			   "ICMPv6 Redirect: no link-local address on %s\n",
 			   dev->name);
-- 
cgit v1.2.3


From ca043569390c528de4cd5ec9e07502f2bf4ecd1f Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 28 Feb 2007 23:13:20 +0900
Subject: [IPV6] ADDRCONF: Fix possible inet6_ifaddr leakage with
 CONFIG_OPTIMISTIC_DAD.

The inet6_ifaddr for source address of RS is leaked if the address
is not an optimistic address.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ndisc.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b79b0004231..053147a0027 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -627,7 +627,6 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
 	struct sk_buff *skb;
 	struct icmp6hdr *hdr;
 	__u8 * opt;
-	struct inet6_ifaddr *ifp;
 	int send_sllao = dev->addr_len;
 	int len;
 	int err;
@@ -643,12 +642,12 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
 	 * supress the inclusion of the sllao.
 	 */
 	if (send_sllao) {
-		ifp = ipv6_get_ifaddr(saddr, dev, 1);
+		struct inet6_ifaddr *ifp = ipv6_get_ifaddr(saddr, dev, 1);
 		if (ifp) {
 			if (ifp->flags & IFA_F_OPTIMISTIC)  {
-				send_sllao=0;
-				in6_ifa_put(ifp);
+				send_sllao = 0;
 			}
+			in6_ifa_put(ifp);
 		} else {
 			send_sllao = 0;
 		}
-- 
cgit v1.2.3


From fc910a27839584209726537698b596576940add4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 25 Mar 2007 20:27:59 -0700
Subject: [NETLINK]: Limit NLMSG_GOODSIZE to 8K.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h | 11 +++++++++--
 include/linux/skbuff.h  |  8 +++++---
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 2a20f488ac1..a9d3ad5bc80 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -171,9 +171,16 @@ int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol);
 
 /*
  *	skb should fit one page. This choice is good for headerless malloc.
+ *	But we should limit to 8K so that userspace does not have to
+ *	use enormous buffer sizes on recvmsg() calls just to avoid
+ *	MSG_TRUNC when PAGE_SIZE is very large.
  */
-#define NLMSG_GOODORDER 0
-#define NLMSG_GOODSIZE (SKB_MAX_ORDER(0, NLMSG_GOODORDER))
+#if PAGE_SIZE < 8192UL
+#define NLMSG_GOODSIZE	SKB_WITH_OVERHEAD(PAGE_SIZE)
+#else
+#define NLMSG_GOODSIZE	SKB_WITH_OVERHEAD(8192UL)
+#endif
+
 #define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN)
 
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f9441b5f8d1..30089adb2e7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -39,9 +39,11 @@
 
 #define SKB_DATA_ALIGN(X)	(((X) + (SMP_CACHE_BYTES - 1)) & \
 				 ~(SMP_CACHE_BYTES - 1))
-#define SKB_MAX_ORDER(X, ORDER)	(((PAGE_SIZE << (ORDER)) - (X) - \
-				  sizeof(struct skb_shared_info)) & \
-				  ~(SMP_CACHE_BYTES - 1))
+#define SKB_WITH_OVERHEAD(X)	\
+	(((X) - sizeof(struct skb_shared_info)) & \
+	 ~(SMP_CACHE_BYTES - 1))
+#define SKB_MAX_ORDER(X, ORDER) \
+	SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X))
 #define SKB_MAX_HEAD(X)		(SKB_MAX_ORDER((X), 0))
 #define SKB_MAX_ALLOC		(SKB_MAX_ORDER(0, 2))
 
-- 
cgit v1.2.3


From 8570419fb7be0af84085ac8f13307392a748482c Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 6 Mar 2007 20:19:26 -0800
Subject: [ATM] ENI: Convert to struct timeval to ktime_t.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/eni.c | 4 ++--
 drivers/atm/eni.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index 8fccf018f16..0d3a38b1cb0 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -536,7 +536,7 @@ static int rx_aal0(struct atm_vcc *vcc)
 		return 0;
 	}
 	skb_put(skb,length);
-	skb_set_timestamp(skb, &eni_vcc->timestamp);
+	skb->tstamp = eni_vcc->timestamp;
 	DPRINTK("got len %ld\n",length);
 	if (do_rx_dma(vcc,skb,1,length >> 2,length >> 2)) return 1;
 	eni_vcc->rxing++;
@@ -701,7 +701,7 @@ static void get_service(struct atm_dev *dev)
 			DPRINTK("Grr, servicing VCC %ld twice\n",vci);
 			continue;
 		}
-		do_gettimeofday(&ENI_VCC(vcc)->timestamp);
+		ENI_VCC(vcc)->timestamp = ktime_get_real();
 		ENI_VCC(vcc)->next = NULL;
 		if (vcc->qos.rxtp.traffic_class == ATM_CBR) {
 			if (eni_dev->fast)
diff --git a/drivers/atm/eni.h b/drivers/atm/eni.h
index 385090c2a58..d04fefb0841 100644
--- a/drivers/atm/eni.h
+++ b/drivers/atm/eni.h
@@ -59,7 +59,7 @@ struct eni_vcc {
 	int rxing;			/* number of pending PDUs */
 	int servicing;			/* number of waiting VCs (0 or 1) */
 	int txing;			/* number of pending TX bytes */
-	struct timeval timestamp;	/* for RX timing */
+	ktime_t timestamp;		/* for RX timing */
 	struct atm_vcc *next;		/* next pending RX */
 	struct sk_buff *last;		/* last PDU being DMAed (used to carry
 					   discard information) */
-- 
cgit v1.2.3


From c5f5877c043ca471c3a607fa2c864848b19bc49a Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Sun, 25 Mar 2007 20:21:15 -0700
Subject: [TCP] tcp_cubic: faster cube root

The Newton-Raphson method is quadratically convergent so
only a small fixed number of steps are necessary.
Therefore it is faster to unroll the loop. Since div64_64 is no longer
inline it won't cause code explosion.

Also fixes a bug that can occur if x^2 was bigger than 32 bits.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 6f08adbda54..0e6cdfeb207 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -96,23 +96,17 @@ static void bictcp_init(struct sock *sk)
  */
 static u32 cubic_root(u64 a)
 {
-	u32 x, x1;
+	u32 x;
 
 	/* Initial estimate is based on:
 	 * cbrt(x) = exp(log(x) / 3)
 	 */
 	x = 1u << (fls64(a)/3);
 
-	/*
-	 * Iteration based on:
-	 *                         2
-	 * x    = ( 2 * x  +  a / x  ) / 3
-	 *  k+1          k         k
-	 */
-	do {
-		x1 = x;
-		x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
-	} while (abs(x1 - x) > 1);
+	/* converges to 32 bits in 3 iterations */
+	x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
+	x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
+	x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
 
 	return x;
 }
-- 
cgit v1.2.3


From 43e683926f808cec9802466c27cee7499eda3d11 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Tue, 6 Mar 2007 20:21:20 -0800
Subject: [TCP] TCP Yeah: cleanup

Eliminate need for full 6/4/64 divide to compute queue.
Variable maxqueue was really a constant.
Fix indentation.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_yeah.c | 42 +++++++++++++++++++++++-------------------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 18355a2608e..46dd1bee583 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -74,7 +74,7 @@ static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked)
 }
 
 static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
-				 u32 seq_rtt, u32 in_flight, int flag)
+				u32 seq_rtt, u32 in_flight, int flag)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct yeah *yeah = inet_csk_ca(sk);
@@ -142,8 +142,8 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
 		 */
 
 		if (yeah->cntRTT > 2) {
-			u32 rtt;
-			u32 queue, maxqueue;
+			u32 rtt, queue;
+			u64 bw;
 
 			/* We have enough RTT samples, so, using the Vegas
 			 * algorithm, we determine if we should increase or
@@ -158,32 +158,36 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
 			 */
 			rtt = yeah->minRTT;
 
-			queue = (u32)div64_64((u64)tp->snd_cwnd * (rtt - yeah->baseRTT), rtt);
-
-			maxqueue = TCP_YEAH_ALPHA;
-
-			if (queue > maxqueue ||
-				    rtt - yeah->baseRTT > (yeah->baseRTT / TCP_YEAH_PHY)) {
-
-				if (queue > maxqueue && tp->snd_cwnd > yeah->reno_count) {
-					u32 reduction = min( queue / TCP_YEAH_GAMMA ,
-					                 tp->snd_cwnd >> TCP_YEAH_EPSILON );
+			/* Compute excess number of packets above bandwidth
+			 * Avoid doing full 64 bit divide.
+			 */
+			bw = tp->snd_cwnd;
+			bw *= rtt - yeah->baseRTT;
+			do_div(bw, rtt);
+			queue = bw;
+
+			if (queue > TCP_YEAH_ALPHA ||
+			    rtt - yeah->baseRTT > (yeah->baseRTT / TCP_YEAH_PHY)) {
+				if (queue > TCP_YEAH_ALPHA
+				    && tp->snd_cwnd > yeah->reno_count) {
+					u32 reduction = min(queue / TCP_YEAH_GAMMA ,
+							    tp->snd_cwnd >> TCP_YEAH_EPSILON);
 
 					tp->snd_cwnd -= reduction;
 
-					tp->snd_cwnd = max( tp->snd_cwnd, yeah->reno_count);
+					tp->snd_cwnd = max(tp->snd_cwnd,
+							   yeah->reno_count);
 
 					tp->snd_ssthresh = tp->snd_cwnd;
-			}
+				}
 
 				if (yeah->reno_count <= 2)
-					yeah->reno_count = max( tp->snd_cwnd>>1, 2U);
+					yeah->reno_count = max(tp->snd_cwnd>>1, 2U);
 				else
 					yeah->reno_count++;
 
-				yeah->doing_reno_now =
-					           min_t( u32, yeah->doing_reno_now + 1 , 0xffffff);
-
+				yeah->doing_reno_now = min(yeah->doing_reno_now + 1,
+							   0xffffffU);
 			} else {
 				yeah->fast_count++;
 
-- 
cgit v1.2.3


From 243bbcaa09e8482aa28065cbc2eb99f0ca2fc8d6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 6 Mar 2007 20:23:10 -0800
Subject: [IPV4]: Optimize inet_getpeer()

1) Some sysctl vars are declared __read_mostly

2) We can avoid updating stack[] when doing an AVL lookup only.

    lookup() macro is extended to receive a second parameter, that may be NULL
in case of a pure lookup (no need to save the AVL path). This removes
unnecessary instructions, because compiler knows if this _stack parameter is
NULL or not.

    text size of net/ipv4/inetpeer.o is 2063 bytes instead of 2107 on x86_64

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inetpeer.c | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index db3ef96bdfd..2f44e612806 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -87,10 +87,12 @@ static DEFINE_RWLOCK(peer_pool_lock);
 
 static int peer_total;
 /* Exported for sysctl_net_ipv4.  */
-int inet_peer_threshold = 65536 + 128;	/* start to throw entries more
+int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries more
 					 * aggressively at this stage */
-int inet_peer_minttl = 120 * HZ;	/* TTL under high load: 120 sec */
-int inet_peer_maxttl = 10 * 60 * HZ;	/* usual time to live: 10 min */
+int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
+int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */
+int inet_peer_gc_mintime __read_mostly = 10 * HZ;
+int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
 
 static struct inet_peer *inet_peer_unused_head;
 static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head;
@@ -99,9 +101,6 @@ static DEFINE_SPINLOCK(inet_peer_unused_lock);
 static void peer_check_expire(unsigned long dummy);
 static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
 
-/* Exported for sysctl_net_ipv4.  */
-int inet_peer_gc_mintime = 10 * HZ,
-    inet_peer_gc_maxtime = 120 * HZ;
 
 /* Called from ip_output.c:ip_init  */
 void __init inet_initpeers(void)
@@ -151,20 +150,27 @@ static void unlink_from_unused(struct inet_peer *p)
 	spin_unlock_bh(&inet_peer_unused_lock);
 }
 
-/* Called with local BH disabled and the pool lock held. */
-#define lookup(daddr) 						\
+/*
+ * Called with local BH disabled and the pool lock held.
+ * _stack is known to be NULL or not at compile time,
+ * so compiler will optimize the if (_stack) tests.
+ */
+#define lookup(_daddr,_stack) 					\
 ({								\
 	struct inet_peer *u, **v;				\
-	stackptr = stack;					\
-	*stackptr++ = &peer_root;				\
+	if (_stack) {						\
+		stackptr = _stack;				\
+		*stackptr++ = &peer_root;			\
+	}							\
 	for (u = peer_root; u != peer_avl_empty; ) {		\
-		if (daddr == u->v4daddr)			\
+		if (_daddr == u->v4daddr)			\
 			break;					\
-		if ((__force __u32)daddr < (__force __u32)u->v4daddr)	\
+		if ((__force __u32)_daddr < (__force __u32)u->v4daddr)	\
 			v = &u->avl_left;			\
 		else						\
 			v = &u->avl_right;			\
-		*stackptr++ = v;				\
+		if (_stack)					\
+			*stackptr++ = v;			\
 		u = *v;						\
 	}							\
 	u;							\
@@ -288,7 +294,7 @@ static void unlink_from_pool(struct inet_peer *p)
 	if (atomic_read(&p->refcnt) == 1) {
 		struct inet_peer **stack[PEER_MAXDEPTH];
 		struct inet_peer ***stackptr, ***delp;
-		if (lookup(p->v4daddr) != p)
+		if (lookup(p->v4daddr, stack) != p)
 			BUG();
 		delp = stackptr - 1; /* *delp[0] == p */
 		if (p->avl_left == peer_avl_empty) {
@@ -373,7 +379,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 
 	/* Look up for the address quickly. */
 	read_lock_bh(&peer_pool_lock);
-	p = lookup(daddr);
+	p = lookup(daddr, NULL);
 	if (p != peer_avl_empty)
 		atomic_inc(&p->refcnt);
 	read_unlock_bh(&peer_pool_lock);
@@ -400,7 +406,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 
 	write_lock_bh(&peer_pool_lock);
 	/* Check if an entry has suddenly appeared. */
-	p = lookup(daddr);
+	p = lookup(daddr, stack);
 	if (p != peer_avl_empty)
 		goto out_free;
 
-- 
cgit v1.2.3


From 1ab6eb62b02e0949a392fb19bf31ba59ae1022b1 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 6 Mar 2007 20:29:58 -0800
Subject: [UDP6]: Restore sk_filter optimisation

This reverts the changeset

    [IPV6]: UDPv6 checksum.

    We always need to check UDPv6 checksum because it is mandatory.

The sk_filter optimisation has nothing to do whether we verify the
checksum.  It simply postpones it to the point when the user calls
recv or poll.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f590db57a7c..3413fc22ce4 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -279,8 +279,10 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 		}
 	}
 
-	if (udp_lib_checksum_complete(skb))
-		goto drop;
+	if (sk->sk_filter) {
+		if (udp_lib_checksum_complete(skb))
+			goto drop;
+	}
 
 	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
 		/* Note that an ENOMEM error is charged twice */
-- 
cgit v1.2.3


From 759e5d006462d53fb708daa8284b4ad909415da1 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 25 Mar 2007 20:10:56 -0700
Subject: [UDP]: Clean up UDP-Lite receive checksum

This patch eliminates some duplicate code for the verification of
receive checksums between UDP-Lite and UDP.  It does this by
introducing __skb_checksum_complete_head which is identical to
__skb_checksum_complete_head apart from the fact that it takes
a length parameter rather than computing the first skb->len bytes.

As a result UDP-Lite will be able to use hardware checksum offload
for packets which do not use partial coverage checksums.  It also
means that UDP-Lite loopback no longer does unnecessary checksum
verification.

If any NICs start support UDP-Lite this would also start working
automatically.

This patch removes the assumption that msg_flags has MSG_TRUNC clear
upon entry in recvmsg.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  1 +
 include/net/udp.h      |  5 +--
 include/net/udplite.h  | 39 ++++----------------
 net/core/datagram.c    | 10 ++++--
 net/ipv4/udp.c         | 96 ++++++++++++++++++++++++++------------------------
 net/ipv4/udplite.c     |  2 +-
 net/ipv6/udp.c         | 74 +++++++++++++++++++++-----------------
 net/ipv6/udplite.c     |  2 +-
 8 files changed, 109 insertions(+), 120 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 30089adb2e7..df229bd5f1a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1372,6 +1372,7 @@ static inline void __net_timestamp(struct sk_buff *skb)
 }
 
 
+extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
 extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
 
 /**
diff --git a/include/net/udp.h b/include/net/udp.h
index 1b921fa8147..4a9699f7928 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -72,10 +72,7 @@ struct sk_buff;
  */
 static inline __sum16 __udp_lib_checksum_complete(struct sk_buff *skb)
 {
-	if (! UDP_SKB_CB(skb)->partial_cov)
-		return __skb_checksum_complete(skb);
-	return csum_fold(skb_checksum(skb, 0, UDP_SKB_CB(skb)->cscov,
-				      skb->csum));
+	return __skb_checksum_complete_head(skb, UDP_SKB_CB(skb)->cscov);
 }
 
 static inline int udp_lib_checksum_complete(struct sk_buff *skb)
diff --git a/include/net/udplite.h b/include/net/udplite.h
index 67ac5142430..d99df75fe54 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -47,11 +47,10 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
 		return 1;
 	}
 
-        UDP_SKB_CB(skb)->partial_cov = 0;
 	cscov = ntohs(uh->len);
 
 	if (cscov == 0)		 /* Indicates that full coverage is required. */
-		cscov = skb->len;
+		;
 	else if (cscov < 8  || cscov > skb->len) {
 		/*
 		 * Coverage length violates RFC 3828: log and discard silently.
@@ -60,42 +59,16 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
 			       cscov, skb->len);
 		return 1;
 
-	} else if (cscov < skb->len)
+	} else if (cscov < skb->len) {
         	UDP_SKB_CB(skb)->partial_cov = 1;
-
-        UDP_SKB_CB(skb)->cscov = cscov;
-
-	/*
-	 * There is no known NIC manufacturer supporting UDP-Lite yet,
-	 * hence ip_summed is always (re-)set to CHECKSUM_NONE.
-	 */
-	skb->ip_summed = CHECKSUM_NONE;
+		UDP_SKB_CB(skb)->cscov = cscov;
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
+        }
 
 	return 0;
 }
 
-static __inline__ int udplite4_csum_init(struct sk_buff *skb, struct udphdr *uh)
-{
-	int rc = udplite_checksum_init(skb, uh);
-
-	if (!rc)
-		skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr,
-					       skb->nh.iph->daddr,
-					       skb->len, IPPROTO_UDPLITE, 0);
-	return rc;
-}
-
-static __inline__ int udplite6_csum_init(struct sk_buff *skb, struct udphdr *uh)
-{
-	int rc = udplite_checksum_init(skb, uh);
-
-	if (!rc)
-		skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr,
-					     &skb->nh.ipv6h->daddr,
-					     skb->len, IPPROTO_UDPLITE, 0));
-	return rc;
-}
-
 static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh)
 {
 	int cscov = up->len;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 186212b5b7d..cb056f47612 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -411,11 +411,11 @@ fault:
 	return -EFAULT;
 }
 
-__sum16 __skb_checksum_complete(struct sk_buff *skb)
+__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
 {
 	__sum16 sum;
 
-	sum = csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
 	if (likely(!sum)) {
 		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
 			netdev_rx_csum_fault(skb->dev);
@@ -423,6 +423,12 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb)
 	}
 	return sum;
 }
+EXPORT_SYMBOL(__skb_checksum_complete_head);
+
+__sum16 __skb_checksum_complete(struct sk_buff *skb)
+{
+	return __skb_checksum_complete_head(skb, skb->len);
+}
 EXPORT_SYMBOL(__skb_checksum_complete);
 
 /**
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fc620a7c1db..86368832d48 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -810,7 +810,9 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	struct inet_sock *inet = inet_sk(sk);
 	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
 	struct sk_buff *skb;
-	int copied, err, copy_only, is_udplite = IS_UDPLITE(sk);
+	unsigned int ulen, copied;
+	int err;
+	int is_udplite = IS_UDPLITE(sk);
 
 	/*
 	 *	Check any passed addresses
@@ -826,28 +828,25 @@ try_again:
 	if (!skb)
 		goto out;
 
-	copied = skb->len - sizeof(struct udphdr);
-	if (copied > len) {
-		copied = len;
+	ulen = skb->len - sizeof(struct udphdr);
+	copied = len;
+	if (copied > ulen)
+		copied = ulen;
+	else if (copied < ulen)
 		msg->msg_flags |= MSG_TRUNC;
-	}
 
 	/*
-	 * 	Decide whether to checksum and/or copy data.
-	 *
-	 * 	UDP:      checksum may have been computed in HW,
-	 * 	          (re-)compute it if message is truncated.
-	 * 	UDP-Lite: always needs to checksum, no HW support.
+	 * If checksum is needed at all, try to do it while copying the
+	 * data.  If the data is truncated, or if we only want a partial
+	 * coverage checksum (UDP-Lite), do it before the copy.
 	 */
-	copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY);
 
-	if (is_udplite  ||  (!copy_only  &&  msg->msg_flags&MSG_TRUNC)) {
-		if (__udp_lib_checksum_complete(skb))
+	if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
+		if (udp_lib_checksum_complete(skb))
 			goto csum_copy_err;
-		copy_only = 1;
 	}
 
-	if (copy_only)
+	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
 		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
 					      msg->msg_iov, copied       );
 	else {
@@ -875,7 +874,7 @@ try_again:
 
 	err = copied;
 	if (flags & MSG_TRUNC)
-		err = skb->len - sizeof(struct udphdr);
+		err = ulen;
 
 out_free:
 	skb_free_datagram(sk, skb);
@@ -1095,10 +1094,9 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 		}
 	}
 
-	if (sk->sk_filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
-		if (__udp_lib_checksum_complete(skb))
+	if (sk->sk_filter) {
+		if (udp_lib_checksum_complete(skb))
 			goto drop;
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
 	if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
@@ -1166,25 +1164,36 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
  * Otherwise, csum completion requires chacksumming packet body,
  * including udp header and folding it to skb->csum.
  */
-static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh)
+static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
+				 int proto)
 {
+	int err;
+
+	UDP_SKB_CB(skb)->partial_cov = 0;
+	UDP_SKB_CB(skb)->cscov = skb->len;
+
+	if (proto == IPPROTO_UDPLITE) {
+		err = udplite_checksum_init(skb, uh);
+		if (err)
+			return err;
+	}
+
 	if (uh->check == 0) {
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
 	       if (!csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
-				      skb->len, IPPROTO_UDP, skb->csum       ))
+				      skb->len, proto, skb->csum))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
 		skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr,
 					       skb->nh.iph->daddr,
-					       skb->len, IPPROTO_UDP, 0);
+					       skb->len, proto, 0);
 	/* Probably, we should checksum udp header (it should be in cache
 	 * in any case) and data in tiny packets (< rx copybreak).
 	 */
 
-	/* UDP = UDP-Lite with a non-partial checksum coverage */
-	UDP_SKB_CB(skb)->partial_cov = 0;
+	return 0;
 }
 
 /*
@@ -1192,7 +1201,7 @@ static inline void udp4_csum_init(struct sk_buff *skb, struct udphdr *uh)
  */
 
 int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
-		   int is_udplite)
+		   int proto)
 {
 	struct sock *sk;
 	struct udphdr *uh = skb->h.uh;
@@ -1211,19 +1220,16 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	if (ulen > skb->len)
 		goto short_packet;
 
-	if(! is_udplite ) {		/* UDP validates ulen. */
-
+	if (proto == IPPROTO_UDP) {
+		/* UDP validates ulen. */
 		if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
 			goto short_packet;
 		uh = skb->h.uh;
-
-		udp4_csum_init(skb, uh);
-
-	} else 	{			/* UDP-Lite validates cscov. */
-		if (udplite4_csum_init(skb, uh))
-			goto csum_error;
 	}
 
+	if (udp4_csum_init(skb, uh, proto))
+		goto csum_error;
+
 	if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
 		return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
 
@@ -1250,7 +1256,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	if (udp_lib_checksum_complete(skb))
 		goto csum_error;
 
-	UDP_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite);
+	UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 
 	/*
@@ -1262,7 +1268,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 
 short_packet:
 	LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
-		       is_udplite? "-Lite" : "",
+		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
 		       NIPQUAD(saddr),
 		       ntohs(uh->source),
 		       ulen,
@@ -1277,21 +1283,21 @@ csum_error:
 	 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
 	 */
 	LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
-		       is_udplite? "-Lite" : "",
+		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
 		       NIPQUAD(saddr),
 		       ntohs(uh->source),
 		       NIPQUAD(daddr),
 		       ntohs(uh->dest),
 		       ulen);
 drop:
-	UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
+	UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
 	kfree_skb(skb);
 	return(0);
 }
 
 __inline__ int udp_rcv(struct sk_buff *skb)
 {
-	return __udp4_lib_rcv(skb, udp_hash, 0);
+	return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
 }
 
 int udp_destroy_sock(struct sock *sk)
@@ -1486,15 +1492,11 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		struct sk_buff *skb;
 
 		spin_lock_bh(&rcvq->lock);
-		while ((skb = skb_peek(rcvq)) != NULL) {
-			if (udp_lib_checksum_complete(skb)) {
-				UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
-				__skb_unlink(skb, rcvq);
-				kfree_skb(skb);
-			} else {
-				skb->ip_summed = CHECKSUM_UNNECESSARY;
-				break;
-			}
+		while ((skb = skb_peek(rcvq)) != NULL &&
+		       udp_lib_checksum_complete(skb)) {
+			UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
+			__skb_unlink(skb, rcvq);
+			kfree_skb(skb);
 		}
 		spin_unlock_bh(&rcvq->lock);
 
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index b28fe1edf98..f34fd686a8f 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -31,7 +31,7 @@ static int udplite_v4_get_port(struct sock *sk, unsigned short snum)
 
 static int udplite_rcv(struct sk_buff *skb)
 {
-	return __udp4_lib_rcv(skb, udplite_hash, 1);
+	return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
 }
 
 static void udplite_err(struct sk_buff *skb, u32 info)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3413fc22ce4..73337168979 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -120,8 +120,9 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
 	struct sk_buff *skb;
-	size_t copied;
-	int err, copy_only, is_udplite = IS_UDPLITE(sk);
+	unsigned int ulen, copied;
+	int err;
+	int is_udplite = IS_UDPLITE(sk);
 
 	if (addr_len)
 		*addr_len=sizeof(struct sockaddr_in6);
@@ -134,24 +135,25 @@ try_again:
 	if (!skb)
 		goto out;
 
-	copied = skb->len - sizeof(struct udphdr);
-	if (copied > len) {
-		copied = len;
+	ulen = skb->len - sizeof(struct udphdr);
+	copied = len;
+	if (copied > ulen)
+		copied = ulen;
+	else if (copied < ulen)
 		msg->msg_flags |= MSG_TRUNC;
-	}
 
 	/*
-	 * 	Decide whether to checksum and/or copy data.
+	 * If checksum is needed at all, try to do it while copying the
+	 * data.  If the data is truncated, or if we only want a partial
+	 * coverage checksum (UDP-Lite), do it before the copy.
 	 */
-	copy_only = (skb->ip_summed==CHECKSUM_UNNECESSARY);
 
-	if (is_udplite  ||  (!copy_only  &&  msg->msg_flags&MSG_TRUNC)) {
-		if (__udp_lib_checksum_complete(skb))
+	if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
+		if (udp_lib_checksum_complete(skb))
 			goto csum_copy_err;
-		copy_only = 1;
 	}
 
-	if (copy_only)
+	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
 		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
 					      msg->msg_iov, copied       );
 	else {
@@ -194,7 +196,7 @@ try_again:
 
 	err = copied;
 	if (flags & MSG_TRUNC)
-		err = skb->len - sizeof(struct udphdr);
+		err = ulen;
 
 out_free:
 	skb_free_datagram(sk, skb);
@@ -368,9 +370,20 @@ out:
 	return 0;
 }
 
-static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh)
-
+static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
+				 int proto)
 {
+	int err;
+
+	UDP_SKB_CB(skb)->partial_cov = 0;
+	UDP_SKB_CB(skb)->cscov = skb->len;
+
+	if (proto == IPPROTO_UDPLITE) {
+		err = udplite_checksum_init(skb, uh);
+		if (err)
+			return err;
+	}
+
 	if (uh->check == 0) {
 		/* RFC 2460 section 8.1 says that we SHOULD log
 		   this error. Well, it is reasonable.
@@ -380,20 +393,19 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh)
 	}
 	if (skb->ip_summed == CHECKSUM_COMPLETE &&
 	    !csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
-			     skb->len, IPPROTO_UDP, skb->csum             ))
+			     skb->len, proto, skb->csum))
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
 		skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr,
 							 &skb->nh.ipv6h->daddr,
-							 skb->len, IPPROTO_UDP,
-							 0));
+							 skb->len, proto, 0));
 
-	return (UDP_SKB_CB(skb)->partial_cov = 0);
+	return 0;
 }
 
 int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
-		   int is_udplite)
+		   int proto)
 {
 	struct sk_buff *skb = *pskb;
 	struct sock *sk;
@@ -413,7 +425,8 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
 	if (ulen > skb->len)
 		goto short_packet;
 
-	if(! is_udplite ) {		/* UDP validates ulen. */
+	if (proto == IPPROTO_UDP) {
+		/* UDP validates ulen. */
 
 		/* Check for jumbo payload */
 		if (ulen == 0)
@@ -429,15 +442,11 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
 			daddr = &skb->nh.ipv6h->daddr;
 			uh = skb->h.uh;
 		}
-
-		if (udp6_csum_init(skb, uh))
-			goto discard;
-
-	} else 	{			/* UDP-Lite validates cscov. */
-		if (udplite6_csum_init(skb, uh))
-			goto discard;
 	}
 
+	if (udp6_csum_init(skb, uh, proto))
+		goto discard;
+
 	/*
 	 *	Multicast receive code
 	 */
@@ -459,7 +468,7 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
 
 		if (udp_lib_checksum_complete(skb))
 			goto discard;
-		UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, is_udplite);
+		UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
 
 		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
 
@@ -475,17 +484,18 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
 
 short_packet:
 	LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n",
-		       is_udplite? "-Lite" : "",  ulen, skb->len);
+		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
+		       ulen, skb->len);
 
 discard:
-	UDP6_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
+	UDP6_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
 	kfree_skb(skb);
 	return(0);
 }
 
 static __inline__ int udpv6_rcv(struct sk_buff **pskb)
 {
-	return __udp6_lib_rcv(pskb, udp_hash, 0);
+	return __udp6_lib_rcv(pskb, udp_hash, IPPROTO_UDP);
 }
 
 /*
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 629f97162fb..f54016a5500 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -19,7 +19,7 @@ DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly;
 
 static int udplitev6_rcv(struct sk_buff **pskb)
 {
-	return __udp6_lib_rcv(pskb, udplite_hash, 1);
+	return __udp6_lib_rcv(pskb, udplite_hash, IPPROTO_UDPLITE);
 }
 
 static void udplitev6_err(struct sk_buff *skb,
-- 
cgit v1.2.3


From 2953fd246845f4d00af3717163f37b2ff4c5ce29 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sun, 25 Mar 2007 20:11:55 -0700
Subject: [NET] 802: Use hton{s,l}() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/802/fddi.c  | 4 ++--
 net/802/hippi.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/802/fddi.c b/net/802/fddi.c
index ace6386384b..8c86216b1c8 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -100,7 +100,7 @@ static int fddi_rebuild_header(struct sk_buff	*skb)
 	struct fddihdr *fddi = (struct fddihdr *)skb->data;
 
 #ifdef CONFIG_INET
-	if (fddi->hdr.llc_snap.ethertype == __constant_htons(ETH_P_IP))
+	if (fddi->hdr.llc_snap.ethertype == htons(ETH_P_IP))
 		/* Try to get ARP to resolve the header and fill destination address */
 		return arp_find(fddi->daddr, skb);
 	else
@@ -135,7 +135,7 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
 	if(fddi->hdr.llc_8022_1.dsap==0xe0)
 	{
 		skb_pull(skb, FDDI_K_8022_HLEN-3);
-		type = __constant_htons(ETH_P_802_2);
+		type = htons(ETH_P_802_2);
 	}
 	else
 	{
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 578f2a3d692..35dd938cff9 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -60,7 +60,7 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev,
 	 * Due to the stupidity of the little endian byte-order we
 	 * have to set the fp field this way.
 	 */
-	hip->fp.fixed		= __constant_htonl(0x04800018);
+	hip->fp.fixed		= htonl(0x04800018);
 	hip->fp.d2_size		= htonl(len + 8);
 	hip->le.fc		= 0;
 	hip->le.double_wide	= 0;	/* only HIPPI 800 for the time being */
@@ -104,7 +104,7 @@ static int hippi_rebuild_header(struct sk_buff *skb)
 	 * Only IP is currently supported
 	 */
 
-	if(hip->snap.ethertype != __constant_htons(ETH_P_IP))
+	if(hip->snap.ethertype != htons(ETH_P_IP))
 	{
 		printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",skb->dev->name,ntohs(hip->snap.ethertype));
 		return 0;
-- 
cgit v1.2.3


From b93b7eebd328d5c1d171896fb823267539d4a0f6 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sun, 25 Mar 2007 20:12:18 -0700
Subject: [NET] 8021Q: Use htons() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b6e0eea1e39..0991e293940 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -258,7 +258,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	 * won't work for fault tolerant netware but does for the rest.
 	 */
 	if (*(unsigned short *)rawp == 0xFFFF) {
-		skb->protocol = __constant_htons(ETH_P_802_3);
+		skb->protocol = htons(ETH_P_802_3);
 		/* place it back on the queue to be handled by true layer 3 protocols.
 		 */
 
@@ -281,7 +281,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	/*
 	 *	Real 802.2 LLC
 	 */
-	skb->protocol = __constant_htons(ETH_P_802_2);
+	skb->protocol = htons(ETH_P_802_2);
 	/* place it back on the queue to be handled by upper layer protocols.
 	 */
 
@@ -448,7 +448,7 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
 	 */
 
-	if (veth->h_vlan_proto != __constant_htons(ETH_P_8021Q)) {
+	if (veth->h_vlan_proto != htons(ETH_P_8021Q)) {
 		int orig_headroom = skb_headroom(skb);
 		unsigned short veth_TCI;
 
-- 
cgit v1.2.3


From acde4855bb8f5fba8bb065d35ff6ac8a94b3dfa8 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sun, 25 Mar 2007 20:12:32 -0700
Subject: [NET] ATM: Use htons() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/br2684.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index ec4ebd3299e..c444f5eda22 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -375,11 +375,11 @@ packet_fails_filter(__be16 type, struct br2684_vcc *brvcc, struct sk_buff *skb)
 {
 	if (brvcc->filter.netmask == 0)
 		return 0;			/* no filter in place */
-	if (type == __constant_htons(ETH_P_IP) &&
+	if (type == htons(ETH_P_IP) &&
 	    (((struct iphdr *) (skb->data))->daddr & brvcc->filter.
 	     netmask) == brvcc->filter.prefix)
 		return 0;
-	if (type == __constant_htons(ETH_P_ARP))
+	if (type == htons(ETH_P_ARP))
 		return 0;
 	/* TODO: we should probably filter ARPs too.. don't want to have
 	 *   them returning values that don't make sense, or is that ok?
-- 
cgit v1.2.3


From aca3192cc60d2bf193c2252e45563c32e3117289 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sun, 25 Mar 2007 20:12:50 -0700
Subject: [NET] BLUETOOTH: Use cpu_to_le{16,32}() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/hci_conn.c  | 36 ++++++++++++------------
 net/bluetooth/hci_core.c  | 20 +++++++-------
 net/bluetooth/hci_event.c |  8 +++---
 net/bluetooth/l2cap.c     | 70 +++++++++++++++++++++++------------------------
 4 files changed, 67 insertions(+), 67 deletions(-)

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index f3403fdb59f..63980bd6b5f 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -72,11 +72,11 @@ void hci_acl_connect(struct hci_conn *conn)
 			inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) {
 		cp.pscan_rep_mode = ie->data.pscan_rep_mode;
 		cp.pscan_mode     = ie->data.pscan_mode;
-		cp.clock_offset   = ie->data.clock_offset | __cpu_to_le16(0x8000);
+		cp.clock_offset   = ie->data.clock_offset | cpu_to_le16(0x8000);
 		memcpy(conn->dev_class, ie->data.dev_class, 3);
 	}
 
-	cp.pkt_type = __cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK);
+	cp.pkt_type = cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK);
 	if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER))
 		cp.role_switch	= 0x01;
 	else
@@ -107,7 +107,7 @@ void hci_acl_disconn(struct hci_conn *conn, __u8 reason)
 
 	conn->state = BT_DISCONN;
 
-	cp.handle = __cpu_to_le16(conn->handle);
+	cp.handle = cpu_to_le16(conn->handle);
 	cp.reason = reason;
 	hci_send_cmd(conn->hdev, OGF_LINK_CTL,
 				OCF_DISCONNECT, sizeof(cp), &cp);
@@ -123,8 +123,8 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle)
 	conn->state = BT_CONNECT;
 	conn->out = 1;
 
-	cp.pkt_type = __cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
-	cp.handle   = __cpu_to_le16(handle);
+	cp.pkt_type = cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
+	cp.handle   = cpu_to_le16(handle);
 
 	hci_send_cmd(hdev, OGF_LINK_CTL, OCF_ADD_SCO, sizeof(cp), &cp);
 }
@@ -348,7 +348,7 @@ int hci_conn_auth(struct hci_conn *conn)
 
 	if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
 		struct hci_cp_auth_requested cp;
-		cp.handle = __cpu_to_le16(conn->handle);
+		cp.handle = cpu_to_le16(conn->handle);
 		hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_AUTH_REQUESTED, sizeof(cp), &cp);
 	}
 	return 0;
@@ -368,7 +368,7 @@ int hci_conn_encrypt(struct hci_conn *conn)
 
 	if (hci_conn_auth(conn)) {
 		struct hci_cp_set_conn_encrypt cp;
-		cp.handle  = __cpu_to_le16(conn->handle);
+		cp.handle  = cpu_to_le16(conn->handle);
 		cp.encrypt = 1;
 		hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp);
 	}
@@ -383,7 +383,7 @@ int hci_conn_change_link_key(struct hci_conn *conn)
 
 	if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
 		struct hci_cp_change_conn_link_key cp;
-		cp.handle = __cpu_to_le16(conn->handle);
+		cp.handle = cpu_to_le16(conn->handle);
 		hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_CHANGE_CONN_LINK_KEY, sizeof(cp), &cp);
 	}
 	return 0;
@@ -423,7 +423,7 @@ void hci_conn_enter_active_mode(struct hci_conn *conn)
 
 	if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) {
 		struct hci_cp_exit_sniff_mode cp;
-		cp.handle = __cpu_to_le16(conn->handle);
+		cp.handle = cpu_to_le16(conn->handle);
 		hci_send_cmd(hdev, OGF_LINK_POLICY,
 				OCF_EXIT_SNIFF_MODE, sizeof(cp), &cp);
 	}
@@ -452,21 +452,21 @@ void hci_conn_enter_sniff_mode(struct hci_conn *conn)
 
 	if (lmp_sniffsubr_capable(hdev) && lmp_sniffsubr_capable(conn)) {
 		struct hci_cp_sniff_subrate cp;
-		cp.handle             = __cpu_to_le16(conn->handle);
-		cp.max_latency        = __constant_cpu_to_le16(0);
-		cp.min_remote_timeout = __constant_cpu_to_le16(0);
-		cp.min_local_timeout  = __constant_cpu_to_le16(0);
+		cp.handle             = cpu_to_le16(conn->handle);
+		cp.max_latency        = cpu_to_le16(0);
+		cp.min_remote_timeout = cpu_to_le16(0);
+		cp.min_local_timeout  = cpu_to_le16(0);
 		hci_send_cmd(hdev, OGF_LINK_POLICY,
 				OCF_SNIFF_SUBRATE, sizeof(cp), &cp);
 	}
 
 	if (!test_and_set_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend)) {
 		struct hci_cp_sniff_mode cp;
-		cp.handle       = __cpu_to_le16(conn->handle);
-		cp.max_interval = __cpu_to_le16(hdev->sniff_max_interval);
-		cp.min_interval = __cpu_to_le16(hdev->sniff_min_interval);
-		cp.attempt      = __constant_cpu_to_le16(4);
-		cp.timeout      = __constant_cpu_to_le16(1);
+		cp.handle       = cpu_to_le16(conn->handle);
+		cp.max_interval = cpu_to_le16(hdev->sniff_max_interval);
+		cp.min_interval = cpu_to_le16(hdev->sniff_min_interval);
+		cp.attempt      = cpu_to_le16(4);
+		cp.timeout      = cpu_to_le16(1);
 		hci_send_cmd(hdev, OGF_LINK_POLICY,
 				OCF_SNIFF_MODE, sizeof(cp), &cp);
 	}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 4917919d86a..64fea0903fd 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -216,10 +216,10 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
 	/* Host buffer size */
 	{
 		struct hci_cp_host_buffer_size cp;
-		cp.acl_mtu = __cpu_to_le16(HCI_MAX_ACL_SIZE);
+		cp.acl_mtu = cpu_to_le16(HCI_MAX_ACL_SIZE);
 		cp.sco_mtu = HCI_MAX_SCO_SIZE;
-		cp.acl_max_pkt = __cpu_to_le16(0xffff);
-		cp.sco_max_pkt = __cpu_to_le16(0xffff);
+		cp.acl_max_pkt = cpu_to_le16(0xffff);
+		cp.sco_max_pkt = cpu_to_le16(0xffff);
 		hci_send_cmd(hdev, OGF_HOST_CTL, OCF_HOST_BUFFER_SIZE, sizeof(cp), &cp);
 	}
 #endif
@@ -240,11 +240,11 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
 	}
 
 	/* Page timeout ~20 secs */
-	param = __cpu_to_le16(0x8000);
+	param = cpu_to_le16(0x8000);
 	hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_PG_TIMEOUT, 2, &param);
 
 	/* Connection accept timeout ~20 secs */
-	param = __cpu_to_le16(0x7d00);
+	param = cpu_to_le16(0x7d00);
 	hci_send_cmd(hdev, OGF_HOST_CTL, OCF_WRITE_CA_TIMEOUT, 2, &param);
 }
 
@@ -1034,7 +1034,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 ogf, __u16 ocf, __u32 plen, void *p
 	}
 
 	hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE);
-	hdr->opcode = __cpu_to_le16(hci_opcode_pack(ogf, ocf));
+	hdr->opcode = cpu_to_le16(hci_opcode_pack(ogf, ocf));
 	hdr->plen   = plen;
 
 	if (plen)
@@ -1060,7 +1060,7 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 ogf, __u16 ocf)
 
 	hdr = (void *) hdev->sent_cmd->data;
 
-	if (hdr->opcode != __cpu_to_le16(hci_opcode_pack(ogf, ocf)))
+	if (hdr->opcode != cpu_to_le16(hci_opcode_pack(ogf, ocf)))
 		return NULL;
 
 	BT_DBG("%s ogf 0x%x ocf 0x%x", hdev->name, ogf, ocf);
@@ -1075,8 +1075,8 @@ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags)
 	int len = skb->len;
 
 	hdr = (struct hci_acl_hdr *) skb_push(skb, HCI_ACL_HDR_SIZE);
-	hdr->handle = __cpu_to_le16(hci_handle_pack(handle, flags));
-	hdr->dlen   = __cpu_to_le16(len);
+	hdr->handle = cpu_to_le16(hci_handle_pack(handle, flags));
+	hdr->dlen   = cpu_to_le16(len);
 
 	skb->h.raw = (void *) hdr;
 }
@@ -1140,7 +1140,7 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
 		return -EINVAL;
 	}
 
-	hdr.handle = __cpu_to_le16(conn->handle);
+	hdr.handle = cpu_to_le16(conn->handle);
 	hdr.dlen   = skb->len;
 
 	skb->h.raw = skb_push(skb, HCI_SCO_HDR_SIZE);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 936d3fc479c..447ba713122 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -783,7 +783,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 		if (conn->type == ACL_LINK && hdev->link_policy) {
 			struct hci_cp_write_link_policy cp;
 			cp.handle = ev->handle;
-			cp.policy = __cpu_to_le16(hdev->link_policy);
+			cp.policy = cpu_to_le16(hdev->link_policy);
 			hci_send_cmd(hdev, OGF_LINK_POLICY,
 				OCF_WRITE_LINK_POLICY, sizeof(cp), &cp);
 		}
@@ -793,8 +793,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 			struct hci_cp_change_conn_ptype cp;
 			cp.handle = ev->handle;
 			cp.pkt_type = (conn->type == ACL_LINK) ?
-				__cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK):
-				__cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
+				cpu_to_le16(hdev->pkt_type & ACL_PTYPE_MASK):
+				cpu_to_le16(hdev->pkt_type & SCO_PTYPE_MASK);
 
 			hci_send_cmd(hdev, OGF_LINK_CTL,
 				OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp);
@@ -970,7 +970,7 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 		if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) {
 			if (!ev->status) {
 				struct hci_cp_set_conn_encrypt cp;
-				cp.handle  = __cpu_to_le16(conn->handle);
+				cp.handle  = cpu_to_le16(conn->handle);
 				cp.encrypt = 1;
 				hci_send_cmd(conn->hdev, OGF_LINK_CTL,
 					OCF_SET_CONN_ENCRYPT, sizeof(cp), &cp);
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index e83ee82440d..162eab6a447 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -459,8 +459,8 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
 			sk->sk_state = BT_DISCONN;
 			l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
 
-			req.dcid = __cpu_to_le16(l2cap_pi(sk)->dcid);
-			req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+			req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid);
+			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 			l2cap_send_cmd(conn, l2cap_get_ident(conn),
 					L2CAP_DISCONN_REQ, sizeof(req), &req);
 		} else {
@@ -652,7 +652,7 @@ static int l2cap_do_connect(struct sock *sk)
 		if (sk->sk_type == SOCK_SEQPACKET) {
 			struct l2cap_conn_req req;
 			l2cap_pi(sk)->ident = l2cap_get_ident(conn);
-			req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 			req.psm  = l2cap_pi(sk)->psm;
 			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
 					L2CAP_CONN_REQ, sizeof(req), &req);
@@ -868,8 +868,8 @@ static inline int l2cap_do_send(struct sock *sk, struct msghdr *msg, int len)
 
 	/* Create L2CAP header */
 	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
-	lh->cid = __cpu_to_le16(l2cap_pi(sk)->dcid);
-	lh->len = __cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
+	lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid);
+	lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE));
 
 	if (sk->sk_type == SOCK_DGRAM)
 		put_unaligned(l2cap_pi(sk)->psm, (u16 *) skb_put(skb, 2));
@@ -1096,7 +1096,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
 		} else if (sk->sk_state == BT_CONNECT) {
 			struct l2cap_conn_req req;
 			l2cap_pi(sk)->ident = l2cap_get_ident(conn);
-			req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 			req.psm  = l2cap_pi(sk)->psm;
 			l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_REQ, sizeof(req), &req);
 		}
@@ -1192,13 +1192,13 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
 		return NULL;
 
 	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
-	lh->len = __cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
-	lh->cid = __cpu_to_le16(0x0001);
+	lh->len = cpu_to_le16(L2CAP_CMD_HDR_SIZE + dlen);
+	lh->cid = cpu_to_le16(0x0001);
 
 	cmd = (struct l2cap_cmd_hdr *) skb_put(skb, L2CAP_CMD_HDR_SIZE);
 	cmd->code  = code;
 	cmd->ident = ident;
-	cmd->len   = __cpu_to_le16(dlen);
+	cmd->len   = cpu_to_le16(dlen);
 
 	if (dlen) {
 		count -= L2CAP_HDR_SIZE + L2CAP_CMD_HDR_SIZE;
@@ -1316,11 +1316,11 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
 		break;
 
 	case 2:
-		*((u16 *) opt->val) = __cpu_to_le16(val);
+		*((u16 *) opt->val) = cpu_to_le16(val);
 		break;
 
 	case 4:
-		*((u32 *) opt->val) = __cpu_to_le32(val);
+		*((u32 *) opt->val) = cpu_to_le32(val);
 		break;
 
 	default:
@@ -1346,8 +1346,8 @@ static int l2cap_build_conf_req(struct sock *sk, void *data)
 	//if (flush_to != L2CAP_DEFAULT_FLUSH_TO)
 	//   l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to);
 
-	req->dcid  = __cpu_to_le16(pi->dcid);
-	req->flags = __cpu_to_le16(0);
+	req->dcid  = cpu_to_le16(pi->dcid);
+	req->flags = cpu_to_le16(0);
 
 	return ptr - data;
 }
@@ -1383,9 +1383,9 @@ static int l2cap_build_conf_rsp(struct sock *sk, void *data, int *result)
 	else
 		flags = 0x0001;
 
-	rsp->scid   = __cpu_to_le16(l2cap_pi(sk)->dcid);
-	rsp->result = __cpu_to_le16(result ? *result : 0);
-	rsp->flags  = __cpu_to_le16(flags);
+	rsp->scid   = cpu_to_le16(l2cap_pi(sk)->dcid);
+	rsp->result = cpu_to_le16(result ? *result : 0);
+	rsp->flags  = cpu_to_le16(flags);
 
 	return ptr - data;
 }
@@ -1470,10 +1470,10 @@ response:
 	bh_unlock_sock(parent);
 
 sendresp:
-	rsp.scid   = __cpu_to_le16(scid);
-	rsp.dcid   = __cpu_to_le16(dcid);
-	rsp.result = __cpu_to_le16(result);
-	rsp.status = __cpu_to_le16(status);
+	rsp.scid   = cpu_to_le16(scid);
+	rsp.dcid   = cpu_to_le16(dcid);
+	rsp.result = cpu_to_le16(result);
+	rsp.status = cpu_to_le16(status);
 	l2cap_send_cmd(conn, cmd->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp);
 	return 0;
 }
@@ -1613,8 +1613,8 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 		l2cap_sock_set_timer(sk, HZ * 5);
 		{
 			struct l2cap_disconn_req req;
-			req.dcid = __cpu_to_le16(l2cap_pi(sk)->dcid);
-			req.scid = __cpu_to_le16(l2cap_pi(sk)->scid);
+			req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid);
+			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 			l2cap_send_cmd(conn, l2cap_get_ident(conn),
 					L2CAP_DISCONN_REQ, sizeof(req), &req);
 		}
@@ -1652,8 +1652,8 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd
 	if (!(sk = l2cap_get_chan_by_scid(&conn->chan_list, dcid)))
 		return 0;
 
-	rsp.dcid = __cpu_to_le16(l2cap_pi(sk)->scid);
-	rsp.scid = __cpu_to_le16(l2cap_pi(sk)->dcid);
+	rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
+	rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
 	l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp);
 
 	sk->sk_shutdown = SHUTDOWN_MASK;
@@ -1696,8 +1696,8 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cm
 
 	BT_DBG("type 0x%4.4x", type);
 
-	rsp.type   = __cpu_to_le16(type);
-	rsp.result = __cpu_to_le16(L2CAP_IR_NOTSUPP);
+	rsp.type   = cpu_to_le16(type);
+	rsp.result = cpu_to_le16(L2CAP_IR_NOTSUPP);
 	l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(rsp), &rsp);
 
 	return 0;
@@ -1794,7 +1794,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk
 			BT_DBG("error %d", err);
 
 			/* FIXME: Map err to a valid reason */
-			rej.reason = __cpu_to_le16(0);
+			rej.reason = cpu_to_le16(0);
 			l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej);
 		}
 
@@ -1993,10 +1993,10 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status)
 			result = L2CAP_CR_SEC_BLOCK;
 		}
 
-		rsp.scid   = __cpu_to_le16(l2cap_pi(sk)->dcid);
-		rsp.dcid   = __cpu_to_le16(l2cap_pi(sk)->scid);
-		rsp.result = __cpu_to_le16(result);
-		rsp.status = __cpu_to_le16(0);
+		rsp.scid   = cpu_to_le16(l2cap_pi(sk)->dcid);
+		rsp.dcid   = cpu_to_le16(l2cap_pi(sk)->scid);
+		rsp.result = cpu_to_le16(result);
+		rsp.status = cpu_to_le16(0);
 		l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
 				L2CAP_CONN_RSP, sizeof(rsp), &rsp);
 
@@ -2041,10 +2041,10 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status)
 			result = L2CAP_CR_SEC_BLOCK;
 		}
 
-		rsp.scid   = __cpu_to_le16(l2cap_pi(sk)->dcid);
-		rsp.dcid   = __cpu_to_le16(l2cap_pi(sk)->scid);
-		rsp.result = __cpu_to_le16(result);
-		rsp.status = __cpu_to_le16(0);
+		rsp.scid   = cpu_to_le16(l2cap_pi(sk)->dcid);
+		rsp.dcid   = cpu_to_le16(l2cap_pi(sk)->scid);
+		rsp.result = cpu_to_le16(result);
+		rsp.status = cpu_to_le16(0);
 		l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
 				L2CAP_CONN_RSP, sizeof(rsp), &rsp);
 
-- 
cgit v1.2.3


From 724800d61b8bc574a364707b6a6c6a6252e8cdb4 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sun, 25 Mar 2007 20:13:04 -0700
Subject: [NET] CORE: Use htons() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 4581ece48bb..32a9f80b5f1 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -443,7 +443,7 @@ int __netpoll_rx(struct sk_buff *skb)
 		goto out;
 
 	/* check if netpoll clients need ARP */
-	if (skb->protocol == __constant_htons(ETH_P_ARP) &&
+	if (skb->protocol == htons(ETH_P_ARP) &&
 	    atomic_read(&trapped)) {
 		skb_queue_tail(&npi->arp_tx, skb);
 		return 1;
-- 
cgit v1.2.3


From f576e24ffaf2c6b01af389e3bad3342681a8b84f Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 7 Mar 2007 14:19:03 +0900
Subject: [NET] ETHERNET: Use htons() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethernet/eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 7391f55904d..41c5065f4a8 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -228,7 +228,7 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh)
 	eth = (struct ethhdr *)
 	    (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth))));
 
-	if (type == __constant_htons(ETH_P_802_3))
+	if (type == htons(ETH_P_802_3))
 		return -1;
 
 	eth->h_proto = type;
-- 
cgit v1.2.3


From 1c9e8ef7f731c2548414644e5bf540c38c85aff0 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 7 Mar 2007 14:19:05 +0900
Subject: [NET] IEEE80211: Use htons() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee80211/ieee80211_tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index 0292d6348e1..3fca4345ebe 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -225,7 +225,7 @@ static int ieee80211_classify(struct sk_buff *skb)
 	struct iphdr *ip;
 
 	eth = (struct ethhdr *)skb->data;
-	if (eth->h_proto != __constant_htons(ETH_P_IP))
+	if (eth->h_proto != htons(ETH_P_IP))
 		return 0;
 
 	ip = skb->nh.iph;
-- 
cgit v1.2.3


From 4412ec494868160d57da6e436a92b0696f40b19d Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 7 Mar 2007 14:19:10 +0900
Subject: [NET] IPV4: Use hton{s,l}() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipvs/ip_vs_core.c                  | 10 +++++-----
 net/ipv4/ipvs/ip_vs_proto_ah.c              | 16 ++++++++--------
 net/ipv4/ipvs/ip_vs_xmit.c                  | 16 ++++++++--------
 net/ipv4/netfilter/ip_conntrack_proto_tcp.c |  9 ++++-----
 4 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 24d7b66eb6d..858686d616a 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -617,7 +617,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
 	*related = 1;
 
 	/* reassemble IP fragments */
-	if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
+	if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
 		skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
 		if (!skb)
 			return NF_STOLEN;
@@ -659,7 +659,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
 		return NF_ACCEPT;
 
 	/* Is the embedded protocol header present? */
-	if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
+	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
 		     pp->dont_defrag))
 		return NF_ACCEPT;
 
@@ -755,7 +755,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 		return NF_ACCEPT;
 
 	/* reassemble IP fragments */
-	if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) &&
+	if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
 		     !pp->dont_defrag)) {
 		skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
 		if (!skb)
@@ -861,7 +861,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
 	*related = 1;
 
 	/* reassemble IP fragments */
-	if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
+	if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
 		skb = ip_vs_gather_frags(skb,
 					 hooknum == NF_IP_LOCAL_IN ?
 					 IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
@@ -905,7 +905,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
 		return NF_ACCEPT;
 
 	/* Is the embedded protocol header present? */
-	if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
+	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
 		     pp->dont_defrag))
 		return NF_ACCEPT;
 
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 8b0505b0931..a842676e1c6 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -52,15 +52,15 @@ ah_conn_in_get(const struct sk_buff *skb,
 	if (likely(!inverse)) {
 		cp = ip_vs_conn_in_get(IPPROTO_UDP,
 				       iph->saddr,
-				       __constant_htons(PORT_ISAKMP),
+				       htons(PORT_ISAKMP),
 				       iph->daddr,
-				       __constant_htons(PORT_ISAKMP));
+				       htons(PORT_ISAKMP));
 	} else {
 		cp = ip_vs_conn_in_get(IPPROTO_UDP,
 				       iph->daddr,
-				       __constant_htons(PORT_ISAKMP),
+				       htons(PORT_ISAKMP),
 				       iph->saddr,
-				       __constant_htons(PORT_ISAKMP));
+				       htons(PORT_ISAKMP));
 	}
 
 	if (!cp) {
@@ -89,15 +89,15 @@ ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 	if (likely(!inverse)) {
 		cp = ip_vs_conn_out_get(IPPROTO_UDP,
 					iph->saddr,
-					__constant_htons(PORT_ISAKMP),
+					htons(PORT_ISAKMP),
 					iph->daddr,
-					__constant_htons(PORT_ISAKMP));
+					htons(PORT_ISAKMP));
 	} else {
 		cp = ip_vs_conn_out_get(IPPROTO_UDP,
 					iph->daddr,
-					__constant_htons(PORT_ISAKMP),
+					htons(PORT_ISAKMP),
 					iph->saddr,
-					__constant_htons(PORT_ISAKMP));
+					htons(PORT_ISAKMP));
 	}
 
 	if (!cp) {
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index e1f77bd7c9a..f73c5acf5dd 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -178,7 +178,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->u.dst);
-	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
@@ -245,7 +245,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->u.dst);
-	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
@@ -329,10 +329,10 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	EnterFunction(10);
 
-	if (skb->protocol != __constant_htons(ETH_P_IP)) {
+	if (skb->protocol != htons(ETH_P_IP)) {
 		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
 			     "ETH_P_IP: %d, skb protocol: %d\n",
-			     __constant_htons(ETH_P_IP), skb->protocol);
+			     htons(ETH_P_IP), skb->protocol);
 		goto tx_error;
 	}
 
@@ -350,9 +350,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (skb->dst)
 		skb->dst->ops->update_pmtu(skb->dst, mtu);
 
-	df |= (old_iph->frag_off&__constant_htons(IP_DF));
+	df |= (old_iph->frag_off & htons(IP_DF));
 
-	if ((old_iph->frag_off&__constant_htons(IP_DF))
+	if ((old_iph->frag_off & htons(IP_DF))
 	    && mtu < ntohs(old_iph->tot_len)) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		ip_rt_put(rt);
@@ -445,7 +445,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->u.dst);
-	if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) {
+	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		ip_rt_put(rt);
 		IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
@@ -519,7 +519,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->u.dst);
-	if ((skb->len > mtu) && (skb->nh.iph->frag_off&__constant_htons(IP_DF))) {
+	if ((skb->len > mtu) && (skb->nh.iph->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 0a72eab1462..7ff11977eb4 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -515,11 +515,10 @@ static void tcp_sack(const struct sk_buff *skb,
 
 	/* Fast path for timestamp-only option */
 	if (length == TCPOLEN_TSTAMP_ALIGNED*4
-	    && *(__be32 *)ptr ==
-		__constant_htonl((TCPOPT_NOP << 24)
-				 | (TCPOPT_NOP << 16)
-				 | (TCPOPT_TIMESTAMP << 8)
-				 | TCPOLEN_TIMESTAMP))
+	    && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
+				       | (TCPOPT_NOP << 16)
+				       | (TCPOPT_TIMESTAMP << 8)
+				       | TCPOLEN_TIMESTAMP))
 		return;
 
 	while (length > 0) {
-- 
cgit v1.2.3


From 8f05ce91c8b801af106611ad83b1d8d7429b9b46 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 7 Mar 2007 14:21:00 +0900
Subject: [NET] NETFILTER: Use htonl() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_tcp.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 153d6619993..9e496319f60 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -470,11 +470,10 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
 
 	/* Fast path for timestamp-only option */
 	if (length == TCPOLEN_TSTAMP_ALIGNED*4
-	    && *(__be32 *)ptr ==
-		__constant_htonl((TCPOPT_NOP << 24)
-				 | (TCPOPT_NOP << 16)
-				 | (TCPOPT_TIMESTAMP << 8)
-				 | TCPOLEN_TIMESTAMP))
+	    && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
+				       | (TCPOPT_NOP << 16)
+				       | (TCPOPT_TIMESTAMP << 8)
+				       | TCPOLEN_TIMESTAMP))
 		return;
 
 	while (length > 0) {
-- 
cgit v1.2.3


From b6d9bcb0697e60d5424e2f395fe950f0e22f4418 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 7 Mar 2007 14:21:20 +0900
Subject: [NET] SCHED: Use htons() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_rsvp.h | 2 +-
 net/sched/sch_api.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 7853621a04c..b6ac0e28787 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -160,7 +160,7 @@ restart:
 	dst = &nhptr->daddr;
 	protocol = nhptr->protocol;
 	xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
-	if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET))
+	if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
 		return -1;
 #endif
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ecc988af4a9..4a927a5e1fa 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1146,7 +1146,7 @@ reclassify:
 
 	for ( ; tp; tp = tp->next) {
 		if ((tp->protocol == protocol ||
-			tp->protocol == __constant_htons(ETH_P_ALL)) &&
+			tp->protocol == htons(ETH_P_ALL)) &&
 			(err = tp->classify(skb, tp, res)) >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
 			if ( TC_ACT_RECLASSIFY == err) {
-- 
cgit v1.2.3


From 02ea4923b4997d7e1310c027081f46d584b9d714 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 7 Mar 2007 14:21:31 +0900
Subject: [NET] TIPC: Use htons() where appropriate.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/eth_media.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 9be4839e32c..f71ba9db611 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -140,7 +140,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
 		return -EDQUOT;
 	if (!eb_ptr->dev) {
 		eb_ptr->dev = dev;
-		eb_ptr->tipc_packet_type.type = __constant_htons(ETH_P_TIPC);
+		eb_ptr->tipc_packet_type.type = htons(ETH_P_TIPC);
 		eb_ptr->tipc_packet_type.dev = dev;
 		eb_ptr->tipc_packet_type.func = recv_msg;
 		eb_ptr->tipc_packet_type.af_packet_priv = eb_ptr;
-- 
cgit v1.2.3


From fe067e8ab5e0dc5ca3c54634924c628da92090b4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Wed, 7 Mar 2007 12:12:44 -0800
Subject: [TCP]: Abstract out all write queue operations.

This allows the write queue implementation to be changed,
for example, to one which allows fast interval searching.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h    |  21 ----------
 include/net/tcp.h     | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp.c        |  32 +++++++-------
 net/ipv4/tcp_input.c  |  64 ++++++++++++++++++----------
 net/ipv4/tcp_ipv4.c   |   2 +-
 net/ipv4/tcp_output.c |  95 +++++++++++++++++++----------------------
 net/ipv4/tcp_timer.c  |  10 ++---
 7 files changed, 221 insertions(+), 117 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 9583639090d..2974bacc885 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -710,15 +710,6 @@ static inline void sk_stream_mem_reclaim(struct sock *sk)
 		__sk_stream_mem_reclaim(sk);
 }
 
-static inline void sk_stream_writequeue_purge(struct sock *sk)
-{
-	struct sk_buff *skb;
-
-	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
-		sk_stream_free_skb(sk, skb);
-	sk_stream_mem_reclaim(sk);
-}
-
 static inline int sk_stream_rmem_schedule(struct sock *sk, struct sk_buff *skb)
 {
 	return (int)skb->truesize <= sk->sk_forward_alloc ||
@@ -1256,18 +1247,6 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk)
 	return page;
 }
 
-#define sk_stream_for_retrans_queue(skb, sk)				\
-		for (skb = (sk)->sk_write_queue.next;			\
-		     (skb != (sk)->sk_send_head) &&			\
-		     (skb != (struct sk_buff *)&(sk)->sk_write_queue);	\
-		     skb = skb->next)
-
-/*from STCP for fast SACK Process*/
-#define sk_stream_for_retrans_queue_from(skb, sk)			\
-		for (; (skb != (sk)->sk_send_head) &&                   \
-		     (skb != (struct sk_buff *)&(sk)->sk_write_queue);	\
-		     skb = skb->next)
-
 /*
  *	Default write policy as shown to user space via poll/select/SIGIO
  */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 181c0600af1..6dacc352dcf 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1162,6 +1162,120 @@ static inline void		tcp_put_md5sig_pool(void)
 	put_cpu();
 }
 
+/* write queue abstraction */
+static inline void tcp_write_queue_purge(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
+		sk_stream_free_skb(sk, skb);
+	sk_stream_mem_reclaim(sk);
+}
+
+static inline struct sk_buff *tcp_write_queue_head(struct sock *sk)
+{
+	struct sk_buff *skb = sk->sk_write_queue.next;
+	if (skb == (struct sk_buff *) &sk->sk_write_queue)
+		return NULL;
+	return skb;
+}
+
+static inline struct sk_buff *tcp_write_queue_tail(struct sock *sk)
+{
+	struct sk_buff *skb = sk->sk_write_queue.prev;
+	if (skb == (struct sk_buff *) &sk->sk_write_queue)
+		return NULL;
+	return skb;
+}
+
+static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_buff *skb)
+{
+	return skb->next;
+}
+
+#define tcp_for_write_queue(skb, sk)					\
+		for (skb = (sk)->sk_write_queue.next;			\
+		     (skb != (struct sk_buff *)&(sk)->sk_write_queue);	\
+		     skb = skb->next)
+
+#define tcp_for_write_queue_from(skb, sk)				\
+		for (; (skb != (struct sk_buff *)&(sk)->sk_write_queue);\
+		     skb = skb->next)
+
+static inline struct sk_buff *tcp_send_head(struct sock *sk)
+{
+	return sk->sk_send_head;
+}
+
+static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb)
+{
+	sk->sk_send_head = skb->next;
+	if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
+		sk->sk_send_head = NULL;
+}
+
+static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
+{
+	if (sk->sk_send_head == skb_unlinked)
+		sk->sk_send_head = NULL;
+}
+
+static inline void tcp_init_send_head(struct sock *sk)
+{
+	sk->sk_send_head = NULL;
+}
+
+static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
+{
+	__skb_queue_tail(&sk->sk_write_queue, skb);
+}
+
+static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
+{
+	__tcp_add_write_queue_tail(sk, skb);
+
+	/* Queue it, remembering where we must start sending. */
+	if (sk->sk_send_head == NULL)
+		sk->sk_send_head = skb;
+}
+
+static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb)
+{
+	__skb_queue_head(&sk->sk_write_queue, skb);
+}
+
+/* Insert buff after skb on the write queue of sk.  */
+static inline void tcp_insert_write_queue_after(struct sk_buff *skb,
+						struct sk_buff *buff,
+						struct sock *sk)
+{
+	__skb_append(skb, buff, &sk->sk_write_queue);
+}
+
+/* Insert skb between prev and next on the write queue of sk.  */
+static inline void tcp_insert_write_queue_before(struct sk_buff *new,
+						  struct sk_buff *skb,
+						  struct sock *sk)
+{
+	__skb_insert(new, skb->prev, skb, &sk->sk_write_queue);
+}
+
+static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
+{
+	__skb_unlink(skb, &sk->sk_write_queue);
+}
+
+static inline int tcp_skb_is_last(const struct sock *sk,
+				  const struct sk_buff *skb)
+{
+	return skb->next == (struct sk_buff *)&sk->sk_write_queue;
+}
+
+static inline int tcp_write_queue_empty(struct sock *sk)
+{
+	return skb_queue_empty(&sk->sk_write_queue);
+}
+
 /* /proc */
 enum tcp_seq_states {
 	TCP_SEQ_STATE_LISTENING,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3834b10b511..689f9330f1b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -470,10 +470,8 @@ static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
 	tcb->flags   = TCPCB_FLAG_ACK;
 	tcb->sacked  = 0;
 	skb_header_release(skb);
-	__skb_queue_tail(&sk->sk_write_queue, skb);
+	tcp_add_write_queue_tail(sk, skb);
 	sk_charge_skb(sk, skb);
-	if (!sk->sk_send_head)
-		sk->sk_send_head = skb;
 	if (tp->nonagle & TCP_NAGLE_PUSH)
 		tp->nonagle &= ~TCP_NAGLE_PUSH;
 }
@@ -491,8 +489,8 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
 static inline void tcp_push(struct sock *sk, struct tcp_sock *tp, int flags,
 			    int mss_now, int nonagle)
 {
-	if (sk->sk_send_head) {
-		struct sk_buff *skb = sk->sk_write_queue.prev;
+	if (tcp_send_head(sk)) {
+		struct sk_buff *skb = tcp_write_queue_tail(sk);
 		if (!(flags & MSG_MORE) || forced_push(tp))
 			tcp_mark_push(tp, skb);
 		tcp_mark_urg(tp, flags, skb);
@@ -526,13 +524,13 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
 		goto do_error;
 
 	while (psize > 0) {
-		struct sk_buff *skb = sk->sk_write_queue.prev;
+		struct sk_buff *skb = tcp_write_queue_tail(sk);
 		struct page *page = pages[poffset / PAGE_SIZE];
 		int copy, i, can_coalesce;
 		int offset = poffset % PAGE_SIZE;
 		int size = min_t(size_t, psize, PAGE_SIZE - offset);
 
-		if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) {
+		if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
 new_segment:
 			if (!sk_stream_memory_free(sk))
 				goto wait_for_sndbuf;
@@ -589,7 +587,7 @@ new_segment:
 		if (forced_push(tp)) {
 			tcp_mark_push(tp, skb);
 			__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
-		} else if (skb == sk->sk_send_head)
+		} else if (skb == tcp_send_head(sk))
 			tcp_push_one(sk, mss_now);
 		continue;
 
@@ -704,9 +702,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		while (seglen > 0) {
 			int copy;
 
-			skb = sk->sk_write_queue.prev;
+			skb = tcp_write_queue_tail(sk);
 
-			if (!sk->sk_send_head ||
+			if (!tcp_send_head(sk) ||
 			    (copy = size_goal - skb->len) <= 0) {
 
 new_segment:
@@ -833,7 +831,7 @@ new_segment:
 			if (forced_push(tp)) {
 				tcp_mark_push(tp, skb);
 				__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
-			} else if (skb == sk->sk_send_head)
+			} else if (skb == tcp_send_head(sk))
 				tcp_push_one(sk, mss_now);
 			continue;
 
@@ -860,9 +858,11 @@ out:
 
 do_fault:
 	if (!skb->len) {
-		if (sk->sk_send_head == skb)
-			sk->sk_send_head = NULL;
-		__skb_unlink(skb, &sk->sk_write_queue);
+		tcp_unlink_write_queue(skb, sk);
+		/* It is the one place in all of TCP, except connection
+		 * reset, where we can be unlinking the send_head.
+		 */
+		tcp_check_send_head(sk, skb);
 		sk_stream_free_skb(sk, skb);
 	}
 
@@ -1732,7 +1732,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 
 	tcp_clear_xmit_timers(sk);
 	__skb_queue_purge(&sk->sk_receive_queue);
-	sk_stream_writequeue_purge(sk);
+	tcp_write_queue_purge(sk);
 	__skb_queue_purge(&tp->out_of_order_queue);
 #ifdef CONFIG_NET_DMA
 	__skb_queue_purge(&sk->sk_async_wait_queue);
@@ -1758,7 +1758,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tcp_set_ca_state(sk, TCP_CA_Open);
 	tcp_clear_retrans(tp);
 	inet_csk_delack_init(sk);
-	sk->sk_send_head = NULL;
+	tcp_init_send_head(sk);
 	tp->rx_opt.saw_tstamp = 0;
 	tcp_sack_reset(&tp->rx_opt);
 	__sk_dst_reset(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d0a3630f41a..22d0bb03c5d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1044,7 +1044,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	cached_skb = tp->fastpath_skb_hint;
 	cached_fack_count = tp->fastpath_cnt_hint;
 	if (!cached_skb) {
-		cached_skb = sk->sk_write_queue.next;
+		cached_skb = tcp_write_queue_head(sk);
 		cached_fack_count = 0;
 	}
 
@@ -1061,10 +1061,13 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 		if (after(end_seq, tp->high_seq))
 			flag |= FLAG_DATA_LOST;
 
-		sk_stream_for_retrans_queue_from(skb, sk) {
+		tcp_for_write_queue_from(skb, sk) {
 			int in_sack, pcount;
 			u8 sacked;
 
+			if (skb == tcp_send_head(sk))
+				break;
+
 			cached_skb = skb;
 			cached_fack_count = fack_count;
 			if (i == first_sack_index) {
@@ -1213,7 +1216,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 	if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) {
 		struct sk_buff *skb;
 
-		sk_stream_for_retrans_queue(skb, sk) {
+		tcp_for_write_queue(skb, sk) {
+			if (skb == tcp_send_head(sk))
+				break;
 			if (after(TCP_SKB_CB(skb)->seq, lost_retrans))
 				break;
 			if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
@@ -1266,8 +1271,8 @@ int tcp_use_frto(struct sock *sk)
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
-	if (!sysctl_tcp_frto || !sk->sk_send_head ||
-		after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
+	if (!sysctl_tcp_frto || !tcp_send_head(sk) ||
+		after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
 		      tp->snd_una + tp->snd_wnd))
 		return 0;
 
@@ -1278,8 +1283,11 @@ int tcp_use_frto(struct sock *sk)
 	if (tp->retrans_out > 1)
 		return 0;
 
-	skb = skb_peek(&sk->sk_write_queue)->next;	/* Skips head */
-	sk_stream_for_retrans_queue_from(skb, sk) {
+	skb = tcp_write_queue_head(sk);
+	skb = tcp_write_queue_next(sk, skb);	/* Skips head */
+	tcp_for_write_queue_from(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
 			return 0;
 		/* Short-circuit when first non-SACKed skb has been checked */
@@ -1343,7 +1351,7 @@ void tcp_enter_frto(struct sock *sk)
 	tp->undo_marker = tp->snd_una;
 	tp->undo_retrans = 0;
 
-	skb = skb_peek(&sk->sk_write_queue);
+	skb = tcp_write_queue_head(sk);
 	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
 		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 		tp->retrans_out -= tcp_skb_pcount(skb);
@@ -1380,7 +1388,9 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
 	tp->fackets_out = 0;
 	tp->retrans_out = 0;
 
-	sk_stream_for_retrans_queue(skb, sk) {
+	tcp_for_write_queue(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		cnt += tcp_skb_pcount(skb);
 		/*
 		 * Count the retransmission made on RTO correctly (only when
@@ -1468,7 +1478,9 @@ void tcp_enter_loss(struct sock *sk, int how)
 	if (!how)
 		tp->undo_marker = tp->snd_una;
 
-	sk_stream_for_retrans_queue(skb, sk) {
+	tcp_for_write_queue(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		cnt += tcp_skb_pcount(skb);
 		if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
 			tp->undo_marker = 0;
@@ -1503,14 +1515,14 @@ static int tcp_check_sack_reneging(struct sock *sk)
 	 * receiver _host_ is heavily congested (or buggy).
 	 * Do processing similar to RTO timeout.
 	 */
-	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL &&
+	if ((skb = tcp_write_queue_head(sk)) != NULL &&
 	    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
 		struct inet_connection_sock *icsk = inet_csk(sk);
 		NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
 
 		tcp_enter_loss(sk, 1);
 		icsk->icsk_retransmits++;
-		tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
+		tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 					  icsk->icsk_rto, TCP_RTO_MAX);
 		return 1;
@@ -1531,7 +1543,7 @@ static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
 static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
 {
 	return tp->packets_out &&
-	       tcp_skb_timedout(sk, skb_peek(&sk->sk_write_queue));
+	       tcp_skb_timedout(sk, tcp_write_queue_head(sk));
 }
 
 /* Linux NewReno/SACK/FACK/ECN state machine.
@@ -1726,11 +1738,13 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
 		skb = tp->lost_skb_hint;
 		cnt = tp->lost_cnt_hint;
 	} else {
-		skb = sk->sk_write_queue.next;
+		skb = tcp_write_queue_head(sk);
 		cnt = 0;
 	}
 
-	sk_stream_for_retrans_queue_from(skb, sk) {
+	tcp_for_write_queue_from(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		/* TODO: do this better */
 		/* this is not the most efficient way to do this... */
 		tp->lost_skb_hint = skb;
@@ -1777,9 +1791,11 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
 		struct sk_buff *skb;
 
 		skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
-			: sk->sk_write_queue.next;
+			: tcp_write_queue_head(sk);
 
-		sk_stream_for_retrans_queue_from(skb, sk) {
+		tcp_for_write_queue_from(skb, sk) {
+			if (skb == tcp_send_head(sk))
+				break;
 			if (!tcp_skb_timedout(sk, skb))
 				break;
 
@@ -1970,7 +1986,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
 {
 	if (tcp_may_undo(tp)) {
 		struct sk_buff *skb;
-		sk_stream_for_retrans_queue(skb, sk) {
+		tcp_for_write_queue(skb, sk) {
+			if (skb == tcp_send_head(sk))
+				break;
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
 		}
 
@@ -2382,8 +2400,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 		= icsk->icsk_ca_ops->rtt_sample;
 	struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
 
-	while ((skb = skb_peek(&sk->sk_write_queue)) &&
-	       skb != sk->sk_send_head) {
+	while ((skb = tcp_write_queue_head(sk)) &&
+	       skb != tcp_send_head(sk)) {
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 		__u8 sacked = scb->sacked;
 
@@ -2446,7 +2464,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 		}
 		tcp_dec_pcount_approx(&tp->fackets_out, skb);
 		tcp_packets_out_dec(tp, skb);
-		__skb_unlink(skb, &sk->sk_write_queue);
+		tcp_unlink_write_queue(skb, sk);
 		sk_stream_free_skb(sk, skb);
 		clear_all_retrans_hints(tp);
 	}
@@ -2495,7 +2513,7 @@ static void tcp_ack_probe(struct sock *sk)
 
 	/* Was it a usable window open? */
 
-	if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
+	if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
 		   tp->snd_una + tp->snd_wnd)) {
 		icsk->icsk_backoff = 0;
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
@@ -2795,7 +2813,7 @@ no_queue:
 	 * being used to time the probes, and is probably far higher than
 	 * it needs to be for normal retransmission.
 	 */
-	if (sk->sk_send_head)
+	if (tcp_send_head(sk))
 		tcp_ack_probe(sk);
 	return 1;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index addac1110f9..3326681b842 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1890,7 +1890,7 @@ int tcp_v4_destroy_sock(struct sock *sk)
 	tcp_cleanup_congestion_control(sk);
 
 	/* Cleanup up the write buffer. */
-	sk_stream_writequeue_purge(sk);
+	tcp_write_queue_purge(sk);
 
 	/* Cleans up our, hopefully empty, out_of_order_queue. */
 	__skb_queue_purge(&tp->out_of_order_queue);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d19b2f3b70f..2a62b55b15f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,9 +65,7 @@ int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 static void update_send_head(struct sock *sk, struct tcp_sock *tp,
 			     struct sk_buff *skb)
 {
-	sk->sk_send_head = skb->next;
-	if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
-		sk->sk_send_head = NULL;
+	tcp_advance_send_head(sk, skb);
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 	tcp_packets_out_inc(sk, tp, skb);
 }
@@ -567,12 +565,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 	/* Advance write_seq and place onto the write_queue. */
 	tp->write_seq = TCP_SKB_CB(skb)->end_seq;
 	skb_header_release(skb);
-	__skb_queue_tail(&sk->sk_write_queue, skb);
+	tcp_add_write_queue_tail(sk, skb);
 	sk_charge_skb(sk, skb);
-
-	/* Queue it, remembering where we must start sending. */
-	if (sk->sk_send_head == NULL)
-		sk->sk_send_head = skb;
 }
 
 static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
@@ -705,7 +699,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 
 	/* Link BUFF into the send queue. */
 	skb_header_release(buff);
-	__skb_append(skb, buff, &sk->sk_write_queue);
+	tcp_insert_write_queue_after(skb, buff, sk);
 
 	return 0;
 }
@@ -1056,7 +1050,7 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, uns
 	return !after(end_seq, tp->snd_una + tp->snd_wnd);
 }
 
-/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
+/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
  * should be put on the wire right now.  If so, it returns the number of
  * packets allowed by the congestion window.
  */
@@ -1079,15 +1073,9 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
 	return cwnd_quota;
 }
 
-static inline int tcp_skb_is_last(const struct sock *sk,
-				  const struct sk_buff *skb)
-{
-	return skb->next == (struct sk_buff *)&sk->sk_write_queue;
-}
-
 int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
 {
-	struct sk_buff *skb = sk->sk_send_head;
+	struct sk_buff *skb = tcp_send_head(sk);
 
 	return (skb &&
 		tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
@@ -1143,7 +1131,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* Link BUFF into the send queue. */
 	skb_header_release(buff);
-	__skb_append(skb, buff, &sk->sk_write_queue);
+	tcp_insert_write_queue_after(skb, buff, sk);
 
 	return 0;
 }
@@ -1249,10 +1237,10 @@ static int tcp_mtu_probe(struct sock *sk)
 
 	/* Have enough data in the send queue to probe? */
 	len = 0;
-	if ((skb = sk->sk_send_head) == NULL)
+	if ((skb = tcp_send_head(sk)) == NULL)
 		return -1;
 	while ((len += skb->len) < probe_size && !tcp_skb_is_last(sk, skb))
-		skb = skb->next;
+		skb = tcp_write_queue_next(sk, skb);
 	if (len < probe_size)
 		return -1;
 
@@ -1279,9 +1267,9 @@ static int tcp_mtu_probe(struct sock *sk)
 		return -1;
 	sk_charge_skb(sk, nskb);
 
-	skb = sk->sk_send_head;
-	__skb_insert(nskb, skb->prev, skb, &sk->sk_write_queue);
-	sk->sk_send_head = nskb;
+	skb = tcp_send_head(sk);
+	tcp_insert_write_queue_before(nskb, skb, sk);
+	tcp_advance_send_head(sk, skb);
 
 	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
@@ -1292,7 +1280,7 @@ static int tcp_mtu_probe(struct sock *sk)
 
 	len = 0;
 	while (len < probe_size) {
-		next = skb->next;
+		next = tcp_write_queue_next(sk, skb);
 
 		copy = min_t(int, skb->len, probe_size - len);
 		if (nskb->ip_summed)
@@ -1305,7 +1293,7 @@ static int tcp_mtu_probe(struct sock *sk)
 			/* We've eaten all the data from this skb.
 			 * Throw it away. */
 			TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
-			__skb_unlink(skb, &sk->sk_write_queue);
+			tcp_unlink_write_queue(skb, sk);
 			sk_stream_free_skb(sk, skb);
 		} else {
 			TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
@@ -1377,7 +1365,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 		sent_pkts = 1;
 	}
 
-	while ((skb = sk->sk_send_head)) {
+	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
 		tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
@@ -1435,7 +1423,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 		tcp_cwnd_validate(sk, tp);
 		return 0;
 	}
-	return !tp->packets_out && sk->sk_send_head;
+	return !tp->packets_out && tcp_send_head(sk);
 }
 
 /* Push out any pending frames which were held back due to
@@ -1445,7 +1433,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
 			       unsigned int cur_mss, int nonagle)
 {
-	struct sk_buff *skb = sk->sk_send_head;
+	struct sk_buff *skb = tcp_send_head(sk);
 
 	if (skb) {
 		if (tcp_write_xmit(sk, cur_mss, nonagle))
@@ -1459,7 +1447,7 @@ void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
 void tcp_push_one(struct sock *sk, unsigned int mss_now)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = sk->sk_send_head;
+	struct sk_buff *skb = tcp_send_head(sk);
 	unsigned int tso_segs, cwnd_quota;
 
 	BUG_ON(!skb || skb->len < mss_now);
@@ -1620,7 +1608,7 @@ u32 __tcp_select_window(struct sock *sk)
 static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *next_skb = skb->next;
+	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
 
 	/* The first test we must make is that neither of these two
 	 * SKB's are still referenced by someone else.
@@ -1652,7 +1640,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		clear_all_retrans_hints(tp);
 
 		/* Ok.	We will be able to collapse the packet. */
-		__skb_unlink(next_skb, &sk->sk_write_queue);
+		tcp_unlink_write_queue(next_skb, sk);
 
 		memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
 
@@ -1706,7 +1694,9 @@ void tcp_simple_retransmit(struct sock *sk)
 	unsigned int mss = tcp_current_mss(sk, 0);
 	int lost = 0;
 
-	sk_stream_for_retrans_queue(skb, sk) {
+	tcp_for_write_queue(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		if (skb->len > mss &&
 		    !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
 			if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
@@ -1790,10 +1780,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	/* Collapse two adjacent packets if worthwhile and we can. */
 	if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
 	   (skb->len < (cur_mss >> 1)) &&
-	   (skb->next != sk->sk_send_head) &&
-	   (skb->next != (struct sk_buff *)&sk->sk_write_queue) &&
-	   (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) &&
-	   (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(skb->next) == 1) &&
+	   (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
+	   (!tcp_skb_is_last(sk, skb)) &&
+	   (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
+	   (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
 	   (sysctl_tcp_retrans_collapse != 0))
 		tcp_retrans_try_collapse(sk, skb, cur_mss);
 
@@ -1872,15 +1862,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		skb = tp->retransmit_skb_hint;
 		packet_cnt = tp->retransmit_cnt_hint;
 	}else{
-		skb = sk->sk_write_queue.next;
+		skb = tcp_write_queue_head(sk);
 		packet_cnt = 0;
 	}
 
 	/* First pass: retransmit lost packets. */
 	if (tp->lost_out) {
-		sk_stream_for_retrans_queue_from(skb, sk) {
+		tcp_for_write_queue_from(skb, sk) {
 			__u8 sacked = TCP_SKB_CB(skb)->sacked;
 
+			if (skb == tcp_send_head(sk))
+				break;
 			/* we could do better than to assign each time */
 			tp->retransmit_skb_hint = skb;
 			tp->retransmit_cnt_hint = packet_cnt;
@@ -1906,8 +1898,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 					else
 						NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
 
-					if (skb ==
-					    skb_peek(&sk->sk_write_queue))
+					if (skb == tcp_write_queue_head(sk))
 						inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 									  inet_csk(sk)->icsk_rto,
 									  TCP_RTO_MAX);
@@ -1944,11 +1935,13 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		skb = tp->forward_skb_hint;
 		packet_cnt = tp->forward_cnt_hint;
 	} else{
-		skb = sk->sk_write_queue.next;
+		skb = tcp_write_queue_head(sk);
 		packet_cnt = 0;
 	}
 
-	sk_stream_for_retrans_queue_from(skb, sk) {
+	tcp_for_write_queue_from(skb, sk) {
+		if (skb == tcp_send_head(sk))
+			break;
 		tp->forward_cnt_hint = packet_cnt;
 		tp->forward_skb_hint = skb;
 
@@ -1973,7 +1966,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			break;
 		}
 
-		if (skb == skb_peek(&sk->sk_write_queue))
+		if (skb == tcp_write_queue_head(sk))
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 						  inet_csk(sk)->icsk_rto,
 						  TCP_RTO_MAX);
@@ -1989,7 +1982,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 void tcp_send_fin(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue);
+	struct sk_buff *skb = tcp_write_queue_tail(sk);
 	int mss_now;
 
 	/* Optimization, tack on the FIN if we have a queue of
@@ -1998,7 +1991,7 @@ void tcp_send_fin(struct sock *sk)
 	 */
 	mss_now = tcp_current_mss(sk, 1);
 
-	if (sk->sk_send_head != NULL) {
+	if (tcp_send_head(sk) != NULL) {
 		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
 		TCP_SKB_CB(skb)->end_seq++;
 		tp->write_seq++;
@@ -2071,7 +2064,7 @@ int tcp_send_synack(struct sock *sk)
 {
 	struct sk_buff* skb;
 
-	skb = skb_peek(&sk->sk_write_queue);
+	skb = tcp_write_queue_head(sk);
 	if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) {
 		printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
 		return -EFAULT;
@@ -2081,9 +2074,9 @@ int tcp_send_synack(struct sock *sk)
 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
 			if (nskb == NULL)
 				return -ENOMEM;
-			__skb_unlink(skb, &sk->sk_write_queue);
+			tcp_unlink_write_queue(skb, sk);
 			skb_header_release(nskb);
-			__skb_queue_head(&sk->sk_write_queue, nskb);
+			__tcp_add_write_queue_head(sk, nskb);
 			sk_stream_free_skb(sk, skb);
 			sk_charge_skb(sk, nskb);
 			skb = nskb;
@@ -2285,7 +2278,7 @@ int tcp_connect(struct sock *sk)
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
 	tp->retrans_stamp = TCP_SKB_CB(buff)->when;
 	skb_header_release(buff);
-	__skb_queue_tail(&sk->sk_write_queue, buff);
+	__tcp_add_write_queue_tail(sk, buff);
 	sk_charge_skb(sk, buff);
 	tp->packets_out += tcp_skb_pcount(buff);
 	tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
@@ -2441,7 +2434,7 @@ int tcp_write_wakeup(struct sock *sk)
 		struct tcp_sock *tp = tcp_sk(sk);
 		struct sk_buff *skb;
 
-		if ((skb = sk->sk_send_head) != NULL &&
+		if ((skb = tcp_send_head(sk)) != NULL &&
 		    before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
 			int err;
 			unsigned int mss = tcp_current_mss(sk, 0);
@@ -2491,7 +2484,7 @@ void tcp_send_probe0(struct sock *sk)
 
 	err = tcp_write_wakeup(sk);
 
-	if (tp->packets_out || !sk->sk_send_head) {
+	if (tp->packets_out || !tcp_send_head(sk)) {
 		/* Cancel probe timer, if it is not required. */
 		icsk->icsk_probes_out = 0;
 		icsk->icsk_backoff = 0;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index a9243cfc1be..2ca97b20929 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -233,7 +233,7 @@ static void tcp_probe_timer(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	int max_probes;
 
-	if (tp->packets_out || !sk->sk_send_head) {
+	if (tp->packets_out || !tcp_send_head(sk)) {
 		icsk->icsk_probes_out = 0;
 		return;
 	}
@@ -284,7 +284,7 @@ static void tcp_retransmit_timer(struct sock *sk)
 	if (!tp->packets_out)
 		goto out;
 
-	BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue));
+	BUG_TRAP(!tcp_write_queue_empty(sk));
 
 	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
 	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
@@ -306,7 +306,7 @@ static void tcp_retransmit_timer(struct sock *sk)
 			goto out;
 		}
 		tcp_enter_loss(sk, 0);
-		tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue));
+		tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
 		__sk_dst_reset(sk);
 		goto out_reset_timer;
 	}
@@ -341,7 +341,7 @@ static void tcp_retransmit_timer(struct sock *sk)
 		tcp_enter_loss(sk, 0);
 	}
 
-	if (tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)) > 0) {
+	if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
 		/* Retransmission failed because of local congestion,
 		 * do not backoff.
 		 */
@@ -482,7 +482,7 @@ static void tcp_keepalive_timer (unsigned long data)
 	elapsed = keepalive_time_when(tp);
 
 	/* It is alive without keepalive 8) */
-	if (tp->packets_out || sk->sk_send_head)
+	if (tp->packets_out || tcp_send_head(sk))
 		goto resched;
 
 	elapsed = tcp_time_stamp - tp->rcv_tstamp;
-- 
cgit v1.2.3


From cb69cc52364690d7789940c480b3a9490784b680 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 7 Mar 2007 19:33:52 -0800
Subject: [TCP/DCCP/RANDOM]: Remove unused exports.

This patch removes the following not or no longer used exports:
- drivers/char/random.c: secure_tcp_sequence_number
- net/dccp/options.c: sysctl_dccp_feat_sequence_window
- net/netlink/af_netlink.c: netlink_set_err

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/char/random.c    | 2 --
 net/dccp/options.c       | 2 --
 net/netlink/af_netlink.c | 1 -
 3 files changed, 5 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index b9dc7aa1dfb..03af50f900d 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1556,8 +1556,6 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 	return seq;
 }
 
-EXPORT_SYMBOL(secure_tcp_sequence_number);
-
 /* Generate secure starting point for ephemeral IPV4 transport port search */
 u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
 {
diff --git a/net/dccp/options.c b/net/dccp/options.c
index ca13f773199..9074ca7977b 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -29,8 +29,6 @@ int sysctl_dccp_feat_ack_ratio	      = DCCPF_INITIAL_ACK_RATIO;
 int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR;
 int sysctl_dccp_feat_send_ndp_count  = DCCPF_INITIAL_SEND_NDP_COUNT;
 
-EXPORT_SYMBOL_GPL(sysctl_dccp_feat_sequence_window);
-
 void dccp_minisock_init(struct dccp_minisock *dmsk)
 {
 	dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5890210d773..350ed1c0e70 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1828,7 +1828,6 @@ EXPORT_SYMBOL(netlink_broadcast);
 EXPORT_SYMBOL(netlink_dump_start);
 EXPORT_SYMBOL(netlink_kernel_create);
 EXPORT_SYMBOL(netlink_register_notifier);
-EXPORT_SYMBOL(netlink_set_err);
 EXPORT_SYMBOL(netlink_set_nonroot);
 EXPORT_SYMBOL(netlink_unicast);
 EXPORT_SYMBOL(netlink_unregister_notifier);
-- 
cgit v1.2.3


From ae40eb1ef30ab4120bd3c8b7e3da99ee53d27a23 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 18 Mar 2007 17:33:16 -0700
Subject: [NET]: Introduce SIOCGSTAMPNS ioctl to get timestamps with nanosec
 resolution

Now network timestamps use ktime_t infrastructure, we can add a new
ioctl() SIOCGSTAMPNS command to get timestamps in 'struct timespec'.
User programs can thus access to nanosecond resolution.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
CC: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/compat_ioctl.c             | 18 ++++++++++++++++++
 include/asm-alpha/sockios.h   |  3 ++-
 include/asm-arm/sockios.h     |  3 ++-
 include/asm-arm26/sockios.h   |  3 ++-
 include/asm-avr32/sockios.h   |  3 ++-
 include/asm-cris/sockios.h    |  3 ++-
 include/asm-frv/sockios.h     |  3 ++-
 include/asm-h8300/sockios.h   |  3 ++-
 include/asm-i386/sockios.h    |  3 ++-
 include/asm-ia64/sockios.h    |  3 ++-
 include/asm-m32r/sockios.h    |  3 ++-
 include/asm-m68k/sockios.h    |  3 ++-
 include/asm-mips/sockios.h    |  3 ++-
 include/asm-parisc/sockios.h  |  3 ++-
 include/asm-powerpc/sockios.h |  3 ++-
 include/asm-s390/sockios.h    |  3 ++-
 include/asm-sh/sockios.h      |  3 ++-
 include/asm-sh64/sockios.h    |  3 ++-
 include/asm-sparc/sockios.h   |  3 ++-
 include/asm-sparc64/sockios.h |  3 ++-
 include/asm-v850/sockios.h    |  3 ++-
 include/asm-x86_64/sockios.h  |  3 ++-
 include/asm-xtensa/sockios.h  |  3 ++-
 include/net/compat.h          |  1 +
 include/net/sock.h            |  1 +
 net/appletalk/ddp.c           |  3 +++
 net/atm/ioctl.c               |  3 +++
 net/ax25/af_ax25.c            |  4 ++++
 net/compat.c                  | 24 ++++++++++++++++++++++++
 net/core/sock.c               | 16 ++++++++++++++++
 net/econet/af_econet.c        |  3 +++
 net/ipv4/af_inet.c            |  3 +++
 net/ipv6/af_inet6.c           |  3 +++
 net/netrom/af_netrom.c        |  6 ++++++
 net/packet/af_packet.c        |  2 ++
 net/rose/af_rose.c            |  3 +++
 net/x25/af_x25.c              | 12 ++++++++++++
 37 files changed, 146 insertions(+), 22 deletions(-)

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 8b1c5d8bf4e..c68b055fa26 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -266,6 +266,23 @@ static int do_siocgstamp(unsigned int fd, unsigned int cmd, unsigned long arg)
 	return err;
 }
 
+static int do_siocgstampns(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+	struct compat_timespec __user *up = compat_ptr(arg);
+	struct timespec kts;
+	mm_segment_t old_fs = get_fs();
+	int err;
+
+	set_fs(KERNEL_DS);
+	err = sys_ioctl(fd, cmd, (unsigned long)&kts);
+	set_fs(old_fs);
+	if (!err) {
+		err = put_user(kts.tv_sec, &up->tv_sec);
+		err |= __put_user(kts.tv_nsec, &up->tv_nsec);
+	}
+	return err;
+}
+
 struct ifmap32 {
 	compat_ulong_t mem_start;
 	compat_ulong_t mem_end;
@@ -2437,6 +2454,7 @@ HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc)
 /* Note SIOCRTMSG is no longer, so this is safe and * the user would have seen just an -EINVAL anyways. */
 HANDLE_IOCTL(SIOCRTMSG, ret_einval)
 HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
+HANDLE_IOCTL(SIOCGSTAMPNS, do_siocgstampns)
 #endif
 #ifdef CONFIG_BLOCK
 HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
diff --git a/include/asm-alpha/sockios.h b/include/asm-alpha/sockios.h
index e4961a740e5..7932c7ab4a4 100644
--- a/include/asm-alpha/sockios.h
+++ b/include/asm-alpha/sockios.h
@@ -10,6 +10,7 @@
 #define SIOCSPGRP	_IOW('s', 8, pid_t)
 #define SIOCGPGRP	_IOR('s', 9, pid_t)
 
-#define SIOCGSTAMP	0x8906		/* Get stamp - linux-specific */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif /* _ASM_ALPHA_SOCKIOS_H */
diff --git a/include/asm-arm/sockios.h b/include/asm-arm/sockios.h
index 77c34087d51..a2588a2512d 100644
--- a/include/asm-arm/sockios.h
+++ b/include/asm-arm/sockios.h
@@ -7,6 +7,7 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif
diff --git a/include/asm-arm26/sockios.h b/include/asm-arm26/sockios.h
index 77c34087d51..a2588a2512d 100644
--- a/include/asm-arm26/sockios.h
+++ b/include/asm-arm26/sockios.h
@@ -7,6 +7,7 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif
diff --git a/include/asm-avr32/sockios.h b/include/asm-avr32/sockios.h
index 84f3d65b3b3..0802d742f97 100644
--- a/include/asm-avr32/sockios.h
+++ b/include/asm-avr32/sockios.h
@@ -7,6 +7,7 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif /* __ASM_AVR32_SOCKIOS_H */
diff --git a/include/asm-cris/sockios.h b/include/asm-cris/sockios.h
index 6c4012f0b29..cfe7bfecf59 100644
--- a/include/asm-cris/sockios.h
+++ b/include/asm-cris/sockios.h
@@ -7,6 +7,7 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif
diff --git a/include/asm-frv/sockios.h b/include/asm-frv/sockios.h
index 8a6e4b2074b..5dbdd13e6de 100644
--- a/include/asm-frv/sockios.h
+++ b/include/asm-frv/sockios.h
@@ -7,7 +7,8 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif /* _ASM_SOCKIOS__ */
 
diff --git a/include/asm-h8300/sockios.h b/include/asm-h8300/sockios.h
index d005d9594cc..e9c7ec810c2 100644
--- a/include/asm-h8300/sockios.h
+++ b/include/asm-h8300/sockios.h
@@ -7,6 +7,7 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif /* __ARCH_H8300_SOCKIOS__ */
diff --git a/include/asm-i386/sockios.h b/include/asm-i386/sockios.h
index 6b747f8e228..ff528c7d255 100644
--- a/include/asm-i386/sockios.h
+++ b/include/asm-i386/sockios.h
@@ -7,6 +7,7 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif
diff --git a/include/asm-ia64/sockios.h b/include/asm-ia64/sockios.h
index cf94857c8a5..15c92468ad3 100644
--- a/include/asm-ia64/sockios.h
+++ b/include/asm-ia64/sockios.h
@@ -14,6 +14,7 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif /* _ASM_IA64_SOCKIOS_H */
diff --git a/include/asm-m32r/sockios.h b/include/asm-m32r/sockios.h
index f89962e231f..6c1fb9b43bd 100644
--- a/include/asm-m32r/sockios.h
+++ b/include/asm-m32r/sockios.h
@@ -7,6 +7,7 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif  /* _ASM_M32R_SOCKIOS_H */
diff --git a/include/asm-m68k/sockios.h b/include/asm-m68k/sockios.h
index 9b9ed973c24..c04a23943cb 100644
--- a/include/asm-m68k/sockios.h
+++ b/include/asm-m68k/sockios.h
@@ -7,6 +7,7 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif /* __ARCH_M68K_SOCKIOS__ */
diff --git a/include/asm-mips/sockios.h b/include/asm-mips/sockios.h
index 87a50bf039e..ed1a5f78d22 100644
--- a/include/asm-mips/sockios.h
+++ b/include/asm-mips/sockios.h
@@ -20,6 +20,7 @@
 #define SIOCSPGRP	_IOW('s', 8, pid_t)
 #define SIOCGPGRP	_IOR('s', 9, pid_t)
 
-#define SIOCGSTAMP	0x8906			/* Get stamp - linux-specific */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif /* _ASM_SOCKIOS_H */
diff --git a/include/asm-parisc/sockios.h b/include/asm-parisc/sockios.h
index aace4962994..dabfbc7483f 100644
--- a/include/asm-parisc/sockios.h
+++ b/include/asm-parisc/sockios.h
@@ -7,6 +7,7 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif
diff --git a/include/asm-powerpc/sockios.h b/include/asm-powerpc/sockios.h
index 590078d8ed2..55cef7675a3 100644
--- a/include/asm-powerpc/sockios.h
+++ b/include/asm-powerpc/sockios.h
@@ -14,6 +14,7 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif	/* _ASM_POWERPC_SOCKIOS_H */
diff --git a/include/asm-s390/sockios.h b/include/asm-s390/sockios.h
index 412aeb4dd6c..f4fc16c7da5 100644
--- a/include/asm-s390/sockios.h
+++ b/include/asm-s390/sockios.h
@@ -15,6 +15,7 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif
diff --git a/include/asm-sh/sockios.h b/include/asm-sh/sockios.h
index 08a71df8a8b..cf8b96b1f9a 100644
--- a/include/asm-sh/sockios.h
+++ b/include/asm-sh/sockios.h
@@ -9,5 +9,6 @@
 #define SIOCSPGRP	_IOW('s', 8, pid_t)
 #define SIOCGPGRP	_IOR('s', 9, pid_t)
 
-#define SIOCGSTAMP	_IOR('s', 100, struct timeval) /* Get stamp - linux-specific */
+#define SIOCGSTAMP	_IOR('s', 100, struct timeval) /* Get stamp (timeval) */
+#define SIOCGSTAMPNS	_IOR('s', 101, struct timespec) /* Get stamp (timespec) */
 #endif /* __ASM_SH_SOCKIOS_H */
diff --git a/include/asm-sh64/sockios.h b/include/asm-sh64/sockios.h
index 1ae23ae8297..419e76f12f4 100644
--- a/include/asm-sh64/sockios.h
+++ b/include/asm-sh64/sockios.h
@@ -20,5 +20,6 @@
 #define SIOCSPGRP	_IOW('s', 8, pid_t)
 #define SIOCGPGRP	_IOR('s', 9, pid_t)
 
-#define SIOCGSTAMP	_IOR('s', 100, struct timeval) /* Get stamp - linux-specific */
+#define SIOCGSTAMP	_IOR('s', 100, struct timeval) /* Get stamp (timeval) */
+#define SIOCGSTAMPNS	_IOR('s', 101, struct timespec) /* Get stamp (timespec) */
 #endif /* __ASM_SH64_SOCKIOS_H */
diff --git a/include/asm-sparc/sockios.h b/include/asm-sparc/sockios.h
index 0c01b597b06..990ea746486 100644
--- a/include/asm-sparc/sockios.h
+++ b/include/asm-sparc/sockios.h
@@ -7,7 +7,8 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif /* !(_ASM_SPARC_SOCKIOS_H) */
 
diff --git a/include/asm-sparc64/sockios.h b/include/asm-sparc64/sockios.h
index 6735bab4f39..c7d9900638d 100644
--- a/include/asm-sparc64/sockios.h
+++ b/include/asm-sparc64/sockios.h
@@ -7,7 +7,8 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif /* !(_ASM_SPARC64_SOCKIOS_H) */
 
diff --git a/include/asm-v850/sockios.h b/include/asm-v850/sockios.h
index cf4874c2fd8..823e106e6cd 100644
--- a/include/asm-v850/sockios.h
+++ b/include/asm-v850/sockios.h
@@ -7,6 +7,7 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif /* __V850_SOCKIOS_H__ */
diff --git a/include/asm-x86_64/sockios.h b/include/asm-x86_64/sockios.h
index 2eefd10d4f4..d726ba2513e 100644
--- a/include/asm-x86_64/sockios.h
+++ b/include/asm-x86_64/sockios.h
@@ -7,6 +7,7 @@
 #define FIOGETOWN	0x8903
 #define SIOCGPGRP	0x8904
 #define SIOCATMARK	0x8905
-#define SIOCGSTAMP	0x8906		/* Get stamp */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif
diff --git a/include/asm-xtensa/sockios.h b/include/asm-xtensa/sockios.h
index 20d2ba10ecd..efe0af379f0 100644
--- a/include/asm-xtensa/sockios.h
+++ b/include/asm-xtensa/sockios.h
@@ -25,6 +25,7 @@
 #define SIOCSPGRP	_IOW('s', 8, pid_t)
 #define SIOCGPGRP	_IOR('s', 9, pid_t)
 
-#define SIOCGSTAMP	0x8906		/* Get stamp - linux-specific */
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
 
 #endif	/* _XTENSA_SOCKIOS_H */
diff --git a/include/net/compat.h b/include/net/compat.h
index 9859b60280d..406db242f73 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -25,6 +25,7 @@ struct compat_cmsghdr {
 };
 
 extern int compat_sock_get_timestamp(struct sock *, struct timeval __user *);
+extern int compat_sock_get_timestampns(struct sock *, struct timespec __user *);
 
 #else /* defined(CONFIG_COMPAT) */
 #define compat_msghdr	msghdr		/* to avoid compiler warnings */
diff --git a/include/net/sock.h b/include/net/sock.h
index 2974bacc885..d093e49fdc8 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1329,6 +1329,7 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_e
 
 extern void sock_enable_timestamp(struct sock *sk);
 extern int sock_get_timestamp(struct sock *, struct timeval __user *);
+extern int sock_get_timestampns(struct sock *, struct timespec __user *);
 
 /* 
  *	Enable debug/info messages 
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index c8b7dc2c325..32b82705b68 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1771,6 +1771,9 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		case SIOCGSTAMP:
 			rc = sock_get_timestamp(sk, argp);
 			break;
+		case SIOCGSTAMPNS:
+			rc = sock_get_timestampns(sk, argp);
+			break;
 		/* Routing */
 		case SIOCADDRT:
 		case SIOCDELRT:
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 8ccee4591f6..7afd8e7754f 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -82,6 +82,9 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		case SIOCGSTAMP: /* borrowed from IP */
 			error = sock_get_timestamp(sk, argp);
 			goto done;
+		case SIOCGSTAMPNS: /* borrowed from IP */
+			error = sock_get_timestampns(sk, argp);
+			goto done;
 		case ATM_SETSC:
 			printk(KERN_WARNING "ATM_SETSC is obsolete\n");
 			error = 0;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 1c07c6a50eb..62605dc5a2c 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1711,6 +1711,10 @@ static int ax25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		res = sock_get_timestamp(sk, argp);
 		break;
 
+	case SIOCGSTAMPNS:
+		res = sock_get_timestampns(sk, argp);
+		break;
+
 	case SIOCAX25ADDUID:	/* Add a uid to the uid/call map table */
 	case SIOCAX25DELUID:	/* Delete a uid from the uid/call map table */
 	case SIOCAX25GETUID: {
diff --git a/net/compat.c b/net/compat.c
index 17c2710b2b9..2fc6d9bb622 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -564,6 +564,30 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 }
 EXPORT_SYMBOL(compat_sock_get_timestamp);
 
+int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
+{
+	struct compat_timespec __user *ctv =
+			(struct compat_timespec __user*) userstamp;
+	int err = -ENOENT;
+	struct timespec ts;
+
+	if (!sock_flag(sk, SOCK_TIMESTAMP))
+		sock_enable_timestamp(sk);
+	ts = ktime_to_timespec(sk->sk_stamp);
+	if (ts.tv_sec == -1)
+		return err;
+	if (ts.tv_sec == 0) {
+		sk->sk_stamp = ktime_get_real();
+		ts = ktime_to_timespec(sk->sk_stamp);
+	}
+	err = 0;
+	if (put_user(ts.tv_sec, &ctv->tv_sec) ||
+			put_user(ts.tv_nsec, &ctv->tv_nsec))
+		err = -EFAULT;
+	return err;
+}
+EXPORT_SYMBOL(compat_sock_get_timestampns);
+
 asmlinkage long compat_sys_getsockopt(int fd, int level, int optname,
 				char __user *optval, int __user *optlen)
 {
diff --git a/net/core/sock.c b/net/core/sock.c
index 6ddb3664b99..cb48fa0e124 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1567,6 +1567,22 @@ int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 }
 EXPORT_SYMBOL(sock_get_timestamp);
 
+int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
+{
+	struct timespec ts;
+	if (!sock_flag(sk, SOCK_TIMESTAMP))
+		sock_enable_timestamp(sk);
+	ts = ktime_to_timespec(sk->sk_stamp);
+	if (ts.tv_sec == -1)
+		return -ENOENT;
+	if (ts.tv_sec == 0) {
+		sk->sk_stamp = ktime_get_real();
+		ts = ktime_to_timespec(sk->sk_stamp);
+	}
+	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
+}
+EXPORT_SYMBOL(sock_get_timestampns);
+
 void sock_enable_timestamp(struct sock *sk)
 {
 	if (!sock_flag(sk, SOCK_TIMESTAMP)) {
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index f573eddc603..487f879f5a1 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -727,6 +727,9 @@ static int econet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg
 		case SIOCGSTAMP:
 			return sock_get_timestamp(sk, argp);
 
+		case SIOCGSTAMPNS:
+			return sock_get_timestampns(sk, argp);
+
 		case SIOCSIFADDR:
 		case SIOCGIFADDR:
 			return ec_dev_ioctl(sock, cmd, argp);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index cf358c84c44..df41856fc60 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -755,6 +755,9 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		case SIOCGSTAMP:
 			err = sock_get_timestamp(sk, (struct timeval __user *)arg);
 			break;
+		case SIOCGSTAMPNS:
+			err = sock_get_timestampns(sk, (struct timespec __user *)arg);
+			break;
 		case SIOCADDRT:
 		case SIOCDELRT:
 		case SIOCRTMSG:
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index fed3758181e..2ff07041795 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -443,6 +443,9 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case SIOCGSTAMP:
 		return sock_get_timestamp(sk, (struct timeval __user *)arg);
 
+	case SIOCGSTAMPNS:
+		return sock_get_timestampns(sk, (struct timespec __user *)arg);
+
 	case SIOCADDRT:
 	case SIOCDELRT:
 
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index bf9837dd95c..a54e7ef2568 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1209,6 +1209,12 @@ static int nr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		release_sock(sk);
 		return ret;
 
+	case SIOCGSTAMPNS:
+		lock_sock(sk);
+		ret = sock_get_timestampns(sk, argp);
+		release_sock(sk);
+		return ret;
+
 	case SIOCGIFADDR:
 	case SIOCSIFADDR:
 	case SIOCGIFDSTADDR:
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f9866a8456a..6f8c72d2413 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1545,6 +1545,8 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
 		}
 		case SIOCGSTAMP:
 			return sock_get_timestamp(sk, (struct timeval __user *)arg);
+		case SIOCGSTAMPNS:
+			return sock_get_timestampns(sk, (struct timespec __user *)arg);
 
 #ifdef CONFIG_INET
 		case SIOCADDRT:
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index f92d5310847..f64be9369ef 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1296,6 +1296,9 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case SIOCGSTAMP:
 		return sock_get_timestamp(sk, (struct timeval __user *) argp);
 
+	case SIOCGSTAMPNS:
+		return sock_get_timestampns(sk, (struct timespec __user *) argp);
+
 	case SIOCGIFADDR:
 	case SIOCSIFADDR:
 	case SIOCGIFDSTADDR:
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index e62ba41b05c..a1988431562 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1280,6 +1280,12 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 				rc = sock_get_timestamp(sk,
 						(struct timeval __user *)argp);
 			break;
+		case SIOCGSTAMPNS:
+			rc = -EINVAL;
+			if (sk)
+				rc = sock_get_timestampns(sk,
+						(struct timespec __user *)argp);
+			break;
 		case SIOCGIFADDR:
 		case SIOCSIFADDR:
 		case SIOCGIFDSTADDR:
@@ -1521,6 +1527,12 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd,
 			rc = compat_sock_get_timestamp(sk,
 					(struct timeval __user*)argp);
 		break;
+	case SIOCGSTAMPNS:
+		rc = -EINVAL;
+		if (sk)
+			rc = compat_sock_get_timestampns(sk,
+					(struct timespec __user*)argp);
+		break;
 	case SIOCGIFADDR:
 	case SIOCSIFADDR:
 	case SIOCGIFDSTADDR:
-- 
cgit v1.2.3


From a2a316fd068c455c609ecc155dcfaa7e208d29fe Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 8 Mar 2007 20:41:08 -0800
Subject: [NET]: Replace CONFIG_NET_DEBUG with sysctl.

Covert network warning messages from a compile time to runtime choice.
Removes kernel config option and replaces it with new /proc/sys/net/core/warnings.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/filesystems/proc.txt |  9 +++++++++
 include/linux/sysctl.h             |  1 +
 include/net/sock.h                 | 12 +++++-------
 net/Kconfig                        |  7 -------
 net/core/sysctl_net_core.c         |  8 ++++++++
 net/core/utils.c                   |  2 ++
 6 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 5484ab5efd4..7aaf09b86a5 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1421,6 +1421,15 @@ fewer messages that will be written. Message_burst controls when messages will
 be dropped.  The  default  settings  limit  warning messages to one every five
 seconds.
 
+warnings
+--------
+
+This controls console messages from the networking stack that can occur because
+of problems on the network like duplicate address or bad checksums. Normally,
+this should be enabled, but if the problem persists the messages can be
+disabled.
+
+
 netdev_max_backlog
 ------------------
 
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index c9ccb550206..df2d9ed20a4 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -290,6 +290,7 @@ enum
 	NET_CORE_BUDGET=19,
 	NET_CORE_AEVENT_ETIME=20,
 	NET_CORE_AEVENT_RSEQTH=21,
+	NET_CORE_WARNINGS=22,
 };
 
 /* /proc/sys/net/ethernet */
diff --git a/include/net/sock.h b/include/net/sock.h
index d093e49fdc8..51246579592 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1334,14 +1334,12 @@ extern int sock_get_timestampns(struct sock *, struct timespec __user *);
 /* 
  *	Enable debug/info messages 
  */
+extern int net_msg_warn;
+#define NETDEBUG(fmt, args...) \
+	do { if (net_msg_warn) printk(fmt,##args); } while (0)
 
-#ifdef CONFIG_NETDEBUG
-#define NETDEBUG(fmt, args...)	printk(fmt,##args)
-#define LIMIT_NETDEBUG(fmt, args...) do { if (net_ratelimit()) printk(fmt,##args); } while(0)
-#else
-#define NETDEBUG(fmt, args...)	do { } while (0)
-#define LIMIT_NETDEBUG(fmt, args...) do { } while(0)
-#endif
+#define LIMIT_NETDEBUG(fmt, args...) \
+	do { if (net_msg_warn && net_ratelimit()) printk(fmt,##args); } while(0)
 
 /*
  * Macros for sleeping on a socket. Use them like this:
diff --git a/net/Kconfig b/net/Kconfig
index 915657832d9..e2d9b3b9cda 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -27,13 +27,6 @@ if NET
 
 menu "Networking options"
 
-config NETDEBUG
-	bool "Network packet debugging"
-	help
-	  You can say Y here if you want to get additional messages useful in
-	  debugging bad packets, but can overwhelm logs under denial of service
-	  attacks.
-
 source "net/packet/Kconfig"
 source "net/unix/Kconfig"
 source "net/xfrm/Kconfig"
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 1e75b158546..b29712033dd 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -136,6 +136,14 @@ ctl_table core_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
 	},
+	{
+		.ctl_name	= NET_CORE_WARNINGS,
+		.procname	= "warnings",
+		.data		= &net_msg_warn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
 	{ .ctl_name = 0 }
 };
 
diff --git a/net/core/utils.c b/net/core/utils.c
index 07236c17fab..34f08107b98 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -32,6 +32,8 @@
 
 int net_msg_cost = 5*HZ;
 int net_msg_burst = 10;
+int net_msg_warn = 1;
+EXPORT_SYMBOL(net_msg_warn);
 
 /*
  * All net warning printk()s should be guarded by this function.
-- 
cgit v1.2.3


From 6516c65573fde5e421c6c92c4b180bbe2245b23b Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 8 Mar 2007 20:41:55 -0800
Subject: [UDP]: ipv4 whitespace cleanup

Fix whitespace around keywords.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 95 +++++++++++++++++++++++++++++-----------------------------
 1 file changed, 48 insertions(+), 47 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 86368832d48..21901d2c532 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -175,7 +175,8 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 			;
 		}
 		result = best;
-		for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
+		for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE;
+		     i++, result += UDP_HTABLE_SIZE) {
 			if (result > sysctl_local_port_range[1])
 				result = sysctl_local_port_range[0]
 					+ ((result - sysctl_local_port_range[0]) &
@@ -270,10 +271,10 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
 					continue;
 				score+=2;
 			}
-			if(score == 9) {
+			if (score == 9) {
 				result = sk;
 				break;
-			} else if(score > badness) {
+			} else if (score > badness) {
 				result = sk;
 				badness = score;
 			}
@@ -765,38 +766,38 @@ out:
 
 int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
-	switch(cmd)
+	switch (cmd) {
+	case SIOCOUTQ:
 	{
-		case SIOCOUTQ:
-		{
-			int amount = atomic_read(&sk->sk_wmem_alloc);
-			return put_user(amount, (int __user *)arg);
-		}
+		int amount = atomic_read(&sk->sk_wmem_alloc);
+		return put_user(amount, (int __user *)arg);
+	}
 
-		case SIOCINQ:
-		{
-			struct sk_buff *skb;
-			unsigned long amount;
-
-			amount = 0;
-			spin_lock_bh(&sk->sk_receive_queue.lock);
-			skb = skb_peek(&sk->sk_receive_queue);
-			if (skb != NULL) {
-				/*
-				 * We will only return the amount
-				 * of this packet since that is all
-				 * that will be read.
-				 */
-				amount = skb->len - sizeof(struct udphdr);
-			}
-			spin_unlock_bh(&sk->sk_receive_queue.lock);
-			return put_user(amount, (int __user *)arg);
+	case SIOCINQ:
+	{
+		struct sk_buff *skb;
+		unsigned long amount;
+
+		amount = 0;
+		spin_lock_bh(&sk->sk_receive_queue.lock);
+		skb = skb_peek(&sk->sk_receive_queue);
+		if (skb != NULL) {
+			/*
+			 * We will only return the amount
+			 * of this packet since that is all
+			 * that will be read.
+			 */
+			amount = skb->len - sizeof(struct udphdr);
 		}
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+		return put_user(amount, (int __user *)arg);
+	}
 
-		default:
-			return -ENOIOCTLCMD;
+	default:
+		return -ENOIOCTLCMD;
 	}
-	return(0);
+
+	return 0;
 }
 
 /*
@@ -958,7 +959,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
 		/* Check if this is a keepalive packet.  If so, eat it. */
 		if (len == 1 && udpdata[0] == 0xff) {
 			return 0;
-		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0 ) {
+		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
 			/* ESP Packet without Non-ESP header */
 			len = sizeof(struct udphdr);
 		} else
@@ -1141,10 +1142,10 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
 
 			sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
 						   uh->source, saddr, dif);
-			if(sknext)
+			if (sknext)
 				skb1 = skb_clone(skb, GFP_ATOMIC);
 
-			if(skb1) {
+			if (skb1) {
 				int ret = udp_queue_rcv_skb(sk, skb1);
 				if (ret > 0)
 					/* we should probably re-process instead
@@ -1152,7 +1153,7 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
 					kfree_skb(skb1);
 			}
 			sk = sknext;
-		} while(sknext);
+		} while (sknext);
 	} else
 		kfree_skb(skb);
 	read_unlock(&udp_hash_lock);
@@ -1230,7 +1231,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	if (udp4_csum_init(skb, uh, proto))
 		goto csum_error;
 
-	if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
+	if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
 		return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
 
 	sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
@@ -1264,7 +1265,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	 * don't wanna listen.  Ignore it.
 	 */
 	kfree_skb(skb);
-	return(0);
+	return 0;
 
 short_packet:
 	LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
@@ -1292,7 +1293,7 @@ csum_error:
 drop:
 	UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
 	kfree_skb(skb);
-	return(0);
+	return 0;
 }
 
 __inline__ int udp_rcv(struct sk_buff *skb)
@@ -1319,13 +1320,13 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 	int val;
 	int err = 0;
 
-	if(optlen<sizeof(int))
+	if (optlen<sizeof(int))
 		return -EINVAL;
 
 	if (get_user(val, (int __user *)optval))
 		return -EFAULT;
 
-	switch(optname) {
+	switch (optname) {
 	case UDP_CORK:
 		if (val != 0) {
 			up->corkflag = 1;
@@ -1379,7 +1380,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 	default:
 		err = -ENOPROTOOPT;
 		break;
-	};
+	}
 
 	return err;
 }
@@ -1410,15 +1411,15 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
 	struct udp_sock *up = udp_sk(sk);
 	int val, len;
 
-	if(get_user(len,optlen))
+	if (get_user(len,optlen))
 		return -EFAULT;
 
 	len = min_t(unsigned int, len, sizeof(int));
 
-	if(len < 0)
+	if (len < 0)
 		return -EINVAL;
 
-	switch(optname) {
+	switch (optname) {
 	case UDP_CORK:
 		val = up->corkflag;
 		break;
@@ -1439,11 +1440,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
 
 	default:
 		return -ENOPROTOOPT;
-	};
+	}
 
-	if(put_user(len, optlen))
+	if (put_user(len, optlen))
 		return -EFAULT;
-	if(copy_to_user(optval, &val,len))
+	if (copy_to_user(optval, &val,len))
 		return -EFAULT;
 	return 0;
 }
@@ -1575,7 +1576,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
 	struct sock *sk = udp_get_first(seq);
 
 	if (sk)
-		while(pos && (sk = udp_get_next(seq, sk)) != NULL)
+		while (pos && (sk = udp_get_next(seq, sk)) != NULL)
 			--pos;
 	return pos ? NULL : sk;
 }
-- 
cgit v1.2.3


From add459aa1afe05472abc96f6a29aefd0c84e73d6 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 8 Mar 2007 20:42:35 -0800
Subject: [UDP]: ipv6 style cleanup

Fix whitespace around keywords. Eliminate unnecessary ()'s on return
statements.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 73337168979..87b06a80102 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -93,10 +93,10 @@ static struct sock *__udp6_lib_lookup(struct in6_addr *saddr, __be16 sport,
 					continue;
 				score++;
 			}
-			if(score == 4) {
+			if (score == 4) {
 				result = sk;
 				break;
-			} else if(score > badness) {
+			} else if (score > badness) {
 				result = sk;
 				badness = score;
 			}
@@ -329,7 +329,7 @@ static struct sock *udp_v6_mcast_next(struct sock *sk,
 				if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
 					continue;
 			}
-			if(!inet6_mc_check(s, loc_addr, rmt_addr))
+			if (!inet6_mc_check(s, loc_addr, rmt_addr))
 				continue;
 			return s;
 		}
@@ -473,14 +473,14 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
 		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
 
 		kfree_skb(skb);
-		return(0);
+		return 0;
 	}
 
 	/* deliver */
 
 	udpv6_queue_rcv_skb(sk, skb);
 	sock_put(sk);
-	return(0);
+	return 0;
 
 short_packet:
 	LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n",
@@ -490,7 +490,7 @@ short_packet:
 discard:
 	UDP6_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
 	kfree_skb(skb);
-	return(0);
+	return 0;
 }
 
 static __inline__ int udpv6_rcv(struct sk_buff **pskb)
-- 
cgit v1.2.3


From e71a4783aae059931f63b2d4e7013e36529badef Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Tue, 10 Apr 2007 20:10:33 -0700
Subject: [NET] core: whitespace cleanup

Fix whitespace around keywords. Fix indentation especially of switch
statements.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/compat.c         |  30 +--
 net/core/ethtool.c   |   4 +-
 net/core/net-sysfs.c |   4 +-
 net/core/pktgen.c    |  16 +-
 net/core/sock.c      | 712 +++++++++++++++++++++++++--------------------------
 net/core/wireless.c  | 297 +++++++++++----------
 net/socket.c         |   2 +-
 7 files changed, 530 insertions(+), 535 deletions(-)

diff --git a/net/compat.c b/net/compat.c
index 2fc6d9bb622..0e407563ae8 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -34,11 +34,11 @@ static inline int iov_from_user_compat_to_kern(struct iovec *kiov,
 {
 	int tot_len = 0;
 
-	while(niov > 0) {
+	while (niov > 0) {
 		compat_uptr_t buf;
 		compat_size_t len;
 
-		if(get_user(len, &uiov32->iov_len) ||
+		if (get_user(len, &uiov32->iov_len) ||
 		   get_user(buf, &uiov32->iov_base)) {
 			tot_len = -EFAULT;
 			break;
@@ -78,12 +78,12 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
 {
 	int tot_len;
 
-	if(kern_msg->msg_namelen) {
-		if(mode==VERIFY_READ) {
+	if (kern_msg->msg_namelen) {
+		if (mode==VERIFY_READ) {
 			int err = move_addr_to_kernel(kern_msg->msg_name,
 						      kern_msg->msg_namelen,
 						      kern_address);
-			if(err < 0)
+			if (err < 0)
 				return err;
 		}
 		kern_msg->msg_name = kern_address;
@@ -93,7 +93,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
 	tot_len = iov_from_user_compat_to_kern(kern_iov,
 					  (struct compat_iovec __user *)kern_msg->msg_iov,
 					  kern_msg->msg_iovlen);
-	if(tot_len >= 0)
+	if (tot_len >= 0)
 		kern_msg->msg_iov = kern_iov;
 
 	return tot_len;
@@ -146,8 +146,8 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
 	kcmlen = 0;
 	kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf;
 	ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg);
-	while(ucmsg != NULL) {
-		if(get_user(ucmlen, &ucmsg->cmsg_len))
+	while (ucmsg != NULL) {
+		if (get_user(ucmlen, &ucmsg->cmsg_len))
 			return -EFAULT;
 
 		/* Catch bogons. */
@@ -160,7 +160,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
 		kcmlen += tmp;
 		ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen);
 	}
-	if(kcmlen == 0)
+	if (kcmlen == 0)
 		return -EINVAL;
 
 	/* The kcmlen holds the 64-bit version of the control length.
@@ -176,7 +176,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
 	/* Now copy them over neatly. */
 	memset(kcmsg, 0, kcmlen);
 	ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg);
-	while(ucmsg != NULL) {
+	while (ucmsg != NULL) {
 		if (__get_user(ucmlen, &ucmsg->cmsg_len))
 			goto Efault;
 		if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg))
@@ -219,7 +219,7 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
 	struct compat_cmsghdr cmhdr;
 	int cmlen;
 
-	if(cm == NULL || kmsg->msg_controllen < sizeof(*cm)) {
+	if (cm == NULL || kmsg->msg_controllen < sizeof(*cm)) {
 		kmsg->msg_flags |= MSG_CTRUNC;
 		return 0; /* XXX: return error? check spec. */
 	}
@@ -233,7 +233,7 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
 	}
 
 	cmlen = CMSG_COMPAT_LEN(len);
-	if(kmsg->msg_controllen < cmlen) {
+	if (kmsg->msg_controllen < cmlen) {
 		kmsg->msg_flags |= MSG_CTRUNC;
 		cmlen = kmsg->msg_controllen;
 	}
@@ -241,9 +241,9 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
 	cmhdr.cmsg_type = type;
 	cmhdr.cmsg_len = cmlen;
 
-	if(copy_to_user(cm, &cmhdr, sizeof cmhdr))
+	if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
 		return -EFAULT;
-	if(copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr)))
+	if (copy_to_user(CMSG_COMPAT_DATA(cm), data, cmlen - sizeof(struct compat_cmsghdr)))
 		return -EFAULT;
 	cmlen = CMSG_COMPAT_SPACE(len);
 	kmsg->msg_control += cmlen;
@@ -646,7 +646,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 	a0 = a[0];
 	a1 = a[1];
 
-	switch(call) {
+	switch (call) {
 	case SYS_SOCKET:
 		ret = sys_socket(a0, a1, a[2]);
 		break;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 6168edd137d..8d5e5a09b57 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -836,7 +836,7 @@ int dev_ethtool(struct ifreq *ifr)
 			return -EPERM;
 	}
 
-	if(dev->ethtool_ops->begin)
+	if (dev->ethtool_ops->begin)
 		if ((rc = dev->ethtool_ops->begin(dev)) < 0)
 			return rc;
 
@@ -952,7 +952,7 @@ int dev_ethtool(struct ifreq *ifr)
 		rc =  -EOPNOTSUPP;
 	}
 
-	if(dev->ethtool_ops->complete)
+	if (dev->ethtool_ops->complete)
 		dev->ethtool_ops->complete(dev);
 
 	if (old_features != dev->features)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4cbb1290a6a..221a64ab64f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -352,8 +352,8 @@ static ssize_t wireless_show(struct device *d, char *buf,
 
 	read_lock(&dev_base_lock);
 	if (dev_isalive(dev)) {
-		if(dev->wireless_handlers &&
-		   dev->wireless_handlers->get_wireless_stats)
+		if (dev->wireless_handlers &&
+		    dev->wireless_handlers->get_wireless_stats)
 			iw = dev->wireless_handlers->get_wireless_stats(dev);
 		if (iw != NULL)
 			ret = (*format)(iw, buf);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 895739fdfac..10d33fc233b 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -645,7 +645,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 	if (pkt_dev->nr_labels) {
 		unsigned i;
 		seq_printf(seq, "     mpls: ");
-		for(i = 0; i < pkt_dev->nr_labels; i++)
+		for (i = 0; i < pkt_dev->nr_labels; i++)
 			seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]),
 				   i == pkt_dev->nr_labels-1 ? "\n" : ", ");
 	}
@@ -750,7 +750,7 @@ static int hex32_arg(const char __user *user_buffer, unsigned long maxlen, __u32
 	int i = 0;
 	*num = 0;
 
-	for(; i < maxlen; i++) {
+	for (; i < maxlen; i++) {
 		char c;
 		*num <<= 4;
 		if (get_user(c, &user_buffer[i]))
@@ -858,7 +858,7 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
 		n++;
 		if (n >= MAX_MPLS_LABELS)
 			return -E2BIG;
-	} while(c == ',');
+	} while (c == ',');
 
 	pkt_dev->nr_labels = n;
 	return i;
@@ -1487,7 +1487,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		if (len < 0) { return len; }
 		i += len;
 		offset = sprintf(pg_result, "OK: mpls=");
-		for(n = 0; n < pkt_dev->nr_labels; n++)
+		for (n = 0; n < pkt_dev->nr_labels; n++)
 			offset += sprintf(pg_result + offset,
 					  "%08x%s", ntohl(pkt_dev->labels[n]),
 					  n == pkt_dev->nr_labels-1 ? "" : ",");
@@ -2148,7 +2148,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 
 	if (pkt_dev->flags & F_MPLS_RND) {
 		unsigned i;
-		for(i = 0; i < pkt_dev->nr_labels; i++)
+		for (i = 0; i < pkt_dev->nr_labels; i++)
 			if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM)
 				pkt_dev->labels[i] = MPLS_STACK_BOTTOM |
 					     ((__force __be32)random32() &
@@ -2281,7 +2281,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev)
 {
 	unsigned i;
-	for(i = 0; i < pkt_dev->nr_labels; i++) {
+	for (i = 0; i < pkt_dev->nr_labels; i++) {
 		*mpls++ = pkt_dev->labels[i] & ~MPLS_STACK_BOTTOM;
 	}
 	mpls--;
@@ -2341,7 +2341,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 		mpls_push(mpls, pkt_dev);
 
 	if (pkt_dev->vlan_id != 0xffff) {
-		if(pkt_dev->svlan_id != 0xffff) {
+		if (pkt_dev->svlan_id != 0xffff) {
 			svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
 			*svlan_tci = build_tci(pkt_dev->svlan_id,
 					       pkt_dev->svlan_cfi,
@@ -2677,7 +2677,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 		mpls_push(mpls, pkt_dev);
 
 	if (pkt_dev->vlan_id != 0xffff) {
-		if(pkt_dev->svlan_id != 0xffff) {
+		if (pkt_dev->svlan_id != 0xffff) {
 			svlan_tci = (__be16 *)skb_put(skb, sizeof(__be16));
 			*svlan_tci = build_tci(pkt_dev->svlan_id,
 					       pkt_dev->svlan_cfi,
diff --git a/net/core/sock.c b/net/core/sock.c
index cb48fa0e124..792ae39804a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -361,8 +361,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 	}
 #endif
 
-	if(optlen<sizeof(int))
-		return(-EINVAL);
+	if (optlen < sizeof(int))
+		return -EINVAL;
 
 	if (get_user(val, (int __user *)optval))
 		return -EFAULT;
@@ -371,265 +371,263 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 
 	lock_sock(sk);
 
-	switch(optname)
-	{
-		case SO_DEBUG:
-			if(val && !capable(CAP_NET_ADMIN))
-			{
-				ret = -EACCES;
-			}
-			else if (valbool)
-				sock_set_flag(sk, SOCK_DBG);
-			else
-				sock_reset_flag(sk, SOCK_DBG);
-			break;
-		case SO_REUSEADDR:
-			sk->sk_reuse = valbool;
-			break;
-		case SO_TYPE:
-		case SO_ERROR:
-			ret = -ENOPROTOOPT;
-			break;
-		case SO_DONTROUTE:
-			if (valbool)
-				sock_set_flag(sk, SOCK_LOCALROUTE);
-			else
-				sock_reset_flag(sk, SOCK_LOCALROUTE);
-			break;
-		case SO_BROADCAST:
-			sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
-			break;
-		case SO_SNDBUF:
-			/* Don't error on this BSD doesn't and if you think
-			   about it this is right. Otherwise apps have to
-			   play 'guess the biggest size' games. RCVBUF/SNDBUF
-			   are treated in BSD as hints */
-
-			if (val > sysctl_wmem_max)
-				val = sysctl_wmem_max;
+	switch(optname) {
+	case SO_DEBUG:
+		if (val && !capable(CAP_NET_ADMIN)) {
+			ret = -EACCES;
+		}
+		else if (valbool)
+			sock_set_flag(sk, SOCK_DBG);
+		else
+			sock_reset_flag(sk, SOCK_DBG);
+		break;
+	case SO_REUSEADDR:
+		sk->sk_reuse = valbool;
+		break;
+	case SO_TYPE:
+	case SO_ERROR:
+		ret = -ENOPROTOOPT;
+		break;
+	case SO_DONTROUTE:
+		if (valbool)
+			sock_set_flag(sk, SOCK_LOCALROUTE);
+		else
+			sock_reset_flag(sk, SOCK_LOCALROUTE);
+		break;
+	case SO_BROADCAST:
+		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
+		break;
+	case SO_SNDBUF:
+		/* Don't error on this BSD doesn't and if you think
+		   about it this is right. Otherwise apps have to
+		   play 'guess the biggest size' games. RCVBUF/SNDBUF
+		   are treated in BSD as hints */
+
+		if (val > sysctl_wmem_max)
+			val = sysctl_wmem_max;
 set_sndbuf:
-			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-			if ((val * 2) < SOCK_MIN_SNDBUF)
-				sk->sk_sndbuf = SOCK_MIN_SNDBUF;
-			else
-				sk->sk_sndbuf = val * 2;
+		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+		if ((val * 2) < SOCK_MIN_SNDBUF)
+			sk->sk_sndbuf = SOCK_MIN_SNDBUF;
+		else
+			sk->sk_sndbuf = val * 2;
 
-			/*
-			 *	Wake up sending tasks if we
-			 *	upped the value.
-			 */
-			sk->sk_write_space(sk);
-			break;
+		/*
+		 *	Wake up sending tasks if we
+		 *	upped the value.
+		 */
+		sk->sk_write_space(sk);
+		break;
 
-		case SO_SNDBUFFORCE:
-			if (!capable(CAP_NET_ADMIN)) {
-				ret = -EPERM;
-				break;
-			}
-			goto set_sndbuf;
+	case SO_SNDBUFFORCE:
+		if (!capable(CAP_NET_ADMIN)) {
+			ret = -EPERM;
+			break;
+		}
+		goto set_sndbuf;
 
-		case SO_RCVBUF:
-			/* Don't error on this BSD doesn't and if you think
-			   about it this is right. Otherwise apps have to
-			   play 'guess the biggest size' games. RCVBUF/SNDBUF
-			   are treated in BSD as hints */
+	case SO_RCVBUF:
+		/* Don't error on this BSD doesn't and if you think
+		   about it this is right. Otherwise apps have to
+		   play 'guess the biggest size' games. RCVBUF/SNDBUF
+		   are treated in BSD as hints */
 
-			if (val > sysctl_rmem_max)
-				val = sysctl_rmem_max;
+		if (val > sysctl_rmem_max)
+			val = sysctl_rmem_max;
 set_rcvbuf:
-			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-			/*
-			 * We double it on the way in to account for
-			 * "struct sk_buff" etc. overhead.   Applications
-			 * assume that the SO_RCVBUF setting they make will
-			 * allow that much actual data to be received on that
-			 * socket.
-			 *
-			 * Applications are unaware that "struct sk_buff" and
-			 * other overheads allocate from the receive buffer
-			 * during socket buffer allocation.
-			 *
-			 * And after considering the possible alternatives,
-			 * returning the value we actually used in getsockopt
-			 * is the most desirable behavior.
-			 */
-			if ((val * 2) < SOCK_MIN_RCVBUF)
-				sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
-			else
-				sk->sk_rcvbuf = val * 2;
+		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+		/*
+		 * We double it on the way in to account for
+		 * "struct sk_buff" etc. overhead.   Applications
+		 * assume that the SO_RCVBUF setting they make will
+		 * allow that much actual data to be received on that
+		 * socket.
+		 *
+		 * Applications are unaware that "struct sk_buff" and
+		 * other overheads allocate from the receive buffer
+		 * during socket buffer allocation.
+		 *
+		 * And after considering the possible alternatives,
+		 * returning the value we actually used in getsockopt
+		 * is the most desirable behavior.
+		 */
+		if ((val * 2) < SOCK_MIN_RCVBUF)
+			sk->sk_rcvbuf = SOCK_MIN_RCVBUF;
+		else
+			sk->sk_rcvbuf = val * 2;
+		break;
+
+	case SO_RCVBUFFORCE:
+		if (!capable(CAP_NET_ADMIN)) {
+			ret = -EPERM;
 			break;
+		}
+		goto set_rcvbuf;
 
-		case SO_RCVBUFFORCE:
-			if (!capable(CAP_NET_ADMIN)) {
-				ret = -EPERM;
-				break;
-			}
-			goto set_rcvbuf;
-
-		case SO_KEEPALIVE:
+	case SO_KEEPALIVE:
 #ifdef CONFIG_INET
-			if (sk->sk_protocol == IPPROTO_TCP)
-				tcp_set_keepalive(sk, valbool);
+		if (sk->sk_protocol == IPPROTO_TCP)
+			tcp_set_keepalive(sk, valbool);
 #endif
-			sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
-			break;
-
-		case SO_OOBINLINE:
-			sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
+		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
+		break;
+
+	case SO_OOBINLINE:
+		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
+		break;
+
+	case SO_NO_CHECK:
+		sk->sk_no_check = valbool;
+		break;
+
+	case SO_PRIORITY:
+		if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
+			sk->sk_priority = val;
+		else
+			ret = -EPERM;
+		break;
+
+	case SO_LINGER:
+		if (optlen < sizeof(ling)) {
+			ret = -EINVAL;	/* 1003.1g */
 			break;
-
-		case SO_NO_CHECK:
-			sk->sk_no_check = valbool;
-			break;
-
-		case SO_PRIORITY:
-			if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN))
-				sk->sk_priority = val;
-			else
-				ret = -EPERM;
+		}
+		if (copy_from_user(&ling,optval,sizeof(ling))) {
+			ret = -EFAULT;
 			break;
-
-		case SO_LINGER:
-			if(optlen<sizeof(ling)) {
-				ret = -EINVAL;	/* 1003.1g */
-				break;
-			}
-			if (copy_from_user(&ling,optval,sizeof(ling))) {
-				ret = -EFAULT;
-				break;
-			}
-			if (!ling.l_onoff)
-				sock_reset_flag(sk, SOCK_LINGER);
-			else {
+		}
+		if (!ling.l_onoff)
+			sock_reset_flag(sk, SOCK_LINGER);
+		else {
 #if (BITS_PER_LONG == 32)
-				if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
-					sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
-				else
-#endif
-					sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
-				sock_set_flag(sk, SOCK_LINGER);
-			}
-			break;
-
-		case SO_BSDCOMPAT:
-			sock_warn_obsolete_bsdism("setsockopt");
-			break;
-
-		case SO_PASSCRED:
-			if (valbool)
-				set_bit(SOCK_PASSCRED, &sock->flags);
+			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
+				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
 			else
-				clear_bit(SOCK_PASSCRED, &sock->flags);
-			break;
-
-		case SO_TIMESTAMP:
-			if (valbool)  {
-				sock_set_flag(sk, SOCK_RCVTSTAMP);
-				sock_enable_timestamp(sk);
-			} else
-				sock_reset_flag(sk, SOCK_RCVTSTAMP);
-			break;
-
-		case SO_RCVLOWAT:
-			if (val < 0)
-				val = INT_MAX;
-			sk->sk_rcvlowat = val ? : 1;
-			break;
+#endif
+				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
+			sock_set_flag(sk, SOCK_LINGER);
+		}
+		break;
+
+	case SO_BSDCOMPAT:
+		sock_warn_obsolete_bsdism("setsockopt");
+		break;
+
+	case SO_PASSCRED:
+		if (valbool)
+			set_bit(SOCK_PASSCRED, &sock->flags);
+		else
+			clear_bit(SOCK_PASSCRED, &sock->flags);
+		break;
+
+	case SO_TIMESTAMP:
+		if (valbool)  {
+			sock_set_flag(sk, SOCK_RCVTSTAMP);
+			sock_enable_timestamp(sk);
+		} else
+			sock_reset_flag(sk, SOCK_RCVTSTAMP);
+		break;
+
+	case SO_RCVLOWAT:
+		if (val < 0)
+			val = INT_MAX;
+		sk->sk_rcvlowat = val ? : 1;
+		break;
+
+	case SO_RCVTIMEO:
+		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
+		break;
+
+	case SO_SNDTIMEO:
+		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+		break;
 
-		case SO_RCVTIMEO:
-			ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
-			break;
+#ifdef CONFIG_NETDEVICES
+	case SO_BINDTODEVICE:
+	{
+		char devname[IFNAMSIZ];
 
-		case SO_SNDTIMEO:
-			ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+		/* Sorry... */
+		if (!capable(CAP_NET_RAW)) {
+			ret = -EPERM;
 			break;
+		}
 
-#ifdef CONFIG_NETDEVICES
-		case SO_BINDTODEVICE:
-		{
-			char devname[IFNAMSIZ];
+		/* Bind this socket to a particular device like "eth0",
+		 * as specified in the passed interface name. If the
+		 * name is "" or the option length is zero the socket
+		 * is not bound.
+		 */
 
-			/* Sorry... */
-			if (!capable(CAP_NET_RAW)) {
-				ret = -EPERM;
+		if (!valbool) {
+			sk->sk_bound_dev_if = 0;
+		} else {
+			if (optlen > IFNAMSIZ - 1)
+				optlen = IFNAMSIZ - 1;
+			memset(devname, 0, sizeof(devname));
+			if (copy_from_user(devname, optval, optlen)) {
+				ret = -EFAULT;
 				break;
 			}
 
-			/* Bind this socket to a particular device like "eth0",
-			 * as specified in the passed interface name. If the
-			 * name is "" or the option length is zero the socket
-			 * is not bound.
-			 */
+			/* Remove any cached route for this socket. */
+			sk_dst_reset(sk);
 
-			if (!valbool) {
+			if (devname[0] == '\0') {
 				sk->sk_bound_dev_if = 0;
 			} else {
-				if (optlen > IFNAMSIZ - 1)
-					optlen = IFNAMSIZ - 1;
-				memset(devname, 0, sizeof(devname));
-				if (copy_from_user(devname, optval, optlen)) {
-					ret = -EFAULT;
+				struct net_device *dev = dev_get_by_name(devname);
+				if (!dev) {
+					ret = -ENODEV;
 					break;
 				}
-
-				/* Remove any cached route for this socket. */
-				sk_dst_reset(sk);
-
-				if (devname[0] == '\0') {
-					sk->sk_bound_dev_if = 0;
-				} else {
-					struct net_device *dev = dev_get_by_name(devname);
-					if (!dev) {
-						ret = -ENODEV;
-						break;
-					}
-					sk->sk_bound_dev_if = dev->ifindex;
-					dev_put(dev);
-				}
+				sk->sk_bound_dev_if = dev->ifindex;
+				dev_put(dev);
 			}
-			break;
 		}
+		break;
+	}
 #endif
 
 
-		case SO_ATTACH_FILTER:
-			ret = -EINVAL;
-			if (optlen == sizeof(struct sock_fprog)) {
-				struct sock_fprog fprog;
-
-				ret = -EFAULT;
-				if (copy_from_user(&fprog, optval, sizeof(fprog)))
-					break;
-
-				ret = sk_attach_filter(&fprog, sk);
-			}
-			break;
+	case SO_ATTACH_FILTER:
+		ret = -EINVAL;
+		if (optlen == sizeof(struct sock_fprog)) {
+			struct sock_fprog fprog;
 
-		case SO_DETACH_FILTER:
-			rcu_read_lock_bh();
-			filter = rcu_dereference(sk->sk_filter);
-			if (filter) {
-				rcu_assign_pointer(sk->sk_filter, NULL);
-				sk_filter_release(sk, filter);
-				rcu_read_unlock_bh();
+			ret = -EFAULT;
+			if (copy_from_user(&fprog, optval, sizeof(fprog)))
 				break;
-			}
+
+			ret = sk_attach_filter(&fprog, sk);
+		}
+		break;
+
+	case SO_DETACH_FILTER:
+		rcu_read_lock_bh();
+		filter = rcu_dereference(sk->sk_filter);
+		if (filter) {
+			rcu_assign_pointer(sk->sk_filter, NULL);
+			sk_filter_release(sk, filter);
 			rcu_read_unlock_bh();
-			ret = -ENONET;
 			break;
+		}
+		rcu_read_unlock_bh();
+		ret = -ENONET;
+		break;
 
-		case SO_PASSSEC:
-			if (valbool)
-				set_bit(SOCK_PASSSEC, &sock->flags);
-			else
-				clear_bit(SOCK_PASSSEC, &sock->flags);
-			break;
+	case SO_PASSSEC:
+		if (valbool)
+			set_bit(SOCK_PASSSEC, &sock->flags);
+		else
+			clear_bit(SOCK_PASSSEC, &sock->flags);
+		break;
 
 		/* We implement the SO_SNDLOWAT etc to
 		   not be settable (1003.1g 5.3) */
-		default:
-			ret = -ENOPROTOOPT;
-			break;
+	default:
+		ret = -ENOPROTOOPT;
+		break;
 	}
 	release_sock(sk);
 	return ret;
@@ -641,8 +639,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 {
 	struct sock *sk = sock->sk;
 
-	union
-	{
+	union {
 		int val;
 		struct linger ling;
 		struct timeval tm;
@@ -651,148 +648,148 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 	unsigned int lv = sizeof(int);
 	int len;
 
-	if(get_user(len,optlen))
+	if (get_user(len, optlen))
 		return -EFAULT;
-	if(len < 0)
+	if (len < 0)
 		return -EINVAL;
 
-	switch(optname)
-	{
-		case SO_DEBUG:
-			v.val = sock_flag(sk, SOCK_DBG);
-			break;
-
-		case SO_DONTROUTE:
-			v.val = sock_flag(sk, SOCK_LOCALROUTE);
-			break;
-
-		case SO_BROADCAST:
-			v.val = !!sock_flag(sk, SOCK_BROADCAST);
-			break;
-
-		case SO_SNDBUF:
-			v.val = sk->sk_sndbuf;
-			break;
-
-		case SO_RCVBUF:
-			v.val = sk->sk_rcvbuf;
-			break;
-
-		case SO_REUSEADDR:
-			v.val = sk->sk_reuse;
-			break;
-
-		case SO_KEEPALIVE:
-			v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
-			break;
-
-		case SO_TYPE:
-			v.val = sk->sk_type;
-			break;
-
-		case SO_ERROR:
-			v.val = -sock_error(sk);
-			if(v.val==0)
-				v.val = xchg(&sk->sk_err_soft, 0);
-			break;
-
-		case SO_OOBINLINE:
-			v.val = !!sock_flag(sk, SOCK_URGINLINE);
-			break;
-
-		case SO_NO_CHECK:
-			v.val = sk->sk_no_check;
-			break;
-
-		case SO_PRIORITY:
-			v.val = sk->sk_priority;
-			break;
-
-		case SO_LINGER:
-			lv		= sizeof(v.ling);
-			v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
-			v.ling.l_linger	= sk->sk_lingertime / HZ;
-			break;
-
-		case SO_BSDCOMPAT:
-			sock_warn_obsolete_bsdism("getsockopt");
-			break;
-
-		case SO_TIMESTAMP:
-			v.val = sock_flag(sk, SOCK_RCVTSTAMP);
-			break;
-
-		case SO_RCVTIMEO:
-			lv=sizeof(struct timeval);
-			if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
-				v.tm.tv_sec = 0;
-				v.tm.tv_usec = 0;
-			} else {
-				v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
-				v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
-			}
-			break;
+	switch(optname) {
+	case SO_DEBUG:
+		v.val = sock_flag(sk, SOCK_DBG);
+		break;
+
+	case SO_DONTROUTE:
+		v.val = sock_flag(sk, SOCK_LOCALROUTE);
+		break;
+
+	case SO_BROADCAST:
+		v.val = !!sock_flag(sk, SOCK_BROADCAST);
+		break;
+
+	case SO_SNDBUF:
+		v.val = sk->sk_sndbuf;
+		break;
+
+	case SO_RCVBUF:
+		v.val = sk->sk_rcvbuf;
+		break;
+
+	case SO_REUSEADDR:
+		v.val = sk->sk_reuse;
+		break;
+
+	case SO_KEEPALIVE:
+		v.val = !!sock_flag(sk, SOCK_KEEPOPEN);
+		break;
+
+	case SO_TYPE:
+		v.val = sk->sk_type;
+		break;
+
+	case SO_ERROR:
+		v.val = -sock_error(sk);
+		if (v.val==0)
+			v.val = xchg(&sk->sk_err_soft, 0);
+		break;
+
+	case SO_OOBINLINE:
+		v.val = !!sock_flag(sk, SOCK_URGINLINE);
+		break;
+
+	case SO_NO_CHECK:
+		v.val = sk->sk_no_check;
+		break;
+
+	case SO_PRIORITY:
+		v.val = sk->sk_priority;
+		break;
+
+	case SO_LINGER:
+		lv		= sizeof(v.ling);
+		v.ling.l_onoff	= !!sock_flag(sk, SOCK_LINGER);
+		v.ling.l_linger	= sk->sk_lingertime / HZ;
+		break;
+
+	case SO_BSDCOMPAT:
+		sock_warn_obsolete_bsdism("getsockopt");
+		break;
+
+	case SO_TIMESTAMP:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMP);
+		break;
+
+	case SO_RCVTIMEO:
+		lv=sizeof(struct timeval);
+		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
+			v.tm.tv_sec = 0;
+			v.tm.tv_usec = 0;
+		} else {
+			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
+			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
+		}
+		break;
+
+	case SO_SNDTIMEO:
+		lv=sizeof(struct timeval);
+		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
+			v.tm.tv_sec = 0;
+			v.tm.tv_usec = 0;
+		} else {
+			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
+			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
+		}
+		break;
 
-		case SO_SNDTIMEO:
-			lv=sizeof(struct timeval);
-			if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
-				v.tm.tv_sec = 0;
-				v.tm.tv_usec = 0;
-			} else {
-				v.tm.tv_sec = sk->sk_sndtimeo / HZ;
-				v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
-			}
-			break;
+	case SO_RCVLOWAT:
+		v.val = sk->sk_rcvlowat;
+		break;
 
-		case SO_RCVLOWAT:
-			v.val = sk->sk_rcvlowat;
-			break;
+	case SO_SNDLOWAT:
+		v.val=1;
+		break;
 
-		case SO_SNDLOWAT:
-			v.val=1;
-			break;
+	case SO_PASSCRED:
+		v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
+		break;
 
-		case SO_PASSCRED:
-			v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0;
-			break;
+	case SO_PEERCRED:
+		if (len > sizeof(sk->sk_peercred))
+			len = sizeof(sk->sk_peercred);
+		if (copy_to_user(optval, &sk->sk_peercred, len))
+			return -EFAULT;
+		goto lenout;
 
-		case SO_PEERCRED:
-			if (len > sizeof(sk->sk_peercred))
-				len = sizeof(sk->sk_peercred);
-			if (copy_to_user(optval, &sk->sk_peercred, len))
-				return -EFAULT;
-			goto lenout;
-
-		case SO_PEERNAME:
-		{
-			char address[128];
-
-			if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
-				return -ENOTCONN;
-			if (lv < len)
-				return -EINVAL;
-			if (copy_to_user(optval, address, len))
-				return -EFAULT;
-			goto lenout;
-		}
+	case SO_PEERNAME:
+	{
+		char address[128];
+
+		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
+			return -ENOTCONN;
+		if (lv < len)
+			return -EINVAL;
+		if (copy_to_user(optval, address, len))
+			return -EFAULT;
+		goto lenout;
+	}
 
-		/* Dubious BSD thing... Probably nobody even uses it, but
-		 * the UNIX standard wants it for whatever reason... -DaveM
-		 */
-		case SO_ACCEPTCONN:
-			v.val = sk->sk_state == TCP_LISTEN;
-			break;
+	/* Dubious BSD thing... Probably nobody even uses it, but
+	 * the UNIX standard wants it for whatever reason... -DaveM
+	 */
+	case SO_ACCEPTCONN:
+		v.val = sk->sk_state == TCP_LISTEN;
+		break;
 
-		case SO_PASSSEC:
-			v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
-			break;
+	case SO_PASSSEC:
+		v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0;
+		break;
 
-		case SO_PEERSEC:
-			return security_socket_getpeersec_stream(sock, optval, optlen, len);
+	case SO_PEERSEC:
+		return security_socket_getpeersec_stream(sock, optval, optlen, len);
 
-		default:
-			return(-ENOPROTOOPT);
+	default:
+		return -ENOPROTOOPT;
 	}
+
 	if (len > lv)
 		len = lv;
 	if (copy_to_user(optval, &v, len))
@@ -1220,13 +1217,13 @@ static void __lock_sock(struct sock *sk)
 {
 	DEFINE_WAIT(wait);
 
-	for(;;) {
+	for (;;) {
 		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
 					TASK_UNINTERRUPTIBLE);
 		spin_unlock_bh(&sk->sk_lock.slock);
 		schedule();
 		spin_lock_bh(&sk->sk_lock.slock);
-		if(!sock_owned_by_user(sk))
+		if (!sock_owned_by_user(sk))
 			break;
 	}
 	finish_wait(&sk->sk_lock.wq, &wait);
@@ -1258,7 +1255,7 @@ static void __release_sock(struct sock *sk)
 		} while (skb != NULL);
 
 		bh_lock_sock(sk);
-	} while((skb = sk->sk_backlog.head) != NULL);
+	} while ((skb = sk->sk_backlog.head) != NULL);
 }
 
 /**
@@ -1420,7 +1417,7 @@ static void sock_def_write_space(struct sock *sk)
 	/* Do not wake up a writer until he can make "significant"
 	 * progress.  --DaveM
 	 */
-	if((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 			wake_up_interruptible(sk->sk_sleep);
 
@@ -1482,8 +1479,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	sock_set_flag(sk, SOCK_ZAPPED);
 
-	if(sock)
-	{
+	if (sock) {
 		sk->sk_type	=	sock->type;
 		sk->sk_sleep	=	&sock->wait;
 		sock->sk	=	sk;
diff --git a/net/core/wireless.c b/net/core/wireless.c
index b07fe270a50..8f7f3abdb22 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -463,17 +463,17 @@ static inline iw_handler get_handler(struct net_device *dev,
 	unsigned int	index;		/* *MUST* be unsigned */
 
 	/* Check if we have some wireless handlers defined */
-	if(dev->wireless_handlers == NULL)
+	if (dev->wireless_handlers == NULL)
 		return NULL;
 
 	/* Try as a standard command */
 	index = cmd - SIOCIWFIRST;
-	if(index < dev->wireless_handlers->num_standard)
+	if (index < dev->wireless_handlers->num_standard)
 		return dev->wireless_handlers->standard[index];
 
 	/* Try as a private command */
 	index = cmd - SIOCIWFIRSTPRIV;
-	if(index < dev->wireless_handlers->num_private)
+	if (index < dev->wireless_handlers->num_private)
 		return dev->wireless_handlers->private[index];
 
 	/* Not found */
@@ -487,7 +487,7 @@ static inline iw_handler get_handler(struct net_device *dev,
 static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
 {
 	/* New location */
-	if((dev->wireless_handlers != NULL) &&
+	if ((dev->wireless_handlers != NULL) &&
 	   (dev->wireless_handlers->get_wireless_stats != NULL))
 		return dev->wireless_handlers->get_wireless_stats(dev);
 
@@ -516,7 +516,7 @@ static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
  */
 static inline int call_commit_handler(struct net_device *	dev)
 {
-	if((netif_running(dev)) &&
+	if ((netif_running(dev)) &&
 	   (dev->wireless_handlers->standard[0] != NULL)) {
 		/* Call the commit handler on the driver */
 		return dev->wireless_handlers->standard[0](dev, NULL,
@@ -577,7 +577,7 @@ static int iw_handler_get_iwstats(struct net_device *		dev,
 		wrqu->data.length = sizeof(struct iw_statistics);
 
 		/* Check if we need to clear the updated flag */
-		if(wrqu->data.flags != 0)
+		if (wrqu->data.flags != 0)
 			stats->qual.updated &= ~IW_QUAL_ALL_UPDATED;
 		return 0;
 	} else
@@ -596,12 +596,12 @@ static int iw_handler_get_private(struct net_device *		dev,
 				  char *			extra)
 {
 	/* Check if the driver has something to export */
-	if((dev->wireless_handlers->num_private_args == 0) ||
+	if ((dev->wireless_handlers->num_private_args == 0) ||
 	   (dev->wireless_handlers->private_args == NULL))
 		return -EOPNOTSUPP;
 
 	/* Check if there is enough buffer up there */
-	if(wrqu->data.length < dev->wireless_handlers->num_private_args) {
+	if (wrqu->data.length < dev->wireless_handlers->num_private_args) {
 		/* User space can't know in advance how large the buffer
 		 * needs to be. Give it a hint, so that we can support
 		 * any size buffer we want somewhat efficiently... */
@@ -735,7 +735,7 @@ static int ioctl_standard_call(struct net_device *	dev,
 	int					ret = -EINVAL;
 
 	/* Get the description of the IOCTL */
-	if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
+	if ((cmd - SIOCIWFIRST) >= standard_ioctl_num)
 		return -EOPNOTSUPP;
 	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
 
@@ -750,14 +750,14 @@ static int ioctl_standard_call(struct net_device *	dev,
 	info.flags = 0;
 
 	/* Check if we have a pointer to user space data or not */
-	if(descr->header_type != IW_HEADER_TYPE_POINT) {
+	if (descr->header_type != IW_HEADER_TYPE_POINT) {
 
 		/* No extra arguments. Trivial to handle */
 		ret = handler(dev, &info, &(iwr->u), NULL);
 
 #ifdef WE_SET_EVENT
 		/* Generate an event to notify listeners of the change */
-		if((descr->flags & IW_DESCR_FLAG_EVENT) &&
+		if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
 		   ((ret == 0) || (ret == -EIWCOMMIT)))
 			wireless_send_event(dev, cmd, &(iwr->u), NULL);
 #endif	/* WE_SET_EVENT */
@@ -800,19 +800,19 @@ static int ioctl_standard_call(struct net_device *	dev,
 		iwr->u.data.length -= essid_compat;
 
 		/* Check what user space is giving us */
-		if(IW_IS_SET(cmd)) {
+		if (IW_IS_SET(cmd)) {
 			/* Check NULL pointer */
-			if((iwr->u.data.pointer == NULL) &&
+			if ((iwr->u.data.pointer == NULL) &&
 			   (iwr->u.data.length != 0))
 				return -EFAULT;
 			/* Check if number of token fits within bounds */
-			if(iwr->u.data.length > descr->max_tokens)
+			if (iwr->u.data.length > descr->max_tokens)
 				return -E2BIG;
-			if(iwr->u.data.length < descr->min_tokens)
+			if (iwr->u.data.length < descr->min_tokens)
 				return -EINVAL;
 		} else {
 			/* Check NULL pointer */
-			if(iwr->u.data.pointer == NULL)
+			if (iwr->u.data.pointer == NULL)
 				return -EFAULT;
 			/* Save user space buffer size for checking */
 			user_length = iwr->u.data.length;
@@ -822,7 +822,7 @@ static int ioctl_standard_call(struct net_device *	dev,
 			 * implied by the test at the end. */
 
 			/* Support for very large requests */
-			if((descr->flags & IW_DESCR_FLAG_NOMAX) &&
+			if ((descr->flags & IW_DESCR_FLAG_NOMAX) &&
 			   (user_length > descr->max_tokens)) {
 				/* Allow userspace to GET more than max so
 				 * we can support any size GET requests.
@@ -848,7 +848,7 @@ static int ioctl_standard_call(struct net_device *	dev,
 		}
 
 		/* If it is a SET, get all the extra data in here */
-		if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
+		if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
 			err = copy_from_user(extra, iwr->u.data.pointer,
 					     iwr->u.data.length *
 					     descr->token_size);
@@ -871,7 +871,7 @@ static int ioctl_standard_call(struct net_device *	dev,
 		/* If we have something to return to the user */
 		if (!ret && IW_IS_GET(cmd)) {
 			/* Check if there is enough buffer up there */
-			if(user_length < iwr->u.data.length) {
+			if (user_length < iwr->u.data.length) {
 				kfree(extra);
 				return -E2BIG;
 			}
@@ -890,9 +890,9 @@ static int ioctl_standard_call(struct net_device *	dev,
 
 #ifdef WE_SET_EVENT
 		/* Generate an event to notify listeners of the change */
-		if((descr->flags & IW_DESCR_FLAG_EVENT) &&
+		if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
 		   ((ret == 0) || (ret == -EIWCOMMIT))) {
-			if(descr->flags & IW_DESCR_FLAG_RESTRICT)
+			if (descr->flags & IW_DESCR_FLAG_RESTRICT)
 				/* If the event is restricted, don't
 				 * export the payload */
 				wireless_send_event(dev, cmd, &(iwr->u), NULL);
@@ -907,7 +907,7 @@ static int ioctl_standard_call(struct net_device *	dev,
 	}
 
 	/* Call commit handler if needed and defined */
-	if(ret == -EIWCOMMIT)
+	if (ret == -EIWCOMMIT)
 		ret = call_commit_handler(dev);
 
 	/* Here, we will generate the appropriate event if needed */
@@ -944,8 +944,8 @@ static inline int ioctl_private_call(struct net_device *	dev,
 	int				ret = -EINVAL;
 
 	/* Get the description of the IOCTL */
-	for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
-		if(cmd == dev->wireless_handlers->private_args[i].cmd) {
+	for (i = 0; i < dev->wireless_handlers->num_private_args; i++)
+		if (cmd == dev->wireless_handlers->private_args[i].cmd) {
 			descr = &(dev->wireless_handlers->private_args[i]);
 			break;
 		}
@@ -953,7 +953,7 @@ static inline int ioctl_private_call(struct net_device *	dev,
 #ifdef WE_IOCTL_DEBUG
 	printk(KERN_DEBUG "%s (WE) : Found private handler for 0x%04X\n",
 	       ifr->ifr_name, cmd);
-	if(descr) {
+	if (descr) {
 		printk(KERN_DEBUG "%s (WE) : Name %s, set %X, get %X\n",
 		       dev->name, descr->name,
 		       descr->set_args, descr->get_args);
@@ -961,11 +961,11 @@ static inline int ioctl_private_call(struct net_device *	dev,
 #endif	/* WE_IOCTL_DEBUG */
 
 	/* Compute the size of the set/get arguments */
-	if(descr != NULL) {
-		if(IW_IS_SET(cmd)) {
+	if (descr != NULL) {
+		if (IW_IS_SET(cmd)) {
 			int	offset = 0;	/* For sub-ioctls */
 			/* Check for sub-ioctl handler */
-			if(descr->name[0] == '\0')
+			if (descr->name[0] == '\0')
 				/* Reserve one int for sub-ioctl index */
 				offset = sizeof(__u32);
 
@@ -973,7 +973,7 @@ static inline int ioctl_private_call(struct net_device *	dev,
 			extra_size = get_priv_size(descr->set_args);
 
 			/* Does it fits in iwr ? */
-			if((descr->set_args & IW_PRIV_SIZE_FIXED) &&
+			if ((descr->set_args & IW_PRIV_SIZE_FIXED) &&
 			   ((extra_size + offset) <= IFNAMSIZ))
 				extra_size = 0;
 		} else {
@@ -981,7 +981,7 @@ static inline int ioctl_private_call(struct net_device *	dev,
 			extra_size = get_priv_size(descr->get_args);
 
 			/* Does it fits in iwr ? */
-			if((descr->get_args & IW_PRIV_SIZE_FIXED) &&
+			if ((descr->get_args & IW_PRIV_SIZE_FIXED) &&
 			   (extra_size <= IFNAMSIZ))
 				extra_size = 0;
 		}
@@ -992,7 +992,7 @@ static inline int ioctl_private_call(struct net_device *	dev,
 	info.flags = 0;
 
 	/* Check if we have a pointer to user space data or not. */
-	if(extra_size == 0) {
+	if (extra_size == 0) {
 		/* No extra arguments. Trivial to handle */
 		ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u));
 	} else {
@@ -1000,19 +1000,19 @@ static inline int ioctl_private_call(struct net_device *	dev,
 		int	err;
 
 		/* Check what user space is giving us */
-		if(IW_IS_SET(cmd)) {
+		if (IW_IS_SET(cmd)) {
 			/* Check NULL pointer */
-			if((iwr->u.data.pointer == NULL) &&
+			if ((iwr->u.data.pointer == NULL) &&
 			   (iwr->u.data.length != 0))
 				return -EFAULT;
 
 			/* Does it fits within bounds ? */
-			if(iwr->u.data.length > (descr->set_args &
+			if (iwr->u.data.length > (descr->set_args &
 						 IW_PRIV_SIZE_MASK))
 				return -E2BIG;
 		} else {
 			/* Check NULL pointer */
-			if(iwr->u.data.pointer == NULL)
+			if (iwr->u.data.pointer == NULL)
 				return -EFAULT;
 		}
 
@@ -1029,7 +1029,7 @@ static inline int ioctl_private_call(struct net_device *	dev,
 		}
 
 		/* If it is a SET, get all the extra data in here */
-		if(IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
+		if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
 			err = copy_from_user(extra, iwr->u.data.pointer,
 					     extra_size);
 			if (err) {
@@ -1071,7 +1071,7 @@ static inline int ioctl_private_call(struct net_device *	dev,
 
 
 	/* Call commit handler if needed and defined */
-	if(ret == -EIWCOMMIT)
+	if (ret == -EIWCOMMIT)
 		ret = call_commit_handler(dev);
 
 	return ret;
@@ -1098,51 +1098,50 @@ int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
 	/* A bunch of special cases, then the generic case...
 	 * Note that 'cmd' is already filtered in dev_ioctl() with
 	 * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */
-	switch(cmd)
-	{
-		case SIOCGIWSTATS:
-			/* Get Wireless Stats */
+	switch (cmd) {
+	case SIOCGIWSTATS:
+		/* Get Wireless Stats */
+		return ioctl_standard_call(dev,
+					   ifr,
+					   cmd,
+					   &iw_handler_get_iwstats);
+
+	case SIOCGIWPRIV:
+		/* Check if we have some wireless handlers defined */
+		if (dev->wireless_handlers != NULL) {
+			/* We export to user space the definition of
+			 * the private handler ourselves */
 			return ioctl_standard_call(dev,
 						   ifr,
 						   cmd,
-						   &iw_handler_get_iwstats);
-
-		case SIOCGIWPRIV:
-			/* Check if we have some wireless handlers defined */
-			if(dev->wireless_handlers != NULL) {
-				/* We export to user space the definition of
-				 * the private handler ourselves */
+						   &iw_handler_get_private);
+		}
+		// ## Fall-through for old API ##
+	default:
+		/* Generic IOCTL */
+		/* Basic check */
+		if (!netif_device_present(dev))
+			return -ENODEV;
+		/* New driver API : try to find the handler */
+		handler = get_handler(dev, cmd);
+		if (handler != NULL) {
+			/* Standard and private are not the same */
+			if (cmd < SIOCIWFIRSTPRIV)
 				return ioctl_standard_call(dev,
 							   ifr,
 							   cmd,
-							   &iw_handler_get_private);
-			}
-			// ## Fall-through for old API ##
-		default:
-			/* Generic IOCTL */
-			/* Basic check */
-			if (!netif_device_present(dev))
-				return -ENODEV;
-			/* New driver API : try to find the handler */
-			handler = get_handler(dev, cmd);
-			if(handler != NULL) {
-				/* Standard and private are not the same */
-				if(cmd < SIOCIWFIRSTPRIV)
-					return ioctl_standard_call(dev,
-								   ifr,
-								   cmd,
-								   handler);
-				else
-					return ioctl_private_call(dev,
-								  ifr,
-								  cmd,
-								  handler);
-			}
-			/* Old driver API : call driver ioctl handler */
-			if (dev->do_ioctl) {
-				return dev->do_ioctl(dev, ifr, cmd);
-			}
-			return -EOPNOTSUPP;
+							   handler);
+			else
+				return ioctl_private_call(dev,
+							  ifr,
+							  cmd,
+							  handler);
+		}
+		/* Old driver API : call driver ioctl handler */
+		if (dev->do_ioctl) {
+			return dev->do_ioctl(dev, ifr, cmd);
+		}
+		return -EOPNOTSUPP;
 	}
 	/* Not reached */
 	return -EINVAL;
@@ -1196,7 +1195,7 @@ static int rtnetlink_standard_get(struct net_device *	dev,
 
 	/* Get the description of the Request */
 	cmd = request->cmd;
-	if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
+	if ((cmd - SIOCIWFIRST) >= standard_ioctl_num)
 		return -EOPNOTSUPP;
 	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
 
@@ -1208,7 +1207,7 @@ static int rtnetlink_standard_get(struct net_device *	dev,
 
 	/* Check if wrqu is complete */
 	hdr_len = event_type_size[descr->header_type];
-	if(request_len < hdr_len) {
+	if (request_len < hdr_len) {
 #ifdef WE_RTNETLINK_DEBUG
 		printk(KERN_DEBUG
 		       "%s (WE.r) : Wireless request too short (%d)\n",
@@ -1222,7 +1221,7 @@ static int rtnetlink_standard_get(struct net_device *	dev,
 	info.flags = 0;
 
 	/* Check if we have extra data in the reply or not */
-	if(descr->header_type != IW_HEADER_TYPE_POINT) {
+	if (descr->header_type != IW_HEADER_TYPE_POINT) {
 
 		/* Create the kernel buffer that we will return.
 		 * It's at an offset to match the TYPE_POINT case... */
@@ -1254,7 +1253,7 @@ static int rtnetlink_standard_get(struct net_device *	dev,
 		 * for max space. Easier, and won't last long... */
 		extra_size = descr->max_tokens * descr->token_size;
 		/* Support for very large requests */
-		if((descr->flags & IW_DESCR_FLAG_NOMAX) &&
+		if ((descr->flags & IW_DESCR_FLAG_NOMAX) &&
 		   (wrqu_point.data.length > descr->max_tokens))
 			extra_size = (wrqu_point.data.length
 				      * descr->token_size);
@@ -1299,7 +1298,7 @@ static int rtnetlink_standard_get(struct net_device *	dev,
 #endif	/* WE_RTNETLINK_DEBUG */
 
 		/* Check if there is enough buffer up there */
-		if(wrqu_point.data.length < wrqu->data.length)
+		if (wrqu_point.data.length < wrqu->data.length)
 			ret = -E2BIG;
 	}
 
@@ -1309,7 +1308,7 @@ static int rtnetlink_standard_get(struct net_device *	dev,
 		*p_len = request->len;
 	} else {
 		/* Cleanup */
-		if(buffer)
+		if (buffer)
 			kfree(buffer);
 	}
 
@@ -1338,7 +1337,7 @@ static inline int rtnetlink_standard_set(struct net_device *	dev,
 
 	/* Get the description of the Request */
 	cmd = request->cmd;
-	if((cmd - SIOCIWFIRST) >= standard_ioctl_num)
+	if ((cmd - SIOCIWFIRST) >= standard_ioctl_num)
 		return -EOPNOTSUPP;
 	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
 
@@ -1353,7 +1352,7 @@ static inline int rtnetlink_standard_set(struct net_device *	dev,
 
 	/* Check if wrqu is complete */
 	hdr_len = event_type_pk_size[descr->header_type];
-	if(request_len < hdr_len) {
+	if (request_len < hdr_len) {
 #ifdef WE_RTNETLINK_DEBUG
 		printk(KERN_DEBUG
 		       "%s (WE.r) : Wireless request too short (%d)\n",
@@ -1367,7 +1366,7 @@ static inline int rtnetlink_standard_set(struct net_device *	dev,
 	info.flags = 0;
 
 	/* Check if we have extra data in the request or not */
-	if(descr->header_type != IW_HEADER_TYPE_POINT) {
+	if (descr->header_type != IW_HEADER_TYPE_POINT) {
 
 		/* No extra arguments. Trivial to handle */
 		ret = handler(dev, &info, wrqu, NULL);
@@ -1382,16 +1381,16 @@ static inline int rtnetlink_standard_set(struct net_device *	dev,
 		wrqu = &wrqu_point;
 
 		/* Check if number of token fits within bounds */
-		if(wrqu_point.data.length > descr->max_tokens)
+		if (wrqu_point.data.length > descr->max_tokens)
 			return -E2BIG;
-		if(wrqu_point.data.length < descr->min_tokens)
+		if (wrqu_point.data.length < descr->min_tokens)
 			return -EINVAL;
 
 		/* Real length of payload */
 		extra_len = wrqu_point.data.length * descr->token_size;
 
 		/* Check if request is self consistent */
-		if((request_len - hdr_len) < extra_len) {
+		if ((request_len - hdr_len) < extra_len) {
 #ifdef WE_RTNETLINK_DEBUG
 			printk(KERN_DEBUG "%s (WE.r) : Wireless request data too short (%d)\n",
 			       dev->name, extra_size);
@@ -1420,9 +1419,9 @@ static inline int rtnetlink_standard_set(struct net_device *	dev,
 
 #ifdef WE_SET_EVENT
 	/* Generate an event to notify listeners of the change */
-	if((descr->flags & IW_DESCR_FLAG_EVENT) &&
+	if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
 	   ((ret == 0) || (ret == -EIWCOMMIT))) {
-		if(descr->flags & IW_DESCR_FLAG_RESTRICT)
+		if (descr->flags & IW_DESCR_FLAG_RESTRICT)
 			/* If the event is restricted, don't
 			 * export the payload */
 			wireless_send_event(dev, cmd, wrqu, NULL);
@@ -1432,11 +1431,11 @@ static inline int rtnetlink_standard_set(struct net_device *	dev,
 #endif	/* WE_SET_EVENT */
 
 	/* Cleanup - I told you it wasn't that long ;-) */
-	if(extra)
+	if (extra)
 		kfree(extra);
 
 	/* Call commit handler if needed and defined */
-	if(ret == -EIWCOMMIT)
+	if (ret == -EIWCOMMIT)
 		ret = call_commit_handler(dev);
 
 	return ret;
@@ -1477,12 +1476,12 @@ static inline int rtnetlink_private_get(struct net_device *	dev,
 
 	/* Get the description of the Request */
 	cmd = request->cmd;
-	for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
-		if(cmd == dev->wireless_handlers->private_args[i].cmd) {
+	for (i = 0; i < dev->wireless_handlers->num_private_args; i++)
+		if (cmd == dev->wireless_handlers->private_args[i].cmd) {
 			descr = &(dev->wireless_handlers->private_args[i]);
 			break;
 		}
-	if(descr == NULL)
+	if (descr == NULL)
 		return -EOPNOTSUPP;
 
 #ifdef WE_RTNETLINK_DEBUG
@@ -1496,7 +1495,7 @@ static inline int rtnetlink_private_get(struct net_device *	dev,
 	extra_size = get_priv_size(descr->get_args);
 
 	/* Does it fits in wrqu ? */
-	if((descr->get_args & IW_PRIV_SIZE_FIXED) &&
+	if ((descr->get_args & IW_PRIV_SIZE_FIXED) &&
 	   (extra_size <= IFNAMSIZ)) {
 		hdr_len = extra_size;
 		extra_size = 0;
@@ -1505,7 +1504,7 @@ static inline int rtnetlink_private_get(struct net_device *	dev,
 	}
 
 	/* Check if wrqu is complete */
-	if(request_len < hdr_len) {
+	if (request_len < hdr_len) {
 #ifdef WE_RTNETLINK_DEBUG
 		printk(KERN_DEBUG
 		       "%s (WE.r) : Wireless request too short (%d)\n",
@@ -1519,7 +1518,7 @@ static inline int rtnetlink_private_get(struct net_device *	dev,
 	info.flags = 0;
 
 	/* Check if we have a pointer to user space data or not. */
-	if(extra_size == 0) {
+	if (extra_size == 0) {
 
 		/* Create the kernel buffer that we will return.
 		 * It's at an offset to match the TYPE_POINT case... */
@@ -1591,7 +1590,7 @@ static inline int rtnetlink_private_get(struct net_device *	dev,
 		*p_len = request->len;
 	} else {
 		/* Cleanup */
-		if(buffer)
+		if (buffer)
 			kfree(buffer);
 	}
 
@@ -1632,12 +1631,12 @@ static inline int rtnetlink_private_set(struct net_device *	dev,
 
 	/* Get the description of the Request */
 	cmd = request->cmd;
-	for(i = 0; i < dev->wireless_handlers->num_private_args; i++)
-		if(cmd == dev->wireless_handlers->private_args[i].cmd) {
+	for (i = 0; i < dev->wireless_handlers->num_private_args; i++)
+		if (cmd == dev->wireless_handlers->private_args[i].cmd) {
 			descr = &(dev->wireless_handlers->private_args[i]);
 			break;
 		}
-	if(descr == NULL)
+	if (descr == NULL)
 		return -EOPNOTSUPP;
 
 #ifdef WE_RTNETLINK_DEBUG
@@ -1649,7 +1648,7 @@ static inline int rtnetlink_private_set(struct net_device *	dev,
 
 	/* Compute the size of the set arguments */
 	/* Check for sub-ioctl handler */
-	if(descr->name[0] == '\0')
+	if (descr->name[0] == '\0')
 		/* Reserve one int for sub-ioctl index */
 		offset = sizeof(__u32);
 
@@ -1657,7 +1656,7 @@ static inline int rtnetlink_private_set(struct net_device *	dev,
 	extra_size = get_priv_size(descr->set_args);
 
 	/* Does it fits in wrqu ? */
-	if((descr->set_args & IW_PRIV_SIZE_FIXED) &&
+	if ((descr->set_args & IW_PRIV_SIZE_FIXED) &&
 	   (extra_size <= IFNAMSIZ)) {
 		hdr_len = IW_EV_LCP_PK_LEN + extra_size;
 		extra_size = 0;
@@ -1669,7 +1668,7 @@ static inline int rtnetlink_private_set(struct net_device *	dev,
 	wrqu = (union iwreq_data *) (((char *) request) + IW_EV_LCP_PK_LEN);
 
 	/* Check if wrqu is complete */
-	if(request_len < hdr_len) {
+	if (request_len < hdr_len) {
 #ifdef WE_RTNETLINK_DEBUG
 		printk(KERN_DEBUG
 		       "%s (WE.r) : Wireless request too short (%d)\n",
@@ -1683,7 +1682,7 @@ static inline int rtnetlink_private_set(struct net_device *	dev,
 	info.flags = 0;
 
 	/* Check if we have a pointer to user space data or not. */
-	if(extra_size == 0) {
+	if (extra_size == 0) {
 
 		/* No extra arguments. Trivial to handle */
 		ret = handler(dev, &info, wrqu, (char *) wrqu);
@@ -1696,7 +1695,7 @@ static inline int rtnetlink_private_set(struct net_device *	dev,
 		       wrqu, IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
 
 		/* Does it fits within bounds ? */
-		if(wrqu_point.data.length > (descr->set_args &
+		if (wrqu_point.data.length > (descr->set_args &
 					     IW_PRIV_SIZE_MASK))
 			return -E2BIG;
 
@@ -1704,7 +1703,7 @@ static inline int rtnetlink_private_set(struct net_device *	dev,
 		extra_len = adjust_priv_size(descr->set_args, &wrqu_point);
 
 		/* Check if request is self consistent */
-		if((request_len - hdr_len) < extra_len) {
+		if ((request_len - hdr_len) < extra_len) {
 #ifdef WE_RTNETLINK_DEBUG
 			printk(KERN_DEBUG "%s (WE.r) : Wireless request data too short (%d)\n",
 			       dev->name, extra_size);
@@ -1734,7 +1733,7 @@ static inline int rtnetlink_private_set(struct net_device *	dev,
 	}
 
 	/* Call commit handler if needed and defined */
-	if(ret == -EIWCOMMIT)
+	if (ret == -EIWCOMMIT)
 		ret = call_commit_handler(dev);
 
 	return ret;
@@ -1756,21 +1755,21 @@ int wireless_rtnetlink_get(struct net_device *	dev,
 	iw_handler		handler;
 
 	/* Check length */
-	if(len < IW_EV_LCP_PK_LEN) {
+	if (len < IW_EV_LCP_PK_LEN) {
 		printk(KERN_DEBUG "%s (WE.r) : RtNetlink request too short (%d)\n",
 		       dev->name, len);
 		return -EINVAL;
 	}
 
 	/* ReCheck length (len may have padding) */
-	if(request->len > len) {
+	if (request->len > len) {
 		printk(KERN_DEBUG "%s (WE.r) : RtNetlink request len invalid (%d-%d)\n",
 		       dev->name, request->len, len);
 		return -EINVAL;
 	}
 
 	/* Only accept GET requests in here */
-	if(!IW_IS_GET(request->cmd))
+	if (!IW_IS_GET(request->cmd))
 		return -EOPNOTSUPP;
 
 	/* If command is `get the encoding parameters', check if
@@ -1782,16 +1781,16 @@ int wireless_rtnetlink_get(struct net_device *	dev,
 	}
 
 	/* Special cases */
-	if(request->cmd == SIOCGIWSTATS)
+	if (request->cmd == SIOCGIWSTATS)
 		/* Get Wireless Stats */
 		return rtnetlink_standard_get(dev,
 					      request,
 					      request->len,
 					      &iw_handler_get_iwstats,
 					      p_buf, p_len);
-	if(request->cmd == SIOCGIWPRIV) {
+	if (request->cmd == SIOCGIWPRIV) {
 		/* Check if we have some wireless handlers defined */
-		if(dev->wireless_handlers == NULL)
+		if (dev->wireless_handlers == NULL)
 			return -EOPNOTSUPP;
 		/* Get Wireless Stats */
 		return rtnetlink_standard_get(dev,
@@ -1807,9 +1806,9 @@ int wireless_rtnetlink_get(struct net_device *	dev,
 
 	/* Try to find the handler */
 	handler = get_handler(dev, request->cmd);
-	if(handler != NULL) {
+	if (handler != NULL) {
 		/* Standard and private are not the same */
-		if(request->cmd < SIOCIWFIRSTPRIV)
+		if (request->cmd < SIOCIWFIRSTPRIV)
 			return rtnetlink_standard_get(dev,
 						      request,
 						      request->len,
@@ -1840,21 +1839,21 @@ int wireless_rtnetlink_set(struct net_device *	dev,
 	iw_handler		handler;
 
 	/* Check length */
-	if(len < IW_EV_LCP_PK_LEN) {
+	if (len < IW_EV_LCP_PK_LEN) {
 		printk(KERN_DEBUG "%s (WE.r) : RtNetlink request too short (%d)\n",
 		       dev->name, len);
 		return -EINVAL;
 	}
 
 	/* ReCheck length (len may have padding) */
-	if(request->len > len) {
+	if (request->len > len) {
 		printk(KERN_DEBUG "%s (WE.r) : RtNetlink request len invalid (%d-%d)\n",
 		       dev->name, request->len, len);
 		return -EINVAL;
 	}
 
 	/* Only accept SET requests in here */
-	if(!IW_IS_SET(request->cmd))
+	if (!IW_IS_SET(request->cmd))
 		return -EOPNOTSUPP;
 
 	/* Basic check */
@@ -1863,9 +1862,9 @@ int wireless_rtnetlink_set(struct net_device *	dev,
 
 	/* New driver API : try to find the handler */
 	handler = get_handler(dev, request->cmd);
-	if(handler != NULL) {
+	if (handler != NULL) {
 		/* Standard and private are not the same */
-		if(request->cmd < SIOCIWFIRSTPRIV)
+		if (request->cmd < SIOCIWFIRSTPRIV)
 			return rtnetlink_standard_set(dev,
 						      request,
 						      request->len,
@@ -2015,17 +2014,17 @@ void wireless_send_event(struct net_device *	dev,
 	unsigned	cmd_index;		/* *MUST* be unsigned */
 
 	/* Get the description of the Event */
-	if(cmd <= SIOCIWLAST) {
+	if (cmd <= SIOCIWLAST) {
 		cmd_index = cmd - SIOCIWFIRST;
-		if(cmd_index < standard_ioctl_num)
+		if (cmd_index < standard_ioctl_num)
 			descr = &(standard_ioctl[cmd_index]);
 	} else {
 		cmd_index = cmd - IWEVFIRST;
-		if(cmd_index < standard_event_num)
+		if (cmd_index < standard_event_num)
 			descr = &(standard_event[cmd_index]);
 	}
 	/* Don't accept unknown events */
-	if(descr == NULL) {
+	if (descr == NULL) {
 		/* Note : we don't return an error to the driver, because
 		 * the driver would not know what to do about it. It can't
 		 * return an error to the user, because the event is not
@@ -2044,18 +2043,18 @@ void wireless_send_event(struct net_device *	dev,
 #endif	/* WE_EVENT_DEBUG */
 
 	/* Check extra parameters and set extra_len */
-	if(descr->header_type == IW_HEADER_TYPE_POINT) {
+	if (descr->header_type == IW_HEADER_TYPE_POINT) {
 		/* Check if number of token fits within bounds */
-		if(wrqu->data.length > descr->max_tokens) {
+		if (wrqu->data.length > descr->max_tokens) {
 			printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length);
 			return;
 		}
-		if(wrqu->data.length < descr->min_tokens) {
+		if (wrqu->data.length < descr->min_tokens) {
 			printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length);
 			return;
 		}
 		/* Calculate extra_len - extra is NULL for restricted events */
-		if(extra != NULL)
+		if (extra != NULL)
 			extra_len = wrqu->data.length * descr->token_size;
 		/* Always at an offset in wrqu */
 		wrqu_off = IW_EV_POINT_OFF;
@@ -2074,14 +2073,14 @@ void wireless_send_event(struct net_device *	dev,
 
 	/* Create temporary buffer to hold the event */
 	event = kmalloc(event_len, GFP_ATOMIC);
-	if(event == NULL)
+	if (event == NULL)
 		return;
 
 	/* Fill event */
 	event->len = event_len;
 	event->cmd = cmd;
 	memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN);
-	if(extra != NULL)
+	if (extra != NULL)
 		memcpy(((char *) event) + hdr_len, extra, extra_len);
 
 #ifdef WE_EVENT_RTNETLINK
@@ -2116,7 +2115,7 @@ void wireless_send_event(struct net_device *	dev,
 static inline struct iw_spy_data * get_spydata(struct net_device *dev)
 {
 	/* This is the new way */
-	if(dev->wireless_data)
+	if (dev->wireless_data)
 		return(dev->wireless_data->spy_data);
 	return NULL;
 }
@@ -2134,7 +2133,7 @@ int iw_handler_set_spy(struct net_device *	dev,
 	struct sockaddr *	address = (struct sockaddr *) extra;
 
 	/* Make sure driver is not buggy or using the old API */
-	if(!spydata)
+	if (!spydata)
 		return -EOPNOTSUPP;
 
 	/* Disable spy collection while we copy the addresses.
@@ -2151,11 +2150,11 @@ int iw_handler_set_spy(struct net_device *	dev,
 	smp_wmb();
 
 	/* Are there are addresses to copy? */
-	if(wrqu->data.length > 0) {
+	if (wrqu->data.length > 0) {
 		int i;
 
 		/* Copy addresses */
-		for(i = 0; i < wrqu->data.length; i++)
+		for (i = 0; i < wrqu->data.length; i++)
 			memcpy(spydata->spy_address[i], address[i].sa_data,
 			       ETH_ALEN);
 		/* Reset stats */
@@ -2199,23 +2198,23 @@ int iw_handler_get_spy(struct net_device *	dev,
 	int			i;
 
 	/* Make sure driver is not buggy or using the old API */
-	if(!spydata)
+	if (!spydata)
 		return -EOPNOTSUPP;
 
 	wrqu->data.length = spydata->spy_number;
 
 	/* Copy addresses. */
-	for(i = 0; i < spydata->spy_number; i++) 	{
+	for (i = 0; i < spydata->spy_number; i++) 	{
 		memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN);
 		address[i].sa_family = AF_UNIX;
 	}
 	/* Copy stats to the user buffer (just after). */
-	if(spydata->spy_number > 0)
+	if (spydata->spy_number > 0)
 		memcpy(extra  + (sizeof(struct sockaddr) *spydata->spy_number),
 		       spydata->spy_stat,
 		       sizeof(struct iw_quality) * spydata->spy_number);
 	/* Reset updated flags. */
-	for(i = 0; i < spydata->spy_number; i++)
+	for (i = 0; i < spydata->spy_number; i++)
 		spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED;
 	return 0;
 }
@@ -2233,7 +2232,7 @@ int iw_handler_set_thrspy(struct net_device *	dev,
 	struct iw_thrspy *	threshold = (struct iw_thrspy *) extra;
 
 	/* Make sure driver is not buggy or using the old API */
-	if(!spydata)
+	if (!spydata)
 		return -EOPNOTSUPP;
 
 	/* Just do it */
@@ -2263,7 +2262,7 @@ int iw_handler_get_thrspy(struct net_device *	dev,
 	struct iw_thrspy *	threshold = (struct iw_thrspy *) extra;
 
 	/* Make sure driver is not buggy or using the old API */
-	if(!spydata)
+	if (!spydata)
 		return -EOPNOTSUPP;
 
 	/* Just do it */
@@ -2327,7 +2326,7 @@ void wireless_spy_update(struct net_device *	dev,
 	int			match = -1;
 
 	/* Make sure driver is not buggy or using the old API */
-	if(!spydata)
+	if (!spydata)
 		return;
 
 #ifdef WE_SPY_DEBUG
@@ -2335,8 +2334,8 @@ void wireless_spy_update(struct net_device *	dev,
 #endif	/* WE_SPY_DEBUG */
 
 	/* Update all records that match */
-	for(i = 0; i < spydata->spy_number; i++)
-		if(!compare_ether_addr(address, spydata->spy_address[i])) {
+	for (i = 0; i < spydata->spy_number; i++)
+		if (!compare_ether_addr(address, spydata->spy_address[i])) {
 			memcpy(&(spydata->spy_stat[i]), wstats,
 			       sizeof(struct iw_quality));
 			match = i;
@@ -2346,15 +2345,15 @@ void wireless_spy_update(struct net_device *	dev,
 	 * To avoid event storms, we have a simple hysteresis : we generate
 	 * event only when we go under the low threshold or above the
 	 * high threshold. */
-	if(match >= 0) {
-		if(spydata->spy_thr_under[match]) {
-			if(wstats->level > spydata->spy_thr_high.level) {
+	if (match >= 0) {
+		if (spydata->spy_thr_under[match]) {
+			if (wstats->level > spydata->spy_thr_high.level) {
 				spydata->spy_thr_under[match] = 0;
 				iw_send_thrspy_event(dev, spydata,
 						     address, wstats);
 			}
 		} else {
-			if(wstats->level < spydata->spy_thr_low.level) {
+			if (wstats->level < spydata->spy_thr_low.level) {
 				spydata->spy_thr_under[match] = 1;
 				iw_send_thrspy_event(dev, spydata,
 						     address, wstats);
diff --git a/net/socket.c b/net/socket.c
index ea8f81abc45..cf18c5eb592 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1292,7 +1292,7 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
 	int err, fput_needed;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
-	if(sock) {
+	if (sock) {
 		err = move_addr_to_kernel(umyaddr, addrlen, address);
 		if (err >= 0) {
 			err = security_socket_bind(sock,
-- 
cgit v1.2.3


From 1ac58ee37f439044eb09381f33c97ce0e7f2643b Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 8 Mar 2007 20:43:49 -0800
Subject: [WIRELESS]: use ARRAY_SIZE()

Use ARRAY_SIZE() macro now.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/wireless.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/core/wireless.c b/net/core/wireless.c
index 8f7f3abdb22..21c091dd39e 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -349,8 +349,7 @@ static const struct iw_ioctl_description standard_ioctl[] = {
 		.max_tokens	= sizeof(struct iw_pmksa),
 	},
 };
-static const unsigned standard_ioctl_num = (sizeof(standard_ioctl) /
-					    sizeof(struct iw_ioctl_description));
+static const unsigned standard_ioctl_num = ARRAY_SIZE(standard_ioctl);
 
 /*
  * Meta-data about all the additional standard Wireless Extension events
@@ -400,8 +399,7 @@ static const struct iw_ioctl_description standard_event[] = {
 		.max_tokens	= sizeof(struct iw_pmkid_cand),
 	},
 };
-static const unsigned standard_event_num = (sizeof(standard_event) /
-					    sizeof(struct iw_ioctl_description));
+static const unsigned standard_event_num = ARRAY_SIZE(standard_event);
 
 /* Size (in bytes) of the various private data types */
 static const char iw_priv_type_size[] = {
-- 
cgit v1.2.3


From 132adf54639cf7dd9315e8df89c2faa59f6e46d9 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 8 Mar 2007 20:44:43 -0800
Subject: [IPV4]: cleanup

Add whitespace around keywords.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c           |    2 +-
 net/ipv4/arp.c               |    2 +-
 net/ipv4/cipso_ipv4.c        |    2 +-
 net/ipv4/devinet.c           |    4 +-
 net/ipv4/fib_trie.c          |   17 +-
 net/ipv4/ip_fragment.c       |   26 +-
 net/ipv4/ip_output.c         |    2 +-
 net/ipv4/ip_sockglue.c       | 1127 +++++++++++++++++++++---------------------
 net/ipv4/ipconfig.c          |    4 +-
 net/ipv4/ipmr.c              |  305 ++++++------
 net/ipv4/xfrm4_mode_tunnel.c |    2 +-
 net/ipv4/xfrm4_policy.c      |    2 +-
 12 files changed, 745 insertions(+), 750 deletions(-)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index df41856fc60..6e5575b0abe 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1339,7 +1339,7 @@ static int __init inet_init(void)
 	 *	Initialise per-cpu ipv4 mibs
 	 */
 
-	if(init_ipv4_mibs())
+	if (init_ipv4_mibs())
 		printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ;
 
 	ipv4_proc_init();
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 1a3488a83f4..e6e196cd3b8 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1178,7 +1178,7 @@ int arp_ioctl(unsigned int cmd, void __user *arg)
 		goto out;
 	}
 
-	switch(cmd) {
+	switch (cmd) {
 	case SIOCDARP:
 		err = arp_req_delete(&r, dev);
 		break;
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 2ce5b693a8b..b0182aa2c81 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1174,7 +1174,7 @@ static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def,
 	u16 cat_low;
 	u16 cat_high;
 
-	for(net_iter = 0; net_iter < net_cat_len; net_iter += 4) {
+	for (net_iter = 0; net_iter < net_cat_len; net_iter += 4) {
 		cat_high = ntohs(*((__be16 *)&net_cat[net_iter]));
 		if ((net_iter + 4) <= net_cat_len)
 			cat_low = ntohs(*((__be16 *)&net_cat[net_iter + 2]));
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 98a00d0edc7..043857bd151 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -633,7 +633,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
 	dev_load(ifr.ifr_name);
 #endif
 
-	switch(cmd) {
+	switch (cmd) {
 	case SIOCGIFADDR:	/* Get interface address */
 	case SIOCGIFBRDADDR:	/* Get the broadcast address */
 	case SIOCGIFDSTADDR:	/* Get the destination address */
@@ -708,7 +708,7 @@ int devinet_ioctl(unsigned int cmd, void __user *arg)
 	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
 		goto done;
 
-	switch(cmd) {
+	switch (cmd) {
 	case SIOCGIFADDR:	/* Get interface address */
 		sin->sin_addr.s_addr = ifa->ifa_local;
 		goto rarok;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 214c34732e8..c331c433acf 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -350,11 +350,10 @@ static void __tnode_free_rcu(struct rcu_head *head)
 
 static inline void tnode_free(struct tnode *tn)
 {
-	if(IS_LEAF(tn)) {
+	if (IS_LEAF(tn)) {
 		struct leaf *l = (struct leaf *) tn;
 		call_rcu_bh(&l->rcu, __leaf_free_rcu);
-	}
-	else
+	} else
 		call_rcu(&tn->rcu, __tnode_free_rcu);
 }
 
@@ -553,7 +552,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 
 	/* Keep root node larger  */
 
-	if(!tn->parent)
+	if (!tn->parent)
 		inflate_threshold_use = inflate_threshold_root;
 	else
 		inflate_threshold_use = inflate_threshold;
@@ -584,7 +583,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 
 	/* Keep root node larger  */
 
-	if(!tn->parent)
+	if (!tn->parent)
 		halve_threshold_use = halve_threshold_root;
 	else
 		halve_threshold_use = halve_threshold;
@@ -2039,12 +2038,12 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
 {
 	struct node *n ;
 
-	if(!t)
+	if (!t)
 		return NULL;
 
 	n = rcu_dereference(t->trie);
 
-	if(!iter)
+	if (!iter)
 		return NULL;
 
 	if (n) {
@@ -2084,7 +2083,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
 			int i;
 
 			s->tnodes++;
-			if(tn->bits < MAX_STAT_DEPTH)
+			if (tn->bits < MAX_STAT_DEPTH)
 				s->nodesizes[tn->bits]++;
 
 			for (i = 0; i < (1<<tn->bits); i++)
@@ -2250,7 +2249,7 @@ static inline const char *rtn_scope(enum rt_scope_t s)
 {
 	static char buf[32];
 
-	switch(s) {
+	switch (s) {
 	case RT_SCOPE_UNIVERSE: return "universe";
 	case RT_SCOPE_SITE:	return "site";
 	case RT_SCOPE_LINK:	return "link";
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index e10be7d7752..3dfd7581cfc 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -184,7 +184,7 @@ static __inline__ struct ipq *frag_alloc_queue(void)
 {
 	struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
 
-	if(!qp)
+	if (!qp)
 		return NULL;
 	atomic_add(sizeof(struct ipq), &ip_frag_mem);
 	return qp;
@@ -321,11 +321,11 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
 	 * promoted read lock to write lock.
 	 */
 	hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
-		if(qp->id == qp_in->id		&&
-		   qp->saddr == qp_in->saddr	&&
-		   qp->daddr == qp_in->daddr	&&
-		   qp->protocol == qp_in->protocol &&
-		   qp->user == qp_in->user) {
+		if (qp->id == qp_in->id		&&
+		    qp->saddr == qp_in->saddr	&&
+		    qp->daddr == qp_in->daddr	&&
+		    qp->protocol == qp_in->protocol &&
+		    qp->user == qp_in->user) {
 			atomic_inc(&qp->refcnt);
 			write_unlock(&ipfrag_lock);
 			qp_in->last_in |= COMPLETE;
@@ -398,11 +398,11 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 	read_lock(&ipfrag_lock);
 	hash = ipqhashfn(id, saddr, daddr, protocol);
 	hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
-		if(qp->id == id		&&
-		   qp->saddr == saddr	&&
-		   qp->daddr == daddr	&&
-		   qp->protocol == protocol &&
-		   qp->user == user) {
+		if (qp->id == id		&&
+		    qp->saddr == saddr	&&
+		    qp->daddr == daddr	&&
+		    qp->protocol == protocol &&
+		    qp->user == user) {
 			atomic_inc(&qp->refcnt);
 			read_unlock(&ipfrag_lock);
 			return qp;
@@ -524,7 +524,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	 * this fragment, right?
 	 */
 	prev = NULL;
-	for(next = qp->fragments; next != NULL; next = next->next) {
+	for (next = qp->fragments; next != NULL; next = next->next) {
 		if (FRAG_CB(next)->offset >= offset)
 			break;	/* bingo! */
 		prev = next;
@@ -627,7 +627,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
 	ihlen = head->nh.iph->ihl*4;
 	len = ihlen + qp->len;
 
-	if(len > 65535)
+	if (len > 65535)
 		goto out_oversize;
 
 	/* Head of list must not be cloned. */
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d096332f6c6..5db301b3337 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -566,7 +566,7 @@ slow_path:
 	 *	Keep copying data until we run out.
 	 */
 
-	while(left > 0)	{
+	while (left > 0) {
 		len = left;
 		/* IF: it doesn't fit, use 'mtu' - the data space left */
 		if (len > mtu)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 23048d9f358..c5e41644c80 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -403,20 +403,20 @@ out:
  */
 
 static int do_ip_setsockopt(struct sock *sk, int level,
-		int optname, char __user *optval, int optlen)
+			    int optname, char __user *optval, int optlen)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	int val=0,err;
 
 	if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) |
-			    (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
-			    (1<<IP_RETOPTS) | (1<<IP_TOS) |
-			    (1<<IP_TTL) | (1<<IP_HDRINCL) |
-			    (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
-			    (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
-			    (1<<IP_PASSSEC))) ||
-				optname == IP_MULTICAST_TTL ||
-				optname == IP_MULTICAST_LOOP) {
+			     (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
+			     (1<<IP_RETOPTS) | (1<<IP_TOS) |
+			     (1<<IP_TTL) | (1<<IP_HDRINCL) |
+			     (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
+			     (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
+			     (1<<IP_PASSSEC))) ||
+	    optname == IP_MULTICAST_TTL ||
+	    optname == IP_MULTICAST_LOOP) {
 		if (optlen >= sizeof(int)) {
 			if (get_user(val, (int __user *) optval))
 				return -EFAULT;
@@ -440,444 +440,444 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 	lock_sock(sk);
 
 	switch (optname) {
-		case IP_OPTIONS:
-		{
-			struct ip_options * opt = NULL;
-			if (optlen > 40 || optlen < 0)
-				goto e_inval;
-			err = ip_options_get_from_user(&opt, optval, optlen);
-			if (err)
-				break;
-			if (inet->is_icsk) {
-				struct inet_connection_sock *icsk = inet_csk(sk);
+	case IP_OPTIONS:
+	{
+		struct ip_options * opt = NULL;
+		if (optlen > 40 || optlen < 0)
+			goto e_inval;
+		err = ip_options_get_from_user(&opt, optval, optlen);
+		if (err)
+			break;
+		if (inet->is_icsk) {
+			struct inet_connection_sock *icsk = inet_csk(sk);
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-				if (sk->sk_family == PF_INET ||
-				    (!((1 << sk->sk_state) &
-				       (TCPF_LISTEN | TCPF_CLOSE)) &&
-				     inet->daddr != LOOPBACK4_IPV6)) {
+			if (sk->sk_family == PF_INET ||
+			    (!((1 << sk->sk_state) &
+			       (TCPF_LISTEN | TCPF_CLOSE)) &&
+			     inet->daddr != LOOPBACK4_IPV6)) {
 #endif
-					if (inet->opt)
-						icsk->icsk_ext_hdr_len -= inet->opt->optlen;
-					if (opt)
-						icsk->icsk_ext_hdr_len += opt->optlen;
-					icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
+				if (inet->opt)
+					icsk->icsk_ext_hdr_len -= inet->opt->optlen;
+				if (opt)
+					icsk->icsk_ext_hdr_len += opt->optlen;
+				icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-				}
-#endif
 			}
-			opt = xchg(&inet->opt, opt);
-			kfree(opt);
-			break;
+#endif
 		}
-		case IP_PKTINFO:
-			if (val)
-				inet->cmsg_flags |= IP_CMSG_PKTINFO;
-			else
-				inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
-			break;
-		case IP_RECVTTL:
-			if (val)
-				inet->cmsg_flags |=  IP_CMSG_TTL;
-			else
-				inet->cmsg_flags &= ~IP_CMSG_TTL;
-			break;
-		case IP_RECVTOS:
-			if (val)
-				inet->cmsg_flags |=  IP_CMSG_TOS;
-			else
-				inet->cmsg_flags &= ~IP_CMSG_TOS;
-			break;
-		case IP_RECVOPTS:
-			if (val)
-				inet->cmsg_flags |=  IP_CMSG_RECVOPTS;
-			else
-				inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
-			break;
-		case IP_RETOPTS:
-			if (val)
-				inet->cmsg_flags |= IP_CMSG_RETOPTS;
-			else
-				inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
+		opt = xchg(&inet->opt, opt);
+		kfree(opt);
+		break;
+	}
+	case IP_PKTINFO:
+		if (val)
+			inet->cmsg_flags |= IP_CMSG_PKTINFO;
+		else
+			inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
+		break;
+	case IP_RECVTTL:
+		if (val)
+			inet->cmsg_flags |=  IP_CMSG_TTL;
+		else
+			inet->cmsg_flags &= ~IP_CMSG_TTL;
+		break;
+	case IP_RECVTOS:
+		if (val)
+			inet->cmsg_flags |=  IP_CMSG_TOS;
+		else
+			inet->cmsg_flags &= ~IP_CMSG_TOS;
+		break;
+	case IP_RECVOPTS:
+		if (val)
+			inet->cmsg_flags |=  IP_CMSG_RECVOPTS;
+		else
+			inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
+		break;
+	case IP_RETOPTS:
+		if (val)
+			inet->cmsg_flags |= IP_CMSG_RETOPTS;
+		else
+			inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
+		break;
+	case IP_PASSSEC:
+		if (val)
+			inet->cmsg_flags |= IP_CMSG_PASSSEC;
+		else
+			inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
+		break;
+	case IP_TOS:	/* This sets both TOS and Precedence */
+		if (sk->sk_type == SOCK_STREAM) {
+			val &= ~3;
+			val |= inet->tos & 3;
+		}
+		if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP &&
+		    !capable(CAP_NET_ADMIN)) {
+			err = -EPERM;
 			break;
-		case IP_PASSSEC:
-			if (val)
-				inet->cmsg_flags |= IP_CMSG_PASSSEC;
-			else
-				inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
+		}
+		if (inet->tos != val) {
+			inet->tos = val;
+			sk->sk_priority = rt_tos2priority(val);
+			sk_dst_reset(sk);
+		}
+		break;
+	case IP_TTL:
+		if (optlen<1)
+			goto e_inval;
+		if (val != -1 && (val < 1 || val>255))
+			goto e_inval;
+		inet->uc_ttl = val;
+		break;
+	case IP_HDRINCL:
+		if (sk->sk_type != SOCK_RAW) {
+			err = -ENOPROTOOPT;
 			break;
-		case IP_TOS:	/* This sets both TOS and Precedence */
-			if (sk->sk_type == SOCK_STREAM) {
-				val &= ~3;
-				val |= inet->tos & 3;
-			}
-			if (IPTOS_PREC(val) >= IPTOS_PREC_CRITIC_ECP &&
-			    !capable(CAP_NET_ADMIN)) {
-				err = -EPERM;
+		}
+		inet->hdrincl = val ? 1 : 0;
+		break;
+	case IP_MTU_DISCOVER:
+		if (val<0 || val>2)
+			goto e_inval;
+		inet->pmtudisc = val;
+		break;
+	case IP_RECVERR:
+		inet->recverr = !!val;
+		if (!val)
+			skb_queue_purge(&sk->sk_error_queue);
+		break;
+	case IP_MULTICAST_TTL:
+		if (sk->sk_type == SOCK_STREAM)
+			goto e_inval;
+		if (optlen<1)
+			goto e_inval;
+		if (val==-1)
+			val = 1;
+		if (val < 0 || val > 255)
+			goto e_inval;
+		inet->mc_ttl = val;
+		break;
+	case IP_MULTICAST_LOOP:
+		if (optlen<1)
+			goto e_inval;
+		inet->mc_loop = !!val;
+		break;
+	case IP_MULTICAST_IF:
+	{
+		struct ip_mreqn mreq;
+		struct net_device *dev = NULL;
+
+		if (sk->sk_type == SOCK_STREAM)
+			goto e_inval;
+		/*
+		 *	Check the arguments are allowable
+		 */
+
+		err = -EFAULT;
+		if (optlen >= sizeof(struct ip_mreqn)) {
+			if (copy_from_user(&mreq,optval,sizeof(mreq)))
 				break;
-			}
-			if (inet->tos != val) {
-				inet->tos = val;
-				sk->sk_priority = rt_tos2priority(val);
-				sk_dst_reset(sk);
-			}
-			break;
-		case IP_TTL:
-			if (optlen<1)
-				goto e_inval;
-			if (val != -1 && (val < 1 || val>255))
-				goto e_inval;
-			inet->uc_ttl = val;
-			break;
-		case IP_HDRINCL:
-			if (sk->sk_type != SOCK_RAW) {
-				err = -ENOPROTOOPT;
+		} else {
+			memset(&mreq, 0, sizeof(mreq));
+			if (optlen >= sizeof(struct in_addr) &&
+			    copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr)))
+				break;
+		}
+
+		if (!mreq.imr_ifindex) {
+			if (mreq.imr_address.s_addr == INADDR_ANY) {
+				inet->mc_index = 0;
+				inet->mc_addr  = 0;
+				err = 0;
 				break;
 			}
-			inet->hdrincl = val ? 1 : 0;
-			break;
-		case IP_MTU_DISCOVER:
-			if (val<0 || val>2)
-				goto e_inval;
-			inet->pmtudisc = val;
-			break;
-		case IP_RECVERR:
-			inet->recverr = !!val;
-			if (!val)
-				skb_queue_purge(&sk->sk_error_queue);
-			break;
-		case IP_MULTICAST_TTL:
-			if (sk->sk_type == SOCK_STREAM)
-				goto e_inval;
-			if (optlen<1)
-				goto e_inval;
-			if (val==-1)
-				val = 1;
-			if (val < 0 || val > 255)
-				goto e_inval;
-			inet->mc_ttl = val;
-			break;
-		case IP_MULTICAST_LOOP:
-			if (optlen<1)
-				goto e_inval;
-			inet->mc_loop = !!val;
-			break;
-		case IP_MULTICAST_IF:
-		{
-			struct ip_mreqn mreq;
-			struct net_device *dev = NULL;
+			dev = ip_dev_find(mreq.imr_address.s_addr);
+			if (dev) {
+				mreq.imr_ifindex = dev->ifindex;
+				dev_put(dev);
+			}
+		} else
+			dev = __dev_get_by_index(mreq.imr_ifindex);
 
-			if (sk->sk_type == SOCK_STREAM)
-				goto e_inval;
-			/*
-			 *	Check the arguments are allowable
-			 */
 
-			err = -EFAULT;
-			if (optlen >= sizeof(struct ip_mreqn)) {
-				if (copy_from_user(&mreq,optval,sizeof(mreq)))
-					break;
-			} else {
-				memset(&mreq, 0, sizeof(mreq));
-				if (optlen >= sizeof(struct in_addr) &&
-				    copy_from_user(&mreq.imr_address,optval,sizeof(struct in_addr)))
-					break;
-			}
+		err = -EADDRNOTAVAIL;
+		if (!dev)
+			break;
+
+		err = -EINVAL;
+		if (sk->sk_bound_dev_if &&
+		    mreq.imr_ifindex != sk->sk_bound_dev_if)
+			break;
 
-			if (!mreq.imr_ifindex) {
-				if (mreq.imr_address.s_addr == INADDR_ANY) {
-					inet->mc_index = 0;
-					inet->mc_addr  = 0;
-					err = 0;
-					break;
-				}
-				dev = ip_dev_find(mreq.imr_address.s_addr);
-				if (dev) {
-					mreq.imr_ifindex = dev->ifindex;
-					dev_put(dev);
-				}
-			} else
-				dev = __dev_get_by_index(mreq.imr_ifindex);
+		inet->mc_index = mreq.imr_ifindex;
+		inet->mc_addr  = mreq.imr_address.s_addr;
+		err = 0;
+		break;
+	}
 
+	case IP_ADD_MEMBERSHIP:
+	case IP_DROP_MEMBERSHIP:
+	{
+		struct ip_mreqn mreq;
 
-			err = -EADDRNOTAVAIL;
-			if (!dev)
+		if (optlen < sizeof(struct ip_mreq))
+			goto e_inval;
+		err = -EFAULT;
+		if (optlen >= sizeof(struct ip_mreqn)) {
+			if (copy_from_user(&mreq,optval,sizeof(mreq)))
 				break;
-
-			err = -EINVAL;
-			if (sk->sk_bound_dev_if &&
-			    mreq.imr_ifindex != sk->sk_bound_dev_if)
+		} else {
+			memset(&mreq, 0, sizeof(mreq));
+			if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq)))
 				break;
+		}
 
-			inet->mc_index = mreq.imr_ifindex;
-			inet->mc_addr  = mreq.imr_address.s_addr;
-			err = 0;
+		if (optname == IP_ADD_MEMBERSHIP)
+			err = ip_mc_join_group(sk, &mreq);
+		else
+			err = ip_mc_leave_group(sk, &mreq);
+		break;
+	}
+	case IP_MSFILTER:
+	{
+		extern int sysctl_igmp_max_msf;
+		struct ip_msfilter *msf;
+
+		if (optlen < IP_MSFILTER_SIZE(0))
+			goto e_inval;
+		if (optlen > sysctl_optmem_max) {
+			err = -ENOBUFS;
 			break;
 		}
+		msf = kmalloc(optlen, GFP_KERNEL);
+		if (msf == 0) {
+			err = -ENOBUFS;
+			break;
+		}
+		err = -EFAULT;
+		if (copy_from_user(msf, optval, optlen)) {
+			kfree(msf);
+			break;
+		}
+		/* numsrc >= (1G-4) overflow in 32 bits */
+		if (msf->imsf_numsrc >= 0x3ffffffcU ||
+		    msf->imsf_numsrc > sysctl_igmp_max_msf) {
+			kfree(msf);
+			err = -ENOBUFS;
+			break;
+		}
+		if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
+			kfree(msf);
+			err = -EINVAL;
+			break;
+		}
+		err = ip_mc_msfilter(sk, msf, 0);
+		kfree(msf);
+		break;
+	}
+	case IP_BLOCK_SOURCE:
+	case IP_UNBLOCK_SOURCE:
+	case IP_ADD_SOURCE_MEMBERSHIP:
+	case IP_DROP_SOURCE_MEMBERSHIP:
+	{
+		struct ip_mreq_source mreqs;
+		int omode, add;
 
-		case IP_ADD_MEMBERSHIP:
-		case IP_DROP_MEMBERSHIP:
-		{
-			struct ip_mreqn mreq;
-
-			if (optlen < sizeof(struct ip_mreq))
-				goto e_inval;
+		if (optlen != sizeof(struct ip_mreq_source))
+			goto e_inval;
+		if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
 			err = -EFAULT;
-			if (optlen >= sizeof(struct ip_mreqn)) {
-				if(copy_from_user(&mreq,optval,sizeof(mreq)))
-					break;
-			} else {
-				memset(&mreq, 0, sizeof(mreq));
-				if (copy_from_user(&mreq,optval,sizeof(struct ip_mreq)))
-					break;
-			}
-
-			if (optname == IP_ADD_MEMBERSHIP)
-				err = ip_mc_join_group(sk, &mreq);
-			else
-				err = ip_mc_leave_group(sk, &mreq);
 			break;
 		}
-		case IP_MSFILTER:
-		{
-			extern int sysctl_igmp_max_msf;
-			struct ip_msfilter *msf;
+		if (optname == IP_BLOCK_SOURCE) {
+			omode = MCAST_EXCLUDE;
+			add = 1;
+		} else if (optname == IP_UNBLOCK_SOURCE) {
+			omode = MCAST_EXCLUDE;
+			add = 0;
+		} else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
+			struct ip_mreqn mreq;
 
-			if (optlen < IP_MSFILTER_SIZE(0))
-				goto e_inval;
-			if (optlen > sysctl_optmem_max) {
-				err = -ENOBUFS;
-				break;
-			}
-			msf = kmalloc(optlen, GFP_KERNEL);
-			if (msf == 0) {
-				err = -ENOBUFS;
+			mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
+			mreq.imr_address.s_addr = mreqs.imr_interface;
+			mreq.imr_ifindex = 0;
+			err = ip_mc_join_group(sk, &mreq);
+			if (err && err != -EADDRINUSE)
 				break;
-			}
+			omode = MCAST_INCLUDE;
+			add = 1;
+		} else /* IP_DROP_SOURCE_MEMBERSHIP */ {
+			omode = MCAST_INCLUDE;
+			add = 0;
+		}
+		err = ip_mc_source(add, omode, sk, &mreqs, 0);
+		break;
+	}
+	case MCAST_JOIN_GROUP:
+	case MCAST_LEAVE_GROUP:
+	{
+		struct group_req greq;
+		struct sockaddr_in *psin;
+		struct ip_mreqn mreq;
+
+		if (optlen < sizeof(struct group_req))
+			goto e_inval;
+		err = -EFAULT;
+		if (copy_from_user(&greq, optval, sizeof(greq)))
+			break;
+		psin = (struct sockaddr_in *)&greq.gr_group;
+		if (psin->sin_family != AF_INET)
+			goto e_inval;
+		memset(&mreq, 0, sizeof(mreq));
+		mreq.imr_multiaddr = psin->sin_addr;
+		mreq.imr_ifindex = greq.gr_interface;
+
+		if (optname == MCAST_JOIN_GROUP)
+			err = ip_mc_join_group(sk, &mreq);
+		else
+			err = ip_mc_leave_group(sk, &mreq);
+		break;
+	}
+	case MCAST_JOIN_SOURCE_GROUP:
+	case MCAST_LEAVE_SOURCE_GROUP:
+	case MCAST_BLOCK_SOURCE:
+	case MCAST_UNBLOCK_SOURCE:
+	{
+		struct group_source_req greqs;
+		struct ip_mreq_source mreqs;
+		struct sockaddr_in *psin;
+		int omode, add;
+
+		if (optlen != sizeof(struct group_source_req))
+			goto e_inval;
+		if (copy_from_user(&greqs, optval, sizeof(greqs))) {
 			err = -EFAULT;
-			if (copy_from_user(msf, optval, optlen)) {
-				kfree(msf);
-				break;
-			}
-			/* numsrc >= (1G-4) overflow in 32 bits */
-			if (msf->imsf_numsrc >= 0x3ffffffcU ||
-			    msf->imsf_numsrc > sysctl_igmp_max_msf) {
-				kfree(msf);
-				err = -ENOBUFS;
-				break;
-			}
-			if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
-				kfree(msf);
-				err = -EINVAL;
-				break;
-			}
-			err = ip_mc_msfilter(sk, msf, 0);
-			kfree(msf);
 			break;
 		}
-		case IP_BLOCK_SOURCE:
-		case IP_UNBLOCK_SOURCE:
-		case IP_ADD_SOURCE_MEMBERSHIP:
-		case IP_DROP_SOURCE_MEMBERSHIP:
-		{
-			struct ip_mreq_source mreqs;
-			int omode, add;
-
-			if (optlen != sizeof(struct ip_mreq_source))
-				goto e_inval;
-			if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
-				err = -EFAULT;
-				break;
-			}
-			if (optname == IP_BLOCK_SOURCE) {
-				omode = MCAST_EXCLUDE;
-				add = 1;
-			} else if (optname == IP_UNBLOCK_SOURCE) {
-				omode = MCAST_EXCLUDE;
-				add = 0;
-			} else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
-				struct ip_mreqn mreq;
-
-				mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
-				mreq.imr_address.s_addr = mreqs.imr_interface;
-				mreq.imr_ifindex = 0;
-				err = ip_mc_join_group(sk, &mreq);
-				if (err && err != -EADDRINUSE)
-					break;
-				omode = MCAST_INCLUDE;
-				add = 1;
-			} else /* IP_DROP_SOURCE_MEMBERSHIP */ {
-				omode = MCAST_INCLUDE;
-				add = 0;
-			}
-			err = ip_mc_source(add, omode, sk, &mreqs, 0);
+		if (greqs.gsr_group.ss_family != AF_INET ||
+		    greqs.gsr_source.ss_family != AF_INET) {
+			err = -EADDRNOTAVAIL;
 			break;
 		}
-		case MCAST_JOIN_GROUP:
-		case MCAST_LEAVE_GROUP:
-		{
-			struct group_req greq;
-			struct sockaddr_in *psin;
+		psin = (struct sockaddr_in *)&greqs.gsr_group;
+		mreqs.imr_multiaddr = psin->sin_addr.s_addr;
+		psin = (struct sockaddr_in *)&greqs.gsr_source;
+		mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
+		mreqs.imr_interface = 0; /* use index for mc_source */
+
+		if (optname == MCAST_BLOCK_SOURCE) {
+			omode = MCAST_EXCLUDE;
+			add = 1;
+		} else if (optname == MCAST_UNBLOCK_SOURCE) {
+			omode = MCAST_EXCLUDE;
+			add = 0;
+		} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
 			struct ip_mreqn mreq;
 
-			if (optlen < sizeof(struct group_req))
-				goto e_inval;
-			err = -EFAULT;
-			if(copy_from_user(&greq, optval, sizeof(greq)))
-				break;
-			psin = (struct sockaddr_in *)&greq.gr_group;
-			if (psin->sin_family != AF_INET)
-				goto e_inval;
-			memset(&mreq, 0, sizeof(mreq));
+			psin = (struct sockaddr_in *)&greqs.gsr_group;
 			mreq.imr_multiaddr = psin->sin_addr;
-			mreq.imr_ifindex = greq.gr_interface;
-
-			if (optname == MCAST_JOIN_GROUP)
-				err = ip_mc_join_group(sk, &mreq);
-			else
-				err = ip_mc_leave_group(sk, &mreq);
+			mreq.imr_address.s_addr = 0;
+			mreq.imr_ifindex = greqs.gsr_interface;
+			err = ip_mc_join_group(sk, &mreq);
+			if (err && err != -EADDRINUSE)
+				break;
+			greqs.gsr_interface = mreq.imr_ifindex;
+			omode = MCAST_INCLUDE;
+			add = 1;
+		} else /* MCAST_LEAVE_SOURCE_GROUP */ {
+			omode = MCAST_INCLUDE;
+			add = 0;
+		}
+		err = ip_mc_source(add, omode, sk, &mreqs,
+				   greqs.gsr_interface);
+		break;
+	}
+	case MCAST_MSFILTER:
+	{
+		extern int sysctl_igmp_max_msf;
+		struct sockaddr_in *psin;
+		struct ip_msfilter *msf = NULL;
+		struct group_filter *gsf = NULL;
+		int msize, i, ifindex;
+
+		if (optlen < GROUP_FILTER_SIZE(0))
+			goto e_inval;
+		if (optlen > sysctl_optmem_max) {
+			err = -ENOBUFS;
 			break;
 		}
-		case MCAST_JOIN_SOURCE_GROUP:
-		case MCAST_LEAVE_SOURCE_GROUP:
-		case MCAST_BLOCK_SOURCE:
-		case MCAST_UNBLOCK_SOURCE:
-		{
-			struct group_source_req greqs;
-			struct ip_mreq_source mreqs;
-			struct sockaddr_in *psin;
-			int omode, add;
-
-			if (optlen != sizeof(struct group_source_req))
-				goto e_inval;
-			if (copy_from_user(&greqs, optval, sizeof(greqs))) {
-				err = -EFAULT;
-				break;
-			}
-			if (greqs.gsr_group.ss_family != AF_INET ||
-			    greqs.gsr_source.ss_family != AF_INET) {
-				err = -EADDRNOTAVAIL;
-				break;
-			}
-			psin = (struct sockaddr_in *)&greqs.gsr_group;
-			mreqs.imr_multiaddr = psin->sin_addr.s_addr;
-			psin = (struct sockaddr_in *)&greqs.gsr_source;
-			mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
-			mreqs.imr_interface = 0; /* use index for mc_source */
-
-			if (optname == MCAST_BLOCK_SOURCE) {
-				omode = MCAST_EXCLUDE;
-				add = 1;
-			} else if (optname == MCAST_UNBLOCK_SOURCE) {
-				omode = MCAST_EXCLUDE;
-				add = 0;
-			} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
-				struct ip_mreqn mreq;
-
-				psin = (struct sockaddr_in *)&greqs.gsr_group;
-				mreq.imr_multiaddr = psin->sin_addr;
-				mreq.imr_address.s_addr = 0;
-				mreq.imr_ifindex = greqs.gsr_interface;
-				err = ip_mc_join_group(sk, &mreq);
-				if (err && err != -EADDRINUSE)
-					break;
-				greqs.gsr_interface = mreq.imr_ifindex;
-				omode = MCAST_INCLUDE;
-				add = 1;
-			} else /* MCAST_LEAVE_SOURCE_GROUP */ {
-				omode = MCAST_INCLUDE;
-				add = 0;
-			}
-			err = ip_mc_source(add, omode, sk, &mreqs,
-				greqs.gsr_interface);
+		gsf = kmalloc(optlen,GFP_KERNEL);
+		if (gsf == 0) {
+			err = -ENOBUFS;
 			break;
 		}
-		case MCAST_MSFILTER:
-		{
-			extern int sysctl_igmp_max_msf;
-			struct sockaddr_in *psin;
-			struct ip_msfilter *msf = NULL;
-			struct group_filter *gsf = NULL;
-			int msize, i, ifindex;
-
-			if (optlen < GROUP_FILTER_SIZE(0))
-				goto e_inval;
-			if (optlen > sysctl_optmem_max) {
-				err = -ENOBUFS;
-				break;
-			}
-			gsf = kmalloc(optlen,GFP_KERNEL);
-			if (gsf == 0) {
-				err = -ENOBUFS;
-				break;
-			}
-			err = -EFAULT;
-			if (copy_from_user(gsf, optval, optlen)) {
-				goto mc_msf_out;
-			}
-			/* numsrc >= (4G-140)/128 overflow in 32 bits */
-			if (gsf->gf_numsrc >= 0x1ffffff ||
-			    gsf->gf_numsrc > sysctl_igmp_max_msf) {
-				err = -ENOBUFS;
-				goto mc_msf_out;
-			}
-			if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
-				err = -EINVAL;
-				goto mc_msf_out;
-			}
-			msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
-			msf = kmalloc(msize,GFP_KERNEL);
-			if (msf == 0) {
-				err = -ENOBUFS;
-				goto mc_msf_out;
-			}
-			ifindex = gsf->gf_interface;
-			psin = (struct sockaddr_in *)&gsf->gf_group;
-			if (psin->sin_family != AF_INET) {
-				err = -EADDRNOTAVAIL;
-				goto mc_msf_out;
-			}
-			msf->imsf_multiaddr = psin->sin_addr.s_addr;
-			msf->imsf_interface = 0;
-			msf->imsf_fmode = gsf->gf_fmode;
-			msf->imsf_numsrc = gsf->gf_numsrc;
+		err = -EFAULT;
+		if (copy_from_user(gsf, optval, optlen)) {
+			goto mc_msf_out;
+		}
+		/* numsrc >= (4G-140)/128 overflow in 32 bits */
+		if (gsf->gf_numsrc >= 0x1ffffff ||
+		    gsf->gf_numsrc > sysctl_igmp_max_msf) {
+			err = -ENOBUFS;
+			goto mc_msf_out;
+		}
+		if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
+			err = -EINVAL;
+			goto mc_msf_out;
+		}
+		msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
+		msf = kmalloc(msize,GFP_KERNEL);
+		if (msf == 0) {
+			err = -ENOBUFS;
+			goto mc_msf_out;
+		}
+		ifindex = gsf->gf_interface;
+		psin = (struct sockaddr_in *)&gsf->gf_group;
+		if (psin->sin_family != AF_INET) {
 			err = -EADDRNOTAVAIL;
-			for (i=0; i<gsf->gf_numsrc; ++i) {
-				psin = (struct sockaddr_in *)&gsf->gf_slist[i];
-
-				if (psin->sin_family != AF_INET)
-					goto mc_msf_out;
-				msf->imsf_slist[i] = psin->sin_addr.s_addr;
-			}
-			kfree(gsf);
-			gsf = NULL;
-
-			err = ip_mc_msfilter(sk, msf, ifindex);
-mc_msf_out:
-			kfree(msf);
-			kfree(gsf);
-			break;
+			goto mc_msf_out;
 		}
-		case IP_ROUTER_ALERT:
-			err = ip_ra_control(sk, val ? 1 : 0, NULL);
-			break;
+		msf->imsf_multiaddr = psin->sin_addr.s_addr;
+		msf->imsf_interface = 0;
+		msf->imsf_fmode = gsf->gf_fmode;
+		msf->imsf_numsrc = gsf->gf_numsrc;
+		err = -EADDRNOTAVAIL;
+		for (i=0; i<gsf->gf_numsrc; ++i) {
+			psin = (struct sockaddr_in *)&gsf->gf_slist[i];
 
-		case IP_FREEBIND:
-			if (optlen<1)
-				goto e_inval;
-			inet->freebind = !!val;
-			break;
-
-		case IP_IPSEC_POLICY:
-		case IP_XFRM_POLICY:
-			err = -EPERM;
-			if (!capable(CAP_NET_ADMIN))
-				break;
-			err = xfrm_user_policy(sk, optname, optval, optlen);
+			if (psin->sin_family != AF_INET)
+				goto mc_msf_out;
+			msf->imsf_slist[i] = psin->sin_addr.s_addr;
+		}
+		kfree(gsf);
+		gsf = NULL;
+
+		err = ip_mc_msfilter(sk, msf, ifindex);
+	mc_msf_out:
+		kfree(msf);
+		kfree(gsf);
+		break;
+	}
+	case IP_ROUTER_ALERT:
+		err = ip_ra_control(sk, val ? 1 : 0, NULL);
+		break;
+
+	case IP_FREEBIND:
+		if (optlen<1)
+			goto e_inval;
+		inet->freebind = !!val;
+		break;
+
+	case IP_IPSEC_POLICY:
+	case IP_XFRM_POLICY:
+		err = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
 			break;
+		err = xfrm_user_policy(sk, optname, optval, optlen);
+		break;
 
-		default:
-			err = -ENOPROTOOPT;
-			break;
+	default:
+		err = -ENOPROTOOPT;
+		break;
 	}
 	release_sock(sk);
 	return err;
@@ -948,214 +948,213 @@ EXPORT_SYMBOL(compat_ip_setsockopt);
  */
 
 static int do_ip_getsockopt(struct sock *sk, int level, int optname,
-		char __user *optval, int __user *optlen)
+			    char __user *optval, int __user *optlen)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	int val;
 	int len;
 
-	if(level!=SOL_IP)
+	if (level != SOL_IP)
 		return -EOPNOTSUPP;
 
 #ifdef CONFIG_IP_MROUTE
-	if(optname>=MRT_BASE && optname <=MRT_BASE+10)
-	{
+	if (optname >= MRT_BASE && optname <= MRT_BASE+10) {
 		return ip_mroute_getsockopt(sk,optname,optval,optlen);
 	}
 #endif
 
-	if(get_user(len,optlen))
+	if (get_user(len,optlen))
 		return -EFAULT;
-	if(len < 0)
+	if (len < 0)
 		return -EINVAL;
 
 	lock_sock(sk);
 
-	switch(optname)	{
-		case IP_OPTIONS:
-			{
-				unsigned char optbuf[sizeof(struct ip_options)+40];
-				struct ip_options * opt = (struct ip_options*)optbuf;
-				opt->optlen = 0;
-				if (inet->opt)
-					memcpy(optbuf, inet->opt,
-					       sizeof(struct ip_options)+
-					       inet->opt->optlen);
-				release_sock(sk);
-
-				if (opt->optlen == 0)
-					return put_user(0, optlen);
-
-				ip_options_undo(opt);
-
-				len = min_t(unsigned int, len, opt->optlen);
-				if(put_user(len, optlen))
-					return -EFAULT;
-				if(copy_to_user(optval, opt->__data, len))
-					return -EFAULT;
-				return 0;
-			}
-		case IP_PKTINFO:
-			val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
-			break;
-		case IP_RECVTTL:
-			val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
-			break;
-		case IP_RECVTOS:
-			val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
-			break;
-		case IP_RECVOPTS:
-			val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
-			break;
-		case IP_RETOPTS:
-			val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
-			break;
-		case IP_PASSSEC:
-			val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
-			break;
-		case IP_TOS:
-			val = inet->tos;
-			break;
-		case IP_TTL:
-			val = (inet->uc_ttl == -1 ?
-			       sysctl_ip_default_ttl :
-			       inet->uc_ttl);
-			break;
-		case IP_HDRINCL:
-			val = inet->hdrincl;
-			break;
-		case IP_MTU_DISCOVER:
-			val = inet->pmtudisc;
-			break;
-		case IP_MTU:
-		{
-			struct dst_entry *dst;
-			val = 0;
-			dst = sk_dst_get(sk);
-			if (dst) {
-				val = dst_mtu(dst);
-				dst_release(dst);
-			}
-			if (!val) {
-				release_sock(sk);
-				return -ENOTCONN;
-			}
-			break;
+	switch (optname) {
+	case IP_OPTIONS:
+	{
+		unsigned char optbuf[sizeof(struct ip_options)+40];
+		struct ip_options * opt = (struct ip_options*)optbuf;
+		opt->optlen = 0;
+		if (inet->opt)
+			memcpy(optbuf, inet->opt,
+			       sizeof(struct ip_options)+
+			       inet->opt->optlen);
+		release_sock(sk);
+
+		if (opt->optlen == 0)
+			return put_user(0, optlen);
+
+		ip_options_undo(opt);
+
+		len = min_t(unsigned int, len, opt->optlen);
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, opt->__data, len))
+			return -EFAULT;
+		return 0;
+	}
+	case IP_PKTINFO:
+		val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
+		break;
+	case IP_RECVTTL:
+		val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
+		break;
+	case IP_RECVTOS:
+		val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
+		break;
+	case IP_RECVOPTS:
+		val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
+		break;
+	case IP_RETOPTS:
+		val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
+		break;
+	case IP_PASSSEC:
+		val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
+		break;
+	case IP_TOS:
+		val = inet->tos;
+		break;
+	case IP_TTL:
+		val = (inet->uc_ttl == -1 ?
+		       sysctl_ip_default_ttl :
+		       inet->uc_ttl);
+		break;
+	case IP_HDRINCL:
+		val = inet->hdrincl;
+		break;
+	case IP_MTU_DISCOVER:
+		val = inet->pmtudisc;
+		break;
+	case IP_MTU:
+	{
+		struct dst_entry *dst;
+		val = 0;
+		dst = sk_dst_get(sk);
+		if (dst) {
+			val = dst_mtu(dst);
+			dst_release(dst);
 		}
-		case IP_RECVERR:
-			val = inet->recverr;
-			break;
-		case IP_MULTICAST_TTL:
-			val = inet->mc_ttl;
-			break;
-		case IP_MULTICAST_LOOP:
-			val = inet->mc_loop;
-			break;
-		case IP_MULTICAST_IF:
-		{
-			struct in_addr addr;
-			len = min_t(unsigned int, len, sizeof(struct in_addr));
-			addr.s_addr = inet->mc_addr;
+		if (!val) {
 			release_sock(sk);
-
-			if(put_user(len, optlen))
-				return -EFAULT;
-			if(copy_to_user(optval, &addr, len))
-				return -EFAULT;
-			return 0;
+			return -ENOTCONN;
 		}
-		case IP_MSFILTER:
-		{
-			struct ip_msfilter msf;
-			int err;
+		break;
+	}
+	case IP_RECVERR:
+		val = inet->recverr;
+		break;
+	case IP_MULTICAST_TTL:
+		val = inet->mc_ttl;
+		break;
+	case IP_MULTICAST_LOOP:
+		val = inet->mc_loop;
+		break;
+	case IP_MULTICAST_IF:
+	{
+		struct in_addr addr;
+		len = min_t(unsigned int, len, sizeof(struct in_addr));
+		addr.s_addr = inet->mc_addr;
+		release_sock(sk);
 
-			if (len < IP_MSFILTER_SIZE(0)) {
-				release_sock(sk);
-				return -EINVAL;
-			}
-			if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
-				release_sock(sk);
-				return -EFAULT;
-			}
-			err = ip_mc_msfget(sk, &msf,
-				(struct ip_msfilter __user *)optval, optlen);
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, &addr, len))
+			return -EFAULT;
+		return 0;
+	}
+	case IP_MSFILTER:
+	{
+		struct ip_msfilter msf;
+		int err;
+
+		if (len < IP_MSFILTER_SIZE(0)) {
 			release_sock(sk);
-			return err;
+			return -EINVAL;
 		}
-		case MCAST_MSFILTER:
-		{
-			struct group_filter gsf;
-			int err;
-
-			if (len < GROUP_FILTER_SIZE(0)) {
-				release_sock(sk);
-				return -EINVAL;
-			}
-			if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
-				release_sock(sk);
-				return -EFAULT;
-			}
-			err = ip_mc_gsfget(sk, &gsf,
-				(struct group_filter __user *)optval, optlen);
+		if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
 			release_sock(sk);
-			return err;
+			return -EFAULT;
 		}
-		case IP_PKTOPTIONS:
-		{
-			struct msghdr msg;
+		err = ip_mc_msfget(sk, &msf,
+				   (struct ip_msfilter __user *)optval, optlen);
+		release_sock(sk);
+		return err;
+	}
+	case MCAST_MSFILTER:
+	{
+		struct group_filter gsf;
+		int err;
 
+		if (len < GROUP_FILTER_SIZE(0)) {
 			release_sock(sk);
+			return -EINVAL;
+		}
+		if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
+			release_sock(sk);
+			return -EFAULT;
+		}
+		err = ip_mc_gsfget(sk, &gsf,
+				   (struct group_filter __user *)optval, optlen);
+		release_sock(sk);
+		return err;
+	}
+	case IP_PKTOPTIONS:
+	{
+		struct msghdr msg;
+
+		release_sock(sk);
 
-			if (sk->sk_type != SOCK_STREAM)
-				return -ENOPROTOOPT;
+		if (sk->sk_type != SOCK_STREAM)
+			return -ENOPROTOOPT;
 
-			msg.msg_control = optval;
-			msg.msg_controllen = len;
-			msg.msg_flags = 0;
+		msg.msg_control = optval;
+		msg.msg_controllen = len;
+		msg.msg_flags = 0;
 
-			if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
-				struct in_pktinfo info;
+		if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
+			struct in_pktinfo info;
 
-				info.ipi_addr.s_addr = inet->rcv_saddr;
-				info.ipi_spec_dst.s_addr = inet->rcv_saddr;
-				info.ipi_ifindex = inet->mc_index;
-				put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
-			}
-			if (inet->cmsg_flags & IP_CMSG_TTL) {
-				int hlim = inet->mc_ttl;
-				put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
-			}
-			len -= msg.msg_controllen;
-			return put_user(len, optlen);
+			info.ipi_addr.s_addr = inet->rcv_saddr;
+			info.ipi_spec_dst.s_addr = inet->rcv_saddr;
+			info.ipi_ifindex = inet->mc_index;
+			put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
 		}
-		case IP_FREEBIND:
-			val = inet->freebind;
-			break;
-		default:
-			release_sock(sk);
-			return -ENOPROTOOPT;
+		if (inet->cmsg_flags & IP_CMSG_TTL) {
+			int hlim = inet->mc_ttl;
+			put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
+		}
+		len -= msg.msg_controllen;
+		return put_user(len, optlen);
+	}
+	case IP_FREEBIND:
+		val = inet->freebind;
+		break;
+	default:
+		release_sock(sk);
+		return -ENOPROTOOPT;
 	}
 	release_sock(sk);
 
 	if (len < sizeof(int) && len > 0 && val>=0 && val<255) {
 		unsigned char ucval = (unsigned char)val;
 		len = 1;
-		if(put_user(len, optlen))
+		if (put_user(len, optlen))
 			return -EFAULT;
-		if(copy_to_user(optval,&ucval,1))
+		if (copy_to_user(optval,&ucval,1))
 			return -EFAULT;
 	} else {
 		len = min_t(unsigned int, sizeof(int), len);
-		if(put_user(len, optlen))
+		if (put_user(len, optlen))
 			return -EFAULT;
-		if(copy_to_user(optval,&val,len))
+		if (copy_to_user(optval,&val,len))
 			return -EFAULT;
 	}
 	return 0;
 }
 
 int ip_getsockopt(struct sock *sk, int level,
-		int optname, char __user *optval, int __user *optlen)
+		  int optname, char __user *optval, int __user *optlen)
 {
 	int err;
 
@@ -1169,7 +1168,7 @@ int ip_getsockopt(struct sock *sk, int level,
 	   ) {
 		int len;
 
-		if(get_user(len,optlen))
+		if (get_user(len,optlen))
 			return -EFAULT;
 
 		lock_sock(sk);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index cf49de1a498..c43699f374c 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -782,7 +782,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
 	u8 *c;
 
 	printk("DHCP/BOOTP: Got extension %d:",*ext);
-	for(c=ext+2; c<ext+2+ext[1]; c++)
+	for (c=ext+2; c<ext+2+ext[1]; c++)
 		printk(" %02x", *c);
 	printk("\n");
 #endif
@@ -1094,7 +1094,7 @@ static int __init ic_dynamic(void)
 	retries = CONF_SEND_RETRIES;
 	get_random_bytes(&timeout, sizeof(timeout));
 	timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM);
-	for(;;) {
+	for (;;) {
 #ifdef IPCONFIG_BOOTP
 		if (do_bootp && (d->able & IC_BOOTP))
 			ic_bootp_send_if(d, jiffies - start_jiffies);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 601e3df6925..f73f4e402f7 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -302,7 +302,7 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
 
 	atomic_dec(&cache_resolve_queue_len);
 
-	while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
+	while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
 		if (skb->nh.iph->version == 0) {
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 			nlh->nlmsg_type = NLMSG_ERROR;
@@ -479,7 +479,7 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
 static struct mfc_cache *ipmr_cache_alloc(void)
 {
 	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
-	if(c==NULL)
+	if (c==NULL)
 		return NULL;
 	c->mfc_un.res.minvif = MAXVIFS;
 	return c;
@@ -488,7 +488,7 @@ static struct mfc_cache *ipmr_cache_alloc(void)
 static struct mfc_cache *ipmr_cache_alloc_unres(void)
 {
 	struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
-	if(c==NULL)
+	if (c==NULL)
 		return NULL;
 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
 	c->mfc_un.unres.expires = jiffies + 10*HZ;
@@ -508,7 +508,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 	 *	Play the pending entries through our router
 	 */
 
-	while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+	while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 		if (skb->nh.iph->version == 0) {
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 
@@ -551,7 +551,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 #endif
 		skb = alloc_skb(128, GFP_ATOMIC);
 
-	if(!skb)
+	if (!skb)
 		return -ENOBUFS;
 
 #ifdef CONFIG_IP_PIMSM
@@ -734,7 +734,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
 		return 0;
 	}
 
-	if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
+	if (!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
 		return -EINVAL;
 
 	c=ipmr_cache_alloc();
@@ -788,7 +788,7 @@ static void mroute_clean_tables(struct sock *sk)
 	/*
 	 *	Shut down all active vif entries
 	 */
-	for(i=0; i<maxvif; i++) {
+	for (i=0; i<maxvif; i++) {
 		if (!(vif_table[i].flags&VIFF_STATIC))
 			vif_delete(i);
 	}
@@ -858,119 +858,117 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
 	struct vifctl vif;
 	struct mfcctl mfc;
 
-	if(optname!=MRT_INIT)
-	{
-		if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
+	if (optname != MRT_INIT) {
+		if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
 			return -EACCES;
 	}
 
-	switch(optname)
-	{
-		case MRT_INIT:
-			if (sk->sk_type != SOCK_RAW ||
-			    inet_sk(sk)->num != IPPROTO_IGMP)
-				return -EOPNOTSUPP;
-			if(optlen!=sizeof(int))
-				return -ENOPROTOOPT;
-
-			rtnl_lock();
-			if (mroute_socket) {
-				rtnl_unlock();
-				return -EADDRINUSE;
-			}
-
-			ret = ip_ra_control(sk, 1, mrtsock_destruct);
-			if (ret == 0) {
-				write_lock_bh(&mrt_lock);
-				mroute_socket=sk;
-				write_unlock_bh(&mrt_lock);
+	switch (optname) {
+	case MRT_INIT:
+		if (sk->sk_type != SOCK_RAW ||
+		    inet_sk(sk)->num != IPPROTO_IGMP)
+			return -EOPNOTSUPP;
+		if (optlen!=sizeof(int))
+			return -ENOPROTOOPT;
 
-				ipv4_devconf.mc_forwarding++;
-			}
+		rtnl_lock();
+		if (mroute_socket) {
 			rtnl_unlock();
-			return ret;
-		case MRT_DONE:
-			if (sk!=mroute_socket)
-				return -EACCES;
-			return ip_ra_control(sk, 0, NULL);
-		case MRT_ADD_VIF:
-		case MRT_DEL_VIF:
-			if(optlen!=sizeof(vif))
-				return -EINVAL;
-			if (copy_from_user(&vif,optval,sizeof(vif)))
-				return -EFAULT;
-			if(vif.vifc_vifi >= MAXVIFS)
-				return -ENFILE;
-			rtnl_lock();
-			if (optname==MRT_ADD_VIF) {
-				ret = vif_add(&vif, sk==mroute_socket);
-			} else {
-				ret = vif_delete(vif.vifc_vifi);
-			}
-			rtnl_unlock();
-			return ret;
+			return -EADDRINUSE;
+		}
+
+		ret = ip_ra_control(sk, 1, mrtsock_destruct);
+		if (ret == 0) {
+			write_lock_bh(&mrt_lock);
+			mroute_socket=sk;
+			write_unlock_bh(&mrt_lock);
+
+			ipv4_devconf.mc_forwarding++;
+		}
+		rtnl_unlock();
+		return ret;
+	case MRT_DONE:
+		if (sk!=mroute_socket)
+			return -EACCES;
+		return ip_ra_control(sk, 0, NULL);
+	case MRT_ADD_VIF:
+	case MRT_DEL_VIF:
+		if (optlen!=sizeof(vif))
+			return -EINVAL;
+		if (copy_from_user(&vif,optval,sizeof(vif)))
+			return -EFAULT;
+		if (vif.vifc_vifi >= MAXVIFS)
+			return -ENFILE;
+		rtnl_lock();
+		if (optname==MRT_ADD_VIF) {
+			ret = vif_add(&vif, sk==mroute_socket);
+		} else {
+			ret = vif_delete(vif.vifc_vifi);
+		}
+		rtnl_unlock();
+		return ret;
 
 		/*
 		 *	Manipulate the forwarding caches. These live
 		 *	in a sort of kernel/user symbiosis.
 		 */
-		case MRT_ADD_MFC:
-		case MRT_DEL_MFC:
-			if(optlen!=sizeof(mfc))
-				return -EINVAL;
-			if (copy_from_user(&mfc,optval, sizeof(mfc)))
-				return -EFAULT;
-			rtnl_lock();
-			if (optname==MRT_DEL_MFC)
-				ret = ipmr_mfc_delete(&mfc);
-			else
-				ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
-			rtnl_unlock();
-			return ret;
+	case MRT_ADD_MFC:
+	case MRT_DEL_MFC:
+		if (optlen!=sizeof(mfc))
+			return -EINVAL;
+		if (copy_from_user(&mfc,optval, sizeof(mfc)))
+			return -EFAULT;
+		rtnl_lock();
+		if (optname==MRT_DEL_MFC)
+			ret = ipmr_mfc_delete(&mfc);
+		else
+			ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
+		rtnl_unlock();
+		return ret;
 		/*
 		 *	Control PIM assert.
 		 */
-		case MRT_ASSERT:
-		{
-			int v;
-			if(get_user(v,(int __user *)optval))
-				return -EFAULT;
-			mroute_do_assert=(v)?1:0;
-			return 0;
-		}
+	case MRT_ASSERT:
+	{
+		int v;
+		if (get_user(v,(int __user *)optval))
+			return -EFAULT;
+		mroute_do_assert=(v)?1:0;
+		return 0;
+	}
 #ifdef CONFIG_IP_PIMSM
-		case MRT_PIM:
-		{
-			int v, ret;
-			if(get_user(v,(int __user *)optval))
-				return -EFAULT;
-			v = (v)?1:0;
-			rtnl_lock();
-			ret = 0;
-			if (v != mroute_do_pim) {
-				mroute_do_pim = v;
-				mroute_do_assert = v;
+	case MRT_PIM:
+	{
+		int v, ret;
+		if (get_user(v,(int __user *)optval))
+			return -EFAULT;
+		v = (v)?1:0;
+		rtnl_lock();
+		ret = 0;
+		if (v != mroute_do_pim) {
+			mroute_do_pim = v;
+			mroute_do_assert = v;
 #ifdef CONFIG_IP_PIMSM_V2
-				if (mroute_do_pim)
-					ret = inet_add_protocol(&pim_protocol,
-								IPPROTO_PIM);
-				else
-					ret = inet_del_protocol(&pim_protocol,
-								IPPROTO_PIM);
-				if (ret < 0)
-					ret = -EAGAIN;
+			if (mroute_do_pim)
+				ret = inet_add_protocol(&pim_protocol,
+							IPPROTO_PIM);
+			else
+				ret = inet_del_protocol(&pim_protocol,
+							IPPROTO_PIM);
+			if (ret < 0)
+				ret = -EAGAIN;
 #endif
-			}
-			rtnl_unlock();
-			return ret;
 		}
+		rtnl_unlock();
+		return ret;
+	}
 #endif
-		/*
-		 *	Spurious command, or MRT_VERSION which you cannot
-		 *	set.
-		 */
-		default:
-			return -ENOPROTOOPT;
+	/*
+	 *	Spurious command, or MRT_VERSION which you cannot
+	 *	set.
+	 */
+	default:
+		return -ENOPROTOOPT;
 	}
 }
 
@@ -983,7 +981,7 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u
 	int olr;
 	int val;
 
-	if(optname!=MRT_VERSION &&
+	if (optname!=MRT_VERSION &&
 #ifdef CONFIG_IP_PIMSM
 	   optname!=MRT_PIM &&
 #endif
@@ -997,17 +995,17 @@ int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __u
 	if (olr < 0)
 		return -EINVAL;
 
-	if(put_user(olr,optlen))
+	if (put_user(olr,optlen))
 		return -EFAULT;
-	if(optname==MRT_VERSION)
+	if (optname==MRT_VERSION)
 		val=0x0305;
 #ifdef CONFIG_IP_PIMSM
-	else if(optname==MRT_PIM)
+	else if (optname==MRT_PIM)
 		val=mroute_do_pim;
 #endif
 	else
 		val=mroute_do_assert;
-	if(copy_to_user(optval,&val,olr))
+	if (copy_to_user(optval,&val,olr))
 		return -EFAULT;
 	return 0;
 }
@@ -1023,48 +1021,47 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
 	struct vif_device *vif;
 	struct mfc_cache *c;
 
-	switch(cmd)
-	{
-		case SIOCGETVIFCNT:
-			if (copy_from_user(&vr,arg,sizeof(vr)))
-				return -EFAULT;
-			if(vr.vifi>=maxvif)
-				return -EINVAL;
-			read_lock(&mrt_lock);
-			vif=&vif_table[vr.vifi];
-			if(VIF_EXISTS(vr.vifi))	{
-				vr.icount=vif->pkt_in;
-				vr.ocount=vif->pkt_out;
-				vr.ibytes=vif->bytes_in;
-				vr.obytes=vif->bytes_out;
-				read_unlock(&mrt_lock);
-
-				if (copy_to_user(arg,&vr,sizeof(vr)))
-					return -EFAULT;
-				return 0;
-			}
+	switch (cmd) {
+	case SIOCGETVIFCNT:
+		if (copy_from_user(&vr,arg,sizeof(vr)))
+			return -EFAULT;
+		if (vr.vifi>=maxvif)
+			return -EINVAL;
+		read_lock(&mrt_lock);
+		vif=&vif_table[vr.vifi];
+		if (VIF_EXISTS(vr.vifi))	{
+			vr.icount=vif->pkt_in;
+			vr.ocount=vif->pkt_out;
+			vr.ibytes=vif->bytes_in;
+			vr.obytes=vif->bytes_out;
 			read_unlock(&mrt_lock);
-			return -EADDRNOTAVAIL;
-		case SIOCGETSGCNT:
-			if (copy_from_user(&sr,arg,sizeof(sr)))
-				return -EFAULT;
 
-			read_lock(&mrt_lock);
-			c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
-			if (c) {
-				sr.pktcnt = c->mfc_un.res.pkt;
-				sr.bytecnt = c->mfc_un.res.bytes;
-				sr.wrong_if = c->mfc_un.res.wrong_if;
-				read_unlock(&mrt_lock);
-
-				if (copy_to_user(arg,&sr,sizeof(sr)))
-					return -EFAULT;
-				return 0;
-			}
+			if (copy_to_user(arg,&vr,sizeof(vr)))
+				return -EFAULT;
+			return 0;
+		}
+		read_unlock(&mrt_lock);
+		return -EADDRNOTAVAIL;
+	case SIOCGETSGCNT:
+		if (copy_from_user(&sr,arg,sizeof(sr)))
+			return -EFAULT;
+
+		read_lock(&mrt_lock);
+		c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
+		if (c) {
+			sr.pktcnt = c->mfc_un.res.pkt;
+			sr.bytecnt = c->mfc_un.res.bytes;
+			sr.wrong_if = c->mfc_un.res.wrong_if;
 			read_unlock(&mrt_lock);
-			return -EADDRNOTAVAIL;
-		default:
-			return -ENOIOCTLCMD;
+
+			if (copy_to_user(arg,&sr,sizeof(sr)))
+				return -EFAULT;
+			return 0;
+		}
+		read_unlock(&mrt_lock);
+		return -EADDRNOTAVAIL;
+	default:
+		return -ENOIOCTLCMD;
 	}
 }
 
@@ -1076,7 +1073,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
 	v=&vif_table[0];
-	for(ct=0;ct<maxvif;ct++,v++) {
+	for (ct=0;ct<maxvif;ct++,v++) {
 		if (v->dev==ptr)
 			vif_delete(ct);
 	}
@@ -1625,7 +1622,7 @@ static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
 					   loff_t pos)
 {
 	for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
-		if(!VIF_EXISTS(iter->ct))
+		if (!VIF_EXISTS(iter->ct))
 			continue;
 		if (pos-- == 0)
 			return &vif_table[iter->ct];
@@ -1649,7 +1646,7 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		return ipmr_vif_seq_idx(iter, 0);
 
 	while (++iter->ct < maxvif) {
-		if(!VIF_EXISTS(iter->ct))
+		if (!VIF_EXISTS(iter->ct))
 			continue;
 		return &vif_table[iter->ct];
 	}
@@ -1732,14 +1729,14 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
 	it->cache = mfc_cache_array;
 	read_lock(&mrt_lock);
 	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
-		for(mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
+		for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
 			if (pos-- == 0)
 				return mfc;
 	read_unlock(&mrt_lock);
 
 	it->cache = &mfc_unres_queue;
 	spin_lock_bh(&mfc_unres_lock);
-	for(mfc = mfc_unres_queue; mfc; mfc = mfc->next)
+	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
 		if (pos-- == 0)
 			return mfc;
 	spin_unlock_bh(&mfc_unres_lock);
@@ -1829,9 +1826,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 			   mfc->mfc_un.res.wrong_if);
 
 		if (it->cache != &mfc_unres_queue) {
-			for(n = mfc->mfc_un.res.minvif;
-			    n < mfc->mfc_un.res.maxvif; n++ ) {
-				if(VIF_EXISTS(n)
+			for (n = mfc->mfc_un.res.minvif;
+			     n < mfc->mfc_un.res.maxvif; n++ ) {
+				if (VIF_EXISTS(n)
 				   && mfc->mfc_un.res.ttls[n] < 255)
 				seq_printf(seq,
 					   " %2d:%-3d",
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index ceb4376f572..4872b30ba5d 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -93,7 +93,7 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	struct iphdr *iph = skb->nh.iph;
 	int err = -EINVAL;
 
-	switch(iph->protocol){
+	switch (iph->protocol){
 		case IPPROTO_IPIP:
 			break;
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 5d51a2af34c..98a833ce111 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -119,7 +119,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 
 		if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) {
 			unsigned short encap_family = xfrm[i]->props.family;
-			switch(encap_family) {
+			switch (encap_family) {
 			case AF_INET:
 				fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4;
 				fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
-- 
cgit v1.2.3


From 2de979bd7da9c8b39cc0aabb0ab5aa1516d929eb Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 8 Mar 2007 20:45:19 -0800
Subject: [TCP]: whitespace cleanup

Add whitespace around keywords.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_hybla.c     |  2 +-
 net/ipv4/tcp_input.c     | 57 ++++++++++++++++++++++++------------------------
 net/ipv4/tcp_minisocks.c |  6 ++---
 net/ipv4/tcp_output.c    | 34 +++++++++++++++--------------
 net/ipv4/tcp_westwood.c  |  2 +-
 5 files changed, 51 insertions(+), 50 deletions(-)

diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 59e691d26f6..e5be3511722 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -144,7 +144,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
 	ca->snd_cwnd_cents += odd;
 
 	/* check when fractions goes >=128 and increase cwnd by 1. */
-	while(ca->snd_cwnd_cents >= 128) {
+	while (ca->snd_cwnd_cents >= 128) {
 		tp->snd_cwnd++;
 		ca->snd_cwnd_cents -= 128;
 		tp->snd_cwnd_cnt = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 22d0bb03c5d..fb025608594 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -578,7 +578,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 	 * does not matter how to _calculate_ it. Seems, it was trap
 	 * that VJ failed to avoid. 8)
 	 */
-	if(m == 0)
+	if (m == 0)
 		m = 1;
 	if (tp->srtt != 0) {
 		m -= (tp->srtt >> 3);	/* m is now error in rtt est */
@@ -1758,12 +1758,11 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
 
 			/* clear xmit_retransmit_queue hints
 			 *  if this is beyond hint */
-			if(tp->retransmit_skb_hint != NULL &&
-			   before(TCP_SKB_CB(skb)->seq,
-				  TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) {
-
+			if (tp->retransmit_skb_hint != NULL &&
+			    before(TCP_SKB_CB(skb)->seq,
+				   TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
 				tp->retransmit_skb_hint = NULL;
-			}
+
 		}
 	}
 	tcp_sync_left_out(tp);
@@ -2441,7 +2440,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 
 		if (sacked) {
 			if (sacked & TCPCB_RETRANS) {
-				if(sacked & TCPCB_SACKED_RETRANS)
+				if (sacked & TCPCB_SACKED_RETRANS)
 					tp->retrans_out -= tcp_skb_pcount(skb);
 				acked |= FLAG_RETRANS_DATA_ACKED;
 				seq_rtt = -1;
@@ -2840,7 +2839,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 	ptr = (unsigned char *)(th + 1);
 	opt_rx->saw_tstamp = 0;
 
-	while(length>0) {
+	while (length > 0) {
 		int opcode=*ptr++;
 		int opsize;
 
@@ -2856,9 +2855,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					return;
 				if (opsize > length)
 					return;	/* don't parse partial options */
-				switch(opcode) {
+				switch (opcode) {
 				case TCPOPT_MSS:
-					if(opsize==TCPOLEN_MSS && th->syn && !estab) {
+					if (opsize==TCPOLEN_MSS && th->syn && !estab) {
 						u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
 						if (in_mss) {
 							if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
@@ -2868,12 +2867,12 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					}
 					break;
 				case TCPOPT_WINDOW:
-					if(opsize==TCPOLEN_WINDOW && th->syn && !estab)
+					if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
 						if (sysctl_tcp_window_scaling) {
 							__u8 snd_wscale = *(__u8 *) ptr;
 							opt_rx->wscale_ok = 1;
 							if (snd_wscale > 14) {
-								if(net_ratelimit())
+								if (net_ratelimit())
 									printk(KERN_INFO "tcp_parse_options: Illegal window "
 									       "scaling value %d >14 received.\n",
 									       snd_wscale);
@@ -2883,7 +2882,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 						}
 					break;
 				case TCPOPT_TIMESTAMP:
-					if(opsize==TCPOLEN_TIMESTAMP) {
+					if (opsize==TCPOLEN_TIMESTAMP) {
 						if ((estab && opt_rx->tstamp_ok) ||
 						    (!estab && sysctl_tcp_timestamps)) {
 							opt_rx->saw_tstamp = 1;
@@ -2893,7 +2892,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					}
 					break;
 				case TCPOPT_SACK_PERM:
-					if(opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
+					if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
 						if (sysctl_tcp_sack) {
 							opt_rx->sack_ok = 1;
 							tcp_sack_reset(opt_rx);
@@ -2902,7 +2901,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					break;
 
 				case TCPOPT_SACK:
-					if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
+					if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
 					   !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
 					   opt_rx->sack_ok) {
 						TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
@@ -2964,7 +2963,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
 		 * Not only, also it occurs for expired timestamps.
 		 */
 
-		if((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
+		if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 ||
 		   get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
 			tcp_store_ts_recent(tp);
 	}
@@ -3223,7 +3222,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
 			 */
 			tp->rx_opt.num_sacks--;
 			tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
-			for(i=this_sack; i < tp->rx_opt.num_sacks; i++)
+			for (i=this_sack; i < tp->rx_opt.num_sacks; i++)
 				sp[i] = sp[i+1];
 			continue;
 		}
@@ -3276,7 +3275,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 		tp->rx_opt.num_sacks--;
 		sp--;
 	}
-	for(; this_sack > 0; this_sack--, sp--)
+	for (; this_sack > 0; this_sack--, sp--)
 		*sp = *(sp-1);
 
 new_sack:
@@ -3302,7 +3301,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
 		return;
 	}
 
-	for(this_sack = 0; this_sack < num_sacks; ) {
+	for (this_sack = 0; this_sack < num_sacks; ) {
 		/* Check if the start of the sack is covered by RCV.NXT. */
 		if (!before(tp->rcv_nxt, sp->start_seq)) {
 			int i;
@@ -3358,7 +3357,7 @@ static void tcp_ofo_queue(struct sock *sk)
 		__skb_unlink(skb, &tp->out_of_order_queue);
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		if(skb->h.th->fin)
+		if (skb->h.th->fin)
 			tcp_fin(skb, sk, skb->h.th);
 	}
 }
@@ -3424,9 +3423,9 @@ queue_and_out:
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		}
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		if(skb->len)
+		if (skb->len)
 			tcp_event_data_recv(sk, tp, skb);
-		if(th->fin)
+		if (th->fin)
 			tcp_fin(skb, sk, th);
 
 		if (!skb_queue_empty(&tp->out_of_order_queue)) {
@@ -4323,7 +4322,7 @@ slow_path:
 		goto discard;
 	}
 
-	if(th->rst) {
+	if (th->rst) {
 		tcp_reset(sk);
 		goto discard;
 	}
@@ -4338,7 +4337,7 @@ slow_path:
 	}
 
 step5:
-	if(th->ack)
+	if (th->ack)
 		tcp_ack(sk, skb, FLAG_SLOWPATH);
 
 	tcp_rcv_rtt_measure_ts(sk, skb);
@@ -4626,13 +4625,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		goto discard;
 
 	case TCP_LISTEN:
-		if(th->ack)
+		if (th->ack)
 			return 1;
 
-		if(th->rst)
+		if (th->rst)
 			goto discard;
 
-		if(th->syn) {
+		if (th->syn) {
 			if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
 				return 1;
 
@@ -4688,7 +4687,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	}
 
 	/* step 2: check RST bit */
-	if(th->rst) {
+	if (th->rst) {
 		tcp_reset(sk);
 		goto discard;
 	}
@@ -4711,7 +4710,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 	if (th->ack) {
 		int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH);
 
-		switch(sk->sk_state) {
+		switch (sk->sk_state) {
 		case TCP_SYN_RECV:
 			if (acceptable) {
 				tp->copied_seq = tp->rcv_nxt;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index ac4ce48a659..463d2b24d2d 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -246,7 +246,7 @@ kill:
 	if (paws_reject)
 		NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
 
-	if(!th->rst) {
+	if (!th->rst) {
 		/* In this case we must reset the TIMEWAIT timer.
 		 *
 		 * If it is ACKless SYN it may be both old duplicate
@@ -324,7 +324,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 				if (tcp_alloc_md5sig_pool() == NULL)
 					BUG();
 			}
-		} while(0);
+		} while (0);
 #endif
 
 		/* Linkage updates. */
@@ -438,7 +438,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 						       keepalive_time_when(newtp));
 
 		newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
-		if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
+		if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) {
 			if (sysctl_tcp_fack)
 				newtp->rx_opt.sack_ok |= 2;
 		}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2a62b55b15f..f19f5fb361b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -236,7 +236,7 @@ static u16 tcp_select_window(struct sock *sk)
 	u32 new_win = __tcp_select_window(sk);
 
 	/* Never shrink the offered window */
-	if(new_win < cur_win) {
+	if (new_win < cur_win) {
 		/* Danger Will Robinson!
 		 * Don't update rcv_wup/rcv_wnd here or else
 		 * we will not be able to advertise a zero
@@ -287,10 +287,12 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
 			       (TCPOPT_SACK <<  8) |
 			       (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
 						     TCPOLEN_SACK_PERBLOCK)));
-		for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
+
+		for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
 			*ptr++ = htonl(sp[this_sack].start_seq);
 			*ptr++ = htonl(sp[this_sack].end_seq);
 		}
+
 		if (tp->rx_opt.dsack) {
 			tp->rx_opt.dsack = 0;
 			tp->rx_opt.eff_sacks--;
@@ -335,7 +337,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
 	 */
 	*ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
 	if (ts) {
-		if(sack)
+		if (sack)
 			*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
 				       (TCPOLEN_SACK_PERM << 16) |
 				       (TCPOPT_TIMESTAMP << 8) |
@@ -347,7 +349,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
 				       TCPOLEN_TIMESTAMP);
 		*ptr++ = htonl(tstamp);		/* TSVAL */
 		*ptr++ = htonl(ts_recent);	/* TSECR */
-	} else if(sack)
+	} else if (sack)
 		*ptr++ = htonl((TCPOPT_NOP << 24) |
 			       (TCPOPT_NOP << 16) |
 			       (TCPOPT_SACK_PERM << 8) |
@@ -428,7 +430,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	sysctl_flags = 0;
 	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
 		tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
-		if(sysctl_tcp_timestamps) {
+		if (sysctl_tcp_timestamps) {
 			tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
 			sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
 		}
@@ -1618,7 +1620,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		u16 flags = TCP_SKB_CB(skb)->flags;
 
 		/* Also punt if next skb has been SACK'd. */
-		if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
+		if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
 			return;
 
 		/* Next skb is out of window. */
@@ -1778,13 +1780,13 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/* Collapse two adjacent packets if worthwhile and we can. */
-	if(!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
-	   (skb->len < (cur_mss >> 1)) &&
-	   (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
-	   (!tcp_skb_is_last(sk, skb)) &&
-	   (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
-	   (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
-	   (sysctl_tcp_retrans_collapse != 0))
+	if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
+	    (skb->len < (cur_mss >> 1)) &&
+	    (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
+	    (!tcp_skb_is_last(sk, skb)) &&
+	    (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
+	    (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
+	    (sysctl_tcp_retrans_collapse != 0))
 		tcp_retrans_try_collapse(sk, skb, cur_mss);
 
 	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
@@ -1794,9 +1796,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 * retransmit when old data is attached.  So strip it off
 	 * since it is cheap to do so and saves bytes on the network.
 	 */
-	if(skb->len > 0 &&
-	   (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
-	   tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
+	if (skb->len > 0 &&
+	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
+	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
 		if (!pskb_trim(skb, 0)) {
 			TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
 			skb_shinfo(skb)->gso_segs = 1;
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 4e1b61032a9..1f91aeae10a 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -226,7 +226,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct westwood *w = inet_csk_ca(sk);
 
-	switch(event) {
+	switch (event) {
 	case CA_EVENT_FAST_ACK:
 		westwood_fast_bw(sk);
 		break;
-- 
cgit v1.2.3


From 6f05f629716a71d4c9c82813f45d3e9a6e90d146 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 8 Mar 2007 20:46:03 -0800
Subject: [NET]: deinline some functions

Several functions are marked inline or forced inline, but it
would be better to let the compiler decide.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 582db646cc5..424d6d0e98f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1673,9 +1673,9 @@ static void net_tx_action(struct softirq_action *h)
 	}
 }
 
-static __inline__ int deliver_skb(struct sk_buff *skb,
-				  struct packet_type *pt_prev,
-				  struct net_device *orig_dev)
+static inline int deliver_skb(struct sk_buff *skb,
+			      struct packet_type *pt_prev,
+			      struct net_device *orig_dev)
 {
 	atomic_inc(&skb->users);
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
@@ -2065,7 +2065,7 @@ static int dev_ifconf(char __user *arg)
  *	This is invoked by the /proc filesystem handler to display a device
  *	in detail.
  */
-static __inline__ struct net_device *dev_get_idx(loff_t pos)
+static struct net_device *dev_get_idx(loff_t pos)
 {
 	struct net_device *dev;
 	loff_t i;
@@ -2836,7 +2836,7 @@ static int dev_boot_phase = 1;
 static DEFINE_SPINLOCK(net_todo_list_lock);
 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
 
-static inline void net_set_todo(struct net_device *dev)
+static void net_set_todo(struct net_device *dev)
 {
 	spin_lock(&net_todo_list_lock);
 	list_add_tail(&dev->todo_list, &net_todo_list);
-- 
cgit v1.2.3


From 3fbe070a4293e8ab2d2edb1bc23f1e5220ce61af Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 8 Mar 2007 20:46:41 -0800
Subject: [UDP]: deinline

A couple of functions are exported or used indirectly
so it is pointless to mark them as inline.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 21901d2c532..ca353802ca6 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -213,13 +213,13 @@ fail:
 	return error;
 }
 
-__inline__ int udp_get_port(struct sock *sk, unsigned short snum,
+int udp_get_port(struct sock *sk, unsigned short snum,
 			int (*scmp)(const struct sock *, const struct sock *))
 {
 	return  __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
 }
 
-inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
 {
 	struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
 
@@ -391,7 +391,7 @@ out:
 	sock_put(sk);
 }
 
-__inline__ void udp_err(struct sk_buff *skb, u32 info)
+void udp_err(struct sk_buff *skb, u32 info)
 {
 	return __udp4_lib_err(skb, info, udp_hash);
 }
@@ -1296,7 +1296,7 @@ drop:
 	return 0;
 }
 
-__inline__ int udp_rcv(struct sk_buff *skb)
+int udp_rcv(struct sk_buff *skb)
 {
 	return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
 }
-- 
cgit v1.2.3


From 4c6510a738c71ca6b4b7b624a7d0a00acebfd7fb Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sun, 18 Mar 2007 17:35:57 -0700
Subject: [IPV6]: Return correct result for sticky options.

We returned incorrect result with IPV6_RTHDRDSTOPTS, IPV6_RTHDR and
IPV6_DSTOPTS.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ipv6_sockglue.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 1cb7a084f0c..eceed09e434 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -797,13 +797,32 @@ EXPORT_SYMBOL(compat_ipv6_setsockopt);
 #endif
 
 static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
-				  char __user *optval, int len)
+				  int optname, char __user *optval, int len)
 {
 	struct ipv6_opt_hdr *hdr;
 
-	if (!opt || !opt->hopopt)
+	if (!opt)
+		return 0;
+
+	switch(optname) {
+	case IPV6_HOPOPTS:
+		hdr = opt->hopopt;
+		break;
+	case IPV6_RTHDRDSTOPTS:
+		hdr = opt->dst0opt;
+		break;
+	case IPV6_RTHDR:
+		hdr = (struct ipv6_opt_hdr *)opt->srcrt;
+		break;
+	case IPV6_DSTOPTS:
+		hdr = opt->dst1opt;
+		break;
+	default:
+		return -EINVAL;	/* should not happen */
+	}
+
+	if (!hdr)
 		return 0;
-	hdr = opt->hopopt;
 
 	len = min_t(unsigned int, len, ipv6_optlen(hdr));
 	if (copy_to_user(optval, hdr, ipv6_optlen(hdr)))
@@ -946,7 +965,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 
 		lock_sock(sk);
 		len = ipv6_getsockopt_sticky(sk, np->opt,
-					     optval, len);
+					     optname, optval, len);
 		release_sock(sk);
 		return put_user(len, optlen);
 	}
-- 
cgit v1.2.3


From e5268f12f26f1f51590cd1ed26547e21c46b08f2 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Thu, 8 Mar 2007 20:48:23 -0800
Subject: [IPV6]: Ensure to truncate result and return full length for sticky
 options.

Bug noticed by Chris Wright <chrisw@sous-sol.org>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ipv6_sockglue.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index eceed09e434..1d56b465bdd 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -825,9 +825,9 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
 		return 0;
 
 	len = min_t(unsigned int, len, ipv6_optlen(hdr));
-	if (copy_to_user(optval, hdr, ipv6_optlen(hdr)))
+	if (copy_to_user(optval, hdr, len));
 		return -EFAULT;
-	return len;
+	return ipv6_optlen(hdr);
 }
 
 static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
-- 
cgit v1.2.3


From 6dea649a8a4c4b086227018c919298f988c34b30 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 8 Mar 2007 22:36:37 -0800
Subject: [NET]: New sysctls should use __read_mostly tags

net_msg_warn should be placed in the read_mostly section, to avoid
performance problems on SMP

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/utils.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/core/utils.c b/net/core/utils.c
index 34f08107b98..adecfd281ae 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -30,9 +30,9 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
-int net_msg_cost = 5*HZ;
-int net_msg_burst = 10;
-int net_msg_warn = 1;
+int net_msg_cost __read_mostly = 5*HZ;
+int net_msg_burst __read_mostly = 10;
+int net_msg_warn __read_mostly = 1;
 EXPORT_SYMBOL(net_msg_warn);
 
 /*
-- 
cgit v1.2.3


From c7a3c5da35055e2fa97ed4f0da3eec4bd0ef4c38 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 9 Mar 2007 13:51:54 -0800
Subject: [UDP]: Use __skb_pull since we have checked it won't fail with
 pskb_may_pull

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ca353802ca6..1bbf5510cf3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1002,7 +1002,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
 	 * transport header to point to ESP.  Keep UDP on the stack
 	 * for later.
 	 */
-	skb->h.raw = skb_pull(skb, len);
+	skb->h.raw = __skb_pull(skb, len);
 
 	/* modify the protocol (it's ESP!) */
 	iph->protocol = IPPROTO_ESP;
-- 
cgit v1.2.3


From 92f37fd2ee805aa77925c1e64fd56088b46094fc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Sun, 25 Mar 2007 22:14:49 -0700
Subject: [NET]: Adding SO_TIMESTAMPNS / SCM_TIMESTAMPNS support

Now that network timestamps use ktime_t infrastructure, we can add a new
SOL_SOCKET sockopt  SO_TIMESTAMPNS.

This command is similar to SO_TIMESTAMP, but permits transmission of
a 'timespec struct' instead of a 'timeval struct' control message.
(nanosecond resolution instead of microsecond)

Control message is labelled SCM_TIMESTAMPNS instead of SCM_TIMESTAMP

A socket cannot mix SO_TIMESTAMP and SO_TIMESTAMPNS : the two modes are
mutually exclusive.

sock_recv_timestamp() became too big to be fully inlined so I added a
__sock_recv_timestamp() helper function.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
CC: linux-arch@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/asm-alpha/socket.h   |  2 ++
 include/asm-arm/socket.h     |  2 ++
 include/asm-arm26/socket.h   |  2 ++
 include/asm-avr32/socket.h   |  2 ++
 include/asm-cris/socket.h    |  2 ++
 include/asm-frv/socket.h     |  2 ++
 include/asm-h8300/socket.h   |  2 ++
 include/asm-i386/socket.h    |  2 ++
 include/asm-ia64/socket.h    |  2 ++
 include/asm-m32r/socket.h    |  2 ++
 include/asm-m68k/socket.h    |  2 ++
 include/asm-mips/socket.h    |  2 ++
 include/asm-parisc/socket.h  |  2 ++
 include/asm-powerpc/socket.h |  2 ++
 include/asm-s390/socket.h    |  2 ++
 include/asm-sh/socket.h      |  2 ++
 include/asm-sparc/socket.h   |  2 ++
 include/asm-sparc64/socket.h |  2 ++
 include/asm-v850/socket.h    |  2 ++
 include/asm-x86_64/socket.h  |  2 ++
 include/asm-xtensa/socket.h  |  2 ++
 include/net/sock.h           | 17 +++++++----------
 net/compat.c                 | 10 +++++++++-
 net/core/sock.c              | 16 ++++++++++++++--
 net/socket.c                 | 29 +++++++++++++++++++++++++++++
 25 files changed, 101 insertions(+), 13 deletions(-)

diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h
index d22ab97ea72..1fede7f9286 100644
--- a/include/asm-alpha/socket.h
+++ b/include/asm-alpha/socket.h
@@ -52,6 +52,8 @@
 
 #define SO_PEERSEC		30
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		19
diff --git a/include/asm-arm/socket.h b/include/asm-arm/socket.h
index 19f7df702b0..65a1a64bf93 100644
--- a/include/asm-arm/socket.h
+++ b/include/asm-arm/socket.h
@@ -49,5 +49,7 @@
 
 #define SO_PEERSEC		31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif /* _ASM_SOCKET_H */
diff --git a/include/asm-arm26/socket.h b/include/asm-arm26/socket.h
index 19f7df702b0..65a1a64bf93 100644
--- a/include/asm-arm26/socket.h
+++ b/include/asm-arm26/socket.h
@@ -49,5 +49,7 @@
 
 #define SO_PEERSEC		31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif /* _ASM_SOCKET_H */
diff --git a/include/asm-avr32/socket.h b/include/asm-avr32/socket.h
index 543229de817..a0d0507a503 100644
--- a/include/asm-avr32/socket.h
+++ b/include/asm-avr32/socket.h
@@ -49,5 +49,7 @@
 
 #define SO_PEERSEC		31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif /* __ASM_AVR32_SOCKET_H */
diff --git a/include/asm-cris/socket.h b/include/asm-cris/socket.h
index 01cfdf1d6d3..5b18dfdf174 100644
--- a/include/asm-cris/socket.h
+++ b/include/asm-cris/socket.h
@@ -51,6 +51,8 @@
 
 #define SO_PEERSEC             31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif /* _ASM_SOCKET_H */
 
diff --git a/include/asm-frv/socket.h b/include/asm-frv/socket.h
index 31db18fc871..a823befd11d 100644
--- a/include/asm-frv/socket.h
+++ b/include/asm-frv/socket.h
@@ -49,6 +49,8 @@
 
 #define SO_PEERSEC		31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif /* _ASM_SOCKET_H */
 
diff --git a/include/asm-h8300/socket.h b/include/asm-h8300/socket.h
index ebc830fee0d..39911d8c968 100644
--- a/include/asm-h8300/socket.h
+++ b/include/asm-h8300/socket.h
@@ -49,5 +49,7 @@
 
 #define SO_PEERSEC		31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif /* _ASM_SOCKET_H */
diff --git a/include/asm-i386/socket.h b/include/asm-i386/socket.h
index 5755d57c4e9..99ca648b94c 100644
--- a/include/asm-i386/socket.h
+++ b/include/asm-i386/socket.h
@@ -49,5 +49,7 @@
 
 #define SO_PEERSEC		31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif /* _ASM_SOCKET_H */
diff --git a/include/asm-ia64/socket.h b/include/asm-ia64/socket.h
index d638ef3d50c..9e42ce43cfb 100644
--- a/include/asm-ia64/socket.h
+++ b/include/asm-ia64/socket.h
@@ -58,5 +58,7 @@
 
 #define SO_PEERSEC             31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/include/asm-m32r/socket.h b/include/asm-m32r/socket.h
index acdf748fcdc..793d5d30c85 100644
--- a/include/asm-m32r/socket.h
+++ b/include/asm-m32r/socket.h
@@ -49,5 +49,7 @@
 
 #define SO_PEERSEC		31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/include/asm-m68k/socket.h b/include/asm-m68k/socket.h
index a5966ec005a..6d21b90863a 100644
--- a/include/asm-m68k/socket.h
+++ b/include/asm-m68k/socket.h
@@ -49,5 +49,7 @@
 
 #define SO_PEERSEC             31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif /* _ASM_SOCKET_H */
diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h
index 36ebe4e186a..95945689b1c 100644
--- a/include/asm-mips/socket.h
+++ b/include/asm-mips/socket.h
@@ -70,6 +70,8 @@ To add: #define SO_REUSEPORT 0x0200	/* Allow local address and port reuse.  */
 #define SO_SNDBUFFORCE		31
 #define SO_RCVBUFFORCE		33
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h
index ce2eae1708b..99e868f6a8f 100644
--- a/include/asm-parisc/socket.h
+++ b/include/asm-parisc/socket.h
@@ -33,6 +33,8 @@
 #define SO_PEERCRED	0x4011
 #define SO_TIMESTAMP	0x4012
 #define SCM_TIMESTAMP	SO_TIMESTAMP
+#define SO_TIMESTAMPNS	0x4013
+#define SCM_TIMESTAMPNS	SO_TIMESTAMPNS
 
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x4016
diff --git a/include/asm-powerpc/socket.h b/include/asm-powerpc/socket.h
index c8b1da50e72..403e9fde2eb 100644
--- a/include/asm-powerpc/socket.h
+++ b/include/asm-powerpc/socket.h
@@ -56,5 +56,7 @@
 
 #define SO_PEERSEC		31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/include/asm-s390/socket.h b/include/asm-s390/socket.h
index 1778a49a74c..1161ebe3dec 100644
--- a/include/asm-s390/socket.h
+++ b/include/asm-s390/socket.h
@@ -57,5 +57,7 @@
 
 #define SO_PEERSEC		31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif /* _ASM_SOCKET_H */
diff --git a/include/asm-sh/socket.h b/include/asm-sh/socket.h
index ca70362eb56..c48d6fc9da3 100644
--- a/include/asm-sh/socket.h
+++ b/include/asm-sh/socket.h
@@ -49,5 +49,7 @@
 
 #define SO_PEERSEC		31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif /* __ASM_SH_SOCKET_H */
diff --git a/include/asm-sparc/socket.h b/include/asm-sparc/socket.h
index f6c4e5baf3f..7c1423997cf 100644
--- a/include/asm-sparc/socket.h
+++ b/include/asm-sparc/socket.h
@@ -49,6 +49,8 @@
 
 #define SO_PEERSEC		0x001e
 #define SO_PASSSEC		0x001f
+#define SO_TIMESTAMPNS		0x0021
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
diff --git a/include/asm-sparc64/socket.h b/include/asm-sparc64/socket.h
index 754d46a50af..986441dcb8f 100644
--- a/include/asm-sparc64/socket.h
+++ b/include/asm-sparc64/socket.h
@@ -49,6 +49,8 @@
 
 #define SO_PEERSEC		0x001e
 #define SO_PASSSEC		0x001f
+#define SO_TIMESTAMPNS		0x0021
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
diff --git a/include/asm-v850/socket.h b/include/asm-v850/socket.h
index 0dfe55ac2ef..a4c2493b025 100644
--- a/include/asm-v850/socket.h
+++ b/include/asm-v850/socket.h
@@ -49,5 +49,7 @@
 
 #define SO_PEERSEC		31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif /* __V850_SOCKET_H__ */
diff --git a/include/asm-x86_64/socket.h b/include/asm-x86_64/socket.h
index b4670260793..90af60cf3c0 100644
--- a/include/asm-x86_64/socket.h
+++ b/include/asm-x86_64/socket.h
@@ -49,5 +49,7 @@
 
 #define SO_PEERSEC             31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif /* _ASM_SOCKET_H */
diff --git a/include/asm-xtensa/socket.h b/include/asm-xtensa/socket.h
index 971d231be60..1f5aeacb9da 100644
--- a/include/asm-xtensa/socket.h
+++ b/include/asm-xtensa/socket.h
@@ -60,5 +60,7 @@
 #define SO_ACCEPTCONN		30
 #define SO_PEERSEC		31
 #define SO_PASSSEC		34
+#define SO_TIMESTAMPNS		35
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 51246579592..390c0470059 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -390,6 +390,7 @@ enum sock_flags {
 	SOCK_USE_WRITE_QUEUE, /* whether to call sk->sk_write_space in sock_wfree */
 	SOCK_DBG, /* %SO_DEBUG setting */
 	SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */
+	SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */
 	SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
 	SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
 };
@@ -1283,21 +1284,17 @@ static inline int sock_intr_errno(long timeo)
 	return timeo == MAX_SCHEDULE_TIMEOUT ? -ERESTARTSYS : -EINTR;
 }
 
+extern void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
+	struct sk_buff *skb);
+
 static __inline__ void
 sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 {
 	ktime_t kt = skb->tstamp;
 
-	if (sock_flag(sk, SOCK_RCVTSTAMP)) {
-		struct timeval tv;
-		/* Race occurred between timestamp enabling and packet
-		   receiving.  Fill in the current time for now. */
-		if (kt.tv64 == 0)
-			kt = ktime_get_real();
-		skb->tstamp = kt;
-		tv = ktime_to_timeval(kt);
-		put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(tv), &tv);
-	} else
+	if (sock_flag(sk, SOCK_RCVTSTAMP))
+		__sock_recv_timestamp(msg, sk, skb);
+	else
 		sk->sk_stamp = kt;
 }
 
diff --git a/net/compat.c b/net/compat.c
index 0e407563ae8..9a0f5f2b90c 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -215,6 +215,7 @@ Efault:
 int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data)
 {
 	struct compat_timeval ctv;
+	struct compat_timespec cts;
 	struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control;
 	struct compat_cmsghdr cmhdr;
 	int cmlen;
@@ -229,7 +230,14 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
 		ctv.tv_sec = tv->tv_sec;
 		ctv.tv_usec = tv->tv_usec;
 		data = &ctv;
-		len = sizeof(struct compat_timeval);
+		len = sizeof(ctv);
+	}
+	if (level == SOL_SOCKET && type == SO_TIMESTAMPNS) {
+		struct timespec *ts = (struct timespec *)data;
+		cts.tv_sec = ts->tv_sec;
+		cts.tv_nsec = ts->tv_nsec;
+		data = &cts;
+		len = sizeof(cts);
 	}
 
 	cmlen = CMSG_COMPAT_LEN(len);
diff --git a/net/core/sock.c b/net/core/sock.c
index 792ae39804a..f9e6991d372 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -521,11 +521,18 @@ set_rcvbuf:
 		break;
 
 	case SO_TIMESTAMP:
+	case SO_TIMESTAMPNS:
 		if (valbool)  {
+			if (optname == SO_TIMESTAMP)
+				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+			else
+				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
 			sock_set_flag(sk, SOCK_RCVTSTAMP);
 			sock_enable_timestamp(sk);
-		} else
+		} else {
 			sock_reset_flag(sk, SOCK_RCVTSTAMP);
+			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+		}
 		break;
 
 	case SO_RCVLOWAT:
@@ -715,7 +722,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case SO_TIMESTAMP:
-		v.val = sock_flag(sk, SOCK_RCVTSTAMP);
+		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
+				!sock_flag(sk, SOCK_RCVTSTAMPNS);
+		break;
+
+	case SO_TIMESTAMPNS:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
 		break;
 
 	case SO_RCVTIMEO:
diff --git a/net/socket.c b/net/socket.c
index cf18c5eb592..a7bd0df115b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -585,6 +585,35 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 	return result;
 }
 
+/*
+ * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
+ */
+void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
+	struct sk_buff *skb)
+{
+	ktime_t kt = skb->tstamp;
+
+	if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
+		struct timeval tv;
+		/* Race occurred between timestamp enabling and packet
+		   receiving.  Fill in the current time for now. */
+		if (kt.tv64 == 0)
+			kt = ktime_get_real();
+		skb->tstamp = kt;
+		tv = ktime_to_timeval(kt);
+		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
+	} else {
+		struct timespec ts;
+		/* Race occurred between timestamp enabling and packet
+		   receiving.  Fill in the current time for now. */
+		if (kt.tv64 == 0)
+			kt = ktime_get_real();
+		skb->tstamp = kt;
+		ts = ktime_to_timespec(kt);
+		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
+	}
+}
+
 static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 				 struct msghdr *msg, size_t size, int flags)
 {
-- 
cgit v1.2.3


From 7c81fd8bfbaa9732eca142350de5154da6919411 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 00:39:35 -0300
Subject: [SOCKET]: Export __sock_recv_timestamp

Kernel: arch/x86_64/boot/bzImage is ready  (#2)
  MODPOST 1816 modules
WARNING: "__sock_recv_timestamp" [net/sctp/sctp.ko] undefined!
WARNING: "__sock_recv_timestamp" [net/packet/af_packet.ko] undefined!
WARNING: "__sock_recv_timestamp" [net/key/af_key.ko] undefined!
WARNING: "__sock_recv_timestamp" [net/ipv6/ipv6.ko] undefined!
WARNING: "__sock_recv_timestamp" [net/atm/atm.ko] undefined!
make[2]: *** [__modpost] Error 1
make[1]: *** [modules] Error 2
make: *** [_all] Error 2

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/socket.c b/net/socket.c
index a7bd0df115b..1ad62c08377 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -614,6 +614,8 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 	}
 }
 
+EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
+
 static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 				 struct msghdr *msg, size_t size, int flags)
 {
-- 
cgit v1.2.3


From c1a4b86e396b6870b420d23e4d49c7b685aef0a4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Mar 2007 15:27:07 -0700
Subject: [TR]: Use tr_hdr() were appropriate

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tokenring/lanstreamer.c | 6 +++---
 drivers/net/tokenring/olympic.c     | 9 +++++----
 drivers/s390/net/qeth_main.c        | 2 +-
 net/802/tr.c                        | 3 ++-
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/net/tokenring/lanstreamer.c b/drivers/net/tokenring/lanstreamer.c
index e999feb8c0b..5e1b884e10c 100644
--- a/drivers/net/tokenring/lanstreamer.c
+++ b/drivers/net/tokenring/lanstreamer.c
@@ -1607,10 +1607,12 @@ static void streamer_arb_cmd(struct net_device *dev)
 				      frame_data, buffer_len);
 		} while (next_ptr && (buff_off = next_ptr));
 
+		mac_frame->dev = dev;
+		mac_frame->protocol = tr_type_trans(mac_frame, dev);
 #if STREAMER_NETWORK_MONITOR
 		printk(KERN_WARNING "%s: Received MAC Frame, details: \n",
 		       dev->name);
-		mac_hdr = (struct trh_hdr *) mac_frame->data;
+		mac_hdr = tr_hdr(mac_frame);
 		printk(KERN_WARNING
 		       "%s: MAC Frame Dest. Addr: %02x:%02x:%02x:%02x:%02x:%02x \n",
 		       dev->name, mac_hdr->daddr[0], mac_hdr->daddr[1],
@@ -1622,8 +1624,6 @@ static void streamer_arb_cmd(struct net_device *dev)
 		       mac_hdr->saddr[2], mac_hdr->saddr[3],
 		       mac_hdr->saddr[4], mac_hdr->saddr[5]);
 #endif
-		mac_frame->dev = dev;
-		mac_frame->protocol = tr_type_trans(mac_frame, dev);
 		netif_rx(mac_frame);
 
 		/* Now tell the card we have dealt with the received frame */
diff --git a/drivers/net/tokenring/olympic.c b/drivers/net/tokenring/olympic.c
index 8f4ecc1109c..683186afcab 100644
--- a/drivers/net/tokenring/olympic.c
+++ b/drivers/net/tokenring/olympic.c
@@ -1440,16 +1440,17 @@ static void olympic_arb_cmd(struct net_device *dev)
 			next_ptr=readw(buf_ptr+offsetof(struct mac_receive_buffer,next)); 
 		} while (next_ptr && (buf_ptr=olympic_priv->olympic_lap + ntohs(next_ptr)));
 
+		mac_frame->dev = dev;
+		mac_frame->protocol = tr_type_trans(mac_frame, dev);
+
 		if (olympic_priv->olympic_network_monitor) { 
 			struct trh_hdr *mac_hdr ; 
 			printk(KERN_WARNING "%s: Received MAC Frame, details: \n",dev->name) ;
-			mac_hdr = (struct trh_hdr *)mac_frame->data ; 
+			mac_hdr = tr_hdr(mac_frame);
 			printk(KERN_WARNING "%s: MAC Frame Dest. Addr: %02x:%02x:%02x:%02x:%02x:%02x \n", dev->name , mac_hdr->daddr[0], mac_hdr->daddr[1], mac_hdr->daddr[2], mac_hdr->daddr[3], mac_hdr->daddr[4], mac_hdr->daddr[5]) ; 
 			printk(KERN_WARNING "%s: MAC Frame Srce. Addr: %02x:%02x:%02x:%02x:%02x:%02x \n", dev->name , mac_hdr->saddr[0], mac_hdr->saddr[1], mac_hdr->saddr[2], mac_hdr->saddr[3], mac_hdr->saddr[4], mac_hdr->saddr[5]) ; 
 		}
-		mac_frame->dev = dev ; 
-		mac_frame->protocol = tr_type_trans(mac_frame,dev);
-		netif_rx(mac_frame) ; 	
+		netif_rx(mac_frame);
 		dev->last_rx = jiffies;
 
 drop_frame:
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index d8a86f5af37..f2b9b1b1ec1 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -2308,7 +2308,7 @@ qeth_rebuild_skb_fake_ll_tr(struct qeth_card *card, struct sk_buff *skb,
 	QETH_DBF_TEXT(trace,5,"skbfktr");
 	skb->mac.raw = skb->data - QETH_FAKE_LL_LEN_TR;
 	/* this is a fake ethernet header */
-	fake_hdr = (struct trh_hdr *) skb->mac.raw;
+	fake_hdr = tr_hdr(skb);
 
 	/* the destination MAC address */
 	switch (skb->pkt_type){
diff --git a/net/802/tr.c b/net/802/tr.c
index 96bd14452c5..a6a6d46e708 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -189,11 +189,12 @@ static int tr_rebuild_header(struct sk_buff *skb)
 __be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 
-	struct trh_hdr *trh=(struct trh_hdr *)skb->data;
+	struct trh_hdr *trh;
 	struct trllc *trllc;
 	unsigned riflen=0;
 
 	skb->mac.raw = skb->data;
+	trh = tr_hdr(skb);
 
 	if(trh->saddr[0] & TR_RII)
 		riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8;
-- 
cgit v1.2.3


From c8fb7948dc1aeff0515b2912b564d4236f6c0ebd Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Mar 2007 15:29:16 -0700
Subject: [TR]: Make tr_type_trans set skb->dev

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tokenring/3c359.c       | 4 ----
 drivers/net/tokenring/ibmtr.c       | 1 -
 drivers/net/tokenring/lanstreamer.c | 3 ---
 drivers/net/tokenring/olympic.c     | 3 ---
 drivers/net/tokenring/smctr.c       | 2 --
 drivers/net/tokenring/tms380tr.c    | 1 -
 net/802/tr.c                        | 1 +
 net/atm/lec.c                       | 4 +++-
 8 files changed, 4 insertions(+), 15 deletions(-)

diff --git a/drivers/net/tokenring/3c359.c b/drivers/net/tokenring/3c359.c
index 7580bdeacad..d293423ee8e 100644
--- a/drivers/net/tokenring/3c359.c
+++ b/drivers/net/tokenring/3c359.c
@@ -933,8 +933,6 @@ static void xl_rx(struct net_device *dev)
 				return ; 				
 			}
 	
-			skb->dev = dev ; 
-
 			while (xl_priv->rx_ring_tail != temp_ring_loc) { 
 				copy_len = xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfraglen & 0x7FFF ; 
 				frame_length -= copy_len ;  
@@ -967,8 +965,6 @@ static void xl_rx(struct net_device *dev)
 				return ; 
 			}
 
-			skb->dev = dev ; 
-
 			skb2 = xl_priv->rx_ring_skb[xl_priv->rx_ring_tail] ; 
 			pci_unmap_single(xl_priv->pdev, xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr, xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; 
 			skb_put(skb2, frame_length) ; 
diff --git a/drivers/net/tokenring/ibmtr.c b/drivers/net/tokenring/ibmtr.c
index 01d55315ee8..1e8958ee2d0 100644
--- a/drivers/net/tokenring/ibmtr.c
+++ b/drivers/net/tokenring/ibmtr.c
@@ -1771,7 +1771,6 @@ static void tr_rx(struct net_device *dev)
 	/*BMS again, if she comes in with few but leaves with many */
 	skb_reserve(skb, sizeof(struct trh_hdr) - lan_hdr_len);
 	skb_put(skb, length);
-	skb->dev = dev;
 	data = skb->data;
 	rbuffer_len = ntohs(readw(rbuf + offsetof(struct rec_buf, buf_len)));
 	rbufdata = rbuf + offsetof(struct rec_buf, data);
diff --git a/drivers/net/tokenring/lanstreamer.c b/drivers/net/tokenring/lanstreamer.c
index 5e1b884e10c..5d849c089a3 100644
--- a/drivers/net/tokenring/lanstreamer.c
+++ b/drivers/net/tokenring/lanstreamer.c
@@ -944,8 +944,6 @@ static void streamer_rx(struct net_device *dev)
 				printk(KERN_WARNING "%s: Not enough memory to copy packet to upper layers. \n",	dev->name);
 				streamer_priv->streamer_stats.rx_dropped++;
 			} else {	/* we allocated an skb OK */
-				skb->dev = dev;
-
 				if (buffer_cnt == 1) {
 					/* release the DMA mapping */
 					pci_unmap_single(streamer_priv->pci_dev, 
@@ -1607,7 +1605,6 @@ static void streamer_arb_cmd(struct net_device *dev)
 				      frame_data, buffer_len);
 		} while (next_ptr && (buff_off = next_ptr));
 
-		mac_frame->dev = dev;
 		mac_frame->protocol = tr_type_trans(mac_frame, dev);
 #if STREAMER_NETWORK_MONITOR
 		printk(KERN_WARNING "%s: Received MAC Frame, details: \n",
diff --git a/drivers/net/tokenring/olympic.c b/drivers/net/tokenring/olympic.c
index 683186afcab..a6206580888 100644
--- a/drivers/net/tokenring/olympic.c
+++ b/drivers/net/tokenring/olympic.c
@@ -814,8 +814,6 @@ static void olympic_rx(struct net_device *dev)
 					olympic_priv->rx_ring_last_received += i ; 
 					olympic_priv->rx_ring_last_received &= (OLYMPIC_RX_RING_SIZE -1) ;  
 				} else  {
-					skb->dev = dev ; 
-
 					/* Optimise based upon number of buffers used. 
 			   	   	   If only one buffer is used we can simply swap the buffers around.
 			   	   	   If more than one then we must use the new buffer and copy the information
@@ -1440,7 +1438,6 @@ static void olympic_arb_cmd(struct net_device *dev)
 			next_ptr=readw(buf_ptr+offsetof(struct mac_receive_buffer,next)); 
 		} while (next_ptr && (buf_ptr=olympic_priv->olympic_lap + ntohs(next_ptr)));
 
-		mac_frame->dev = dev;
 		mac_frame->protocol = tr_type_trans(mac_frame, dev);
 
 		if (olympic_priv->olympic_network_monitor) { 
diff --git a/drivers/net/tokenring/smctr.c b/drivers/net/tokenring/smctr.c
index cec282a6f62..b0296d80e46 100644
--- a/drivers/net/tokenring/smctr.c
+++ b/drivers/net/tokenring/smctr.c
@@ -3896,7 +3896,6 @@ static int smctr_process_rx_packet(MAC_HEADER *rmf, __u16 size,
                 tp->MacStat.rx_bytes += skb->len;
 
                 /* Kick the packet on up. */
-                skb->dev = dev;
                 skb->protocol = tr_type_trans(skb, dev);
                 netif_rx(skb);
 		dev->last_rx = jiffies;
@@ -4483,7 +4482,6 @@ static int smctr_rx_frame(struct net_device *dev)
                                 	tp->MacStat.rx_bytes += skb->len;
 
                                 	/* Kick the packet on up. */
-                                	skb->dev = dev;
                                 	skb->protocol = tr_type_trans(skb, dev);
                                 	netif_rx(skb);
 					dev->last_rx = jiffies;
diff --git a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c
index ea797ca2b98..de6f72775ec 100644
--- a/drivers/net/tokenring/tms380tr.c
+++ b/drivers/net/tokenring/tms380tr.c
@@ -2168,7 +2168,6 @@ static void tms380tr_rcv_status_irq(struct net_device *dev)
 				}
 				else
 				{
-					skb->dev	= dev;
 					skb_put(skb, tp->MaxPacketSize);
 					rpl->SkbStat 	= SKB_DATA_COPY;
 					ReceiveDataPtr 	= rpl->MData;
diff --git a/net/802/tr.c b/net/802/tr.c
index a6a6d46e708..987d91559bc 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -193,6 +193,7 @@ __be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev)
 	struct trllc *trllc;
 	unsigned riflen=0;
 
+	skb->dev = dev;
 	skb->mac.raw = skb->data;
 	trh = tr_hdr(skb);
 
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 3d804d61f65..e5ed878c882 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -825,14 +825,16 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
 		if (!hlist_empty(&priv->lec_arp_empty_ones)) {
 			lec_arp_check_empties(priv, vcc, skb);
 		}
-		skb->dev = dev;
 		skb_pull(skb, 2);	/* skip lec_id */
 #ifdef CONFIG_TR
 		if (priv->is_trdev)
 			skb->protocol = tr_type_trans(skb, dev);
 		else
 #endif
+		{
+			skb->dev = dev;
 			skb->protocol = eth_type_trans(skb, dev);
+		}
 		priv->stats.rx_packets++;
 		priv->stats.rx_bytes += skb->len;
 		memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
-- 
cgit v1.2.3


From 0a4f23fbbff70c268b0f2f5e0b87301c132fb305 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 10:57:13 -0300
Subject: [HIPPI/FDDI]: Make {hippi,fddi}_type_trans set skb->dev

Now all the _type_trans routines are consistent in this regard.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/defxx.c       | 2 --
 drivers/net/rrunner.c     | 1 -
 drivers/net/skfp/skfddi.c | 1 -
 drivers/s390/net/lcs.c    | 1 -
 net/802/fddi.c            | 1 +
 net/802/hippi.c           | 8 ++++----
 6 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/net/defxx.c b/drivers/net/defxx.c
index 07d2731c1aa..8d29fae1c71 100644
--- a/drivers/net/defxx.c
+++ b/drivers/net/defxx.c
@@ -3096,8 +3096,6 @@ static void dfx_rcv_queue_process(
 
 					skb_reserve(skb,3);		/* adjust data field so that it points to FC byte */
 					skb_put(skb, pkt_len);		/* pass up packet length, NOT including CRC */
-					skb->dev = bp->dev;		/* pass up device pointer */
-
 					skb->protocol = fddi_type_trans(skb, bp->dev);
 					bp->rcv_total_bytes += skb->len;
 					netif_rx(skb);
diff --git a/drivers/net/rrunner.c b/drivers/net/rrunner.c
index d81536f90df..3a4fce38450 100644
--- a/drivers/net/rrunner.c
+++ b/drivers/net/rrunner.c
@@ -1029,7 +1029,6 @@ static void rx_int(struct net_device *dev, u32 rxlimit, u32 index)
 					goto defer;
 				}
 			}
-			skb->dev = dev;
 			skb->protocol = hippi_type_trans(skb, dev);
 
 			netif_rx(skb);		/* send it up */
diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c
index 9733a11c614..064e7c21c01 100644
--- a/drivers/net/skfp/skfddi.c
+++ b/drivers/net/skfp/skfddi.c
@@ -1680,7 +1680,6 @@ void mac_drv_rx_complete(struct s_smc *smc, volatile struct s_smt_fp_rxd *rxd,
 	rxd->rxd_os.skb = NULL;
 	skb_trim(skb, len);
 	skb->protocol = fddi_type_trans(skb, bp->dev);
-	skb->dev = bp->dev;	/* pass up device pointer */
 
 	netif_rx(skb);
 	bp->dev->last_rx = jiffies;
diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
index ecca1046714..1c23e187a3b 100644
--- a/drivers/s390/net/lcs.c
+++ b/drivers/s390/net/lcs.c
@@ -1784,7 +1784,6 @@ lcs_get_skb(struct lcs_card *card, char *skb_data, unsigned int skb_len)
 		card->stats.rx_dropped++;
 		return;
 	}
-	skb->dev = card->dev;
 	memcpy(skb_put(skb, skb_len), skb_data, skb_len);
 	skb->protocol =	card->lan_type_trans(skb, card->dev);
 	card->stats.rx_bytes += skb_len;
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 8c86216b1c8..f8a0c9f6fec 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -130,6 +130,7 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
 	 * to start of packet data.  Assume 802.2 SNAP frames for now.
 	 */
 
+	skb->dev = dev;
 	skb->mac.raw = skb->data;	/* point to frame control (FC) */
 
 	if(fddi->hdr.llc_8022_1.dsap==0xe0)
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 35dd938cff9..138302c14ee 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -126,14 +126,14 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	struct hippi_hdr *hip;
 
-	hip = (struct hippi_hdr *) skb->data;
-
 	/*
 	 * This is actually wrong ... question is if we really should
 	 * set the raw address here.
 	 */
-	 skb->mac.raw = skb->data;
-	 skb_pull(skb, HIPPI_HLEN);
+	skb->dev = dev;
+	skb->mac.raw = skb->data;
+	hip = (struct hippi_hdr *)skb->mac.raw;
+	skb_pull(skb, HIPPI_HLEN);
 
 	/*
 	 * No fancy promisc stuff here now.
-- 
cgit v1.2.3


From 4839fccea04b5f4d2b3ce01585d6bdbcbc24002c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 11:13:59 -0300
Subject: [QETH]: Use eth_hdr()

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index f2b9b1b1ec1..d502b77adf6 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -2361,7 +2361,7 @@ qeth_rebuild_skb_fake_ll_eth(struct qeth_card *card, struct sk_buff *skb,
 	QETH_DBF_TEXT(trace,5,"skbfketh");
 	skb->mac.raw = skb->data - QETH_FAKE_LL_LEN_ETH;
 	/* this is a fake ethernet header */
-	fake_hdr = (struct ethhdr *) skb->mac.raw;
+	fake_hdr = eth_hdr(skb);
 
 	/* the destination MAC address */
 	switch (skb->pkt_type){
-- 
cgit v1.2.3


From 029720f15dcd3c6c16824177cfc486083b229411 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 11:20:07 -0300
Subject: [AOE]: Introduce aoe_hdr()

For consistency with other skb->mac.raw users.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/block/aoe/aoe.h    |  9 +++++++++
 drivers/block/aoe/aoecmd.c | 14 +++++++-------
 drivers/block/aoe/aoenet.c |  2 +-
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 2308e83e5f3..4c34f8d31cc 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -48,6 +48,15 @@ struct aoe_hdr {
 	__be32 tag;
 };
 
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct aoe_hdr *aoe_hdr(const struct sk_buff *skb)
+{
+	return (struct aoe_hdr *)skb->mac.raw;
+}
+#endif
+
 struct aoe_atahdr {
 	unsigned char aflags;
 	unsigned char errfeat;
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 8d17d8df366..4ab7b40e8c5 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -118,7 +118,7 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f)
 
 	/* initialize the headers & frame */
 	skb = f->skb;
-	h = (struct aoe_hdr *) skb->mac.raw;
+	h = aoe_hdr(skb);
 	ah = (struct aoe_atahdr *) (h+1);
 	skb_put(skb, sizeof *h + sizeof *ah);
 	memset(h, 0, skb->len);
@@ -207,7 +207,7 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail)
 		skb->dev = ifp;
 		if (sl_tail == NULL)
 			sl_tail = skb;
-		h = (struct aoe_hdr *) skb->mac.raw;
+		h = aoe_hdr(skb);
 		memset(h, 0, sizeof *h + sizeof *ch);
 
 		memset(h->dst, 0xff, sizeof h->dst);
@@ -300,7 +300,7 @@ rexmit(struct aoedev *d, struct frame *f)
 	aoechr_error(buf);
 
 	skb = f->skb;
-	h = (struct aoe_hdr *) skb->mac.raw;
+	h = aoe_hdr(skb);
 	ah = (struct aoe_atahdr *) (h+1);
 	f->tag = n;
 	h->tag = cpu_to_be32(n);
@@ -529,7 +529,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 	char ebuf[128];
 	u16 aoemajor;
 
-	hin = (struct aoe_hdr *) skb->mac.raw;
+	hin = aoe_hdr(skb);
 	aoemajor = be16_to_cpu(get_unaligned(&hin->major));
 	d = aoedev_by_aoeaddr(aoemajor, hin->minor);
 	if (d == NULL) {
@@ -561,7 +561,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 	calc_rttavg(d, tsince(f->tag));
 
 	ahin = (struct aoe_atahdr *) (hin+1);
-	hout = (struct aoe_hdr *) f->skb->mac.raw;
+	hout = aoe_hdr(f->skb);
 	ahout = (struct aoe_atahdr *) (hout+1);
 	buf = f->buf;
 
@@ -695,7 +695,7 @@ aoecmd_ata_id(struct aoedev *d)
 
 	/* initialize the headers & frame */
 	skb = f->skb;
-	h = (struct aoe_hdr *) skb->mac.raw;
+	h = aoe_hdr(skb);
 	ah = (struct aoe_atahdr *) (h+1);
 	skb_put(skb, sizeof *h + sizeof *ah);
 	memset(h, 0, skb->len);
@@ -726,7 +726,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
 	enum { MAXFRAMES = 16 };
 	u16 n;
 
-	h = (struct aoe_hdr *) skb->mac.raw;
+	h = aoe_hdr(skb);
 	ch = (struct aoe_cfghdr *) (h+1);
 
 	/*
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index aab6d91a2c2..f9ddfda4d9c 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -123,7 +123,7 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt,
 		goto exit;
 	skb_push(skb, ETH_HLEN);	/* (1) */
 
-	h = (struct aoe_hdr *) skb->mac.raw;
+	h = aoe_hdr(skb);
 	n = be32_to_cpu(get_unaligned(&h->tag));
 	if ((h->verfl & AOEFL_RSP) == 0 || (n & 1<<31))
 		goto exit;
-- 
cgit v1.2.3


From 4c13eb6657fe9ef7b4dc8f1a405c902e9e5234e0 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 25 Apr 2007 17:40:23 -0700
Subject: [ETH]: Make eth_type_trans set skb->dev like the other *_type_trans

One less thing for drivers writers to worry about.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/ia64/hp/sim/simeth.c                     | 1 -
 arch/ia64/sn/kernel/xpnet.c                   | 1 -
 arch/ppc/8260_io/enet.c                       | 1 -
 arch/ppc/8260_io/fcc_enet.c                   | 1 -
 arch/ppc/8xx_io/enet.c                        | 1 -
 arch/ppc/8xx_io/fec.c                         | 1 -
 drivers/infiniband/hw/amso1100/c2.c           | 1 -
 drivers/isdn/hysdn/hysdn_net.c                | 2 --
 drivers/net/3c501.c                           | 1 -
 drivers/net/3c505.c                           | 1 -
 drivers/net/3c507.c                           | 1 -
 drivers/net/3c509.c                           | 1 -
 drivers/net/3c515.c                           | 2 --
 drivers/net/3c523.c                           | 1 -
 drivers/net/3c527.c                           | 1 -
 drivers/net/3c59x.c                           | 2 --
 drivers/net/7990.c                            | 1 -
 drivers/net/8139cp.c                          | 2 --
 drivers/net/8139too.c                         | 1 -
 drivers/net/82596.c                           | 1 -
 drivers/net/a2065.c                           | 1 -
 drivers/net/acenic.c                          | 1 -
 drivers/net/amd8111e.c                        | 4 ----
 drivers/net/ariadne.c                         | 1 -
 drivers/net/arm/am79c961a.c                   | 1 -
 drivers/net/arm/at91_ether.c                  | 1 -
 drivers/net/arm/ep93xx_eth.c                  | 1 -
 drivers/net/arm/ether1.c                      | 1 -
 drivers/net/arm/ether3.c                      | 1 -
 drivers/net/at1700.c                          | 1 -
 drivers/net/atari_bionet.c                    | 1 -
 drivers/net/atarilance.c                      | 1 -
 drivers/net/atl1/atl1_main.c                  | 2 --
 drivers/net/atp.c                             | 1 -
 drivers/net/au1000_eth.c                      | 1 -
 drivers/net/b44.c                             | 1 -
 drivers/net/bmac.c                            | 1 -
 drivers/net/cassini.c                         | 1 -
 drivers/net/chelsio/sge.c                     | 3 +--
 drivers/net/cris/eth_v10.c                    | 1 -
 drivers/net/cs89x0.c                          | 2 --
 drivers/net/cxgb3/sge.c                       | 3 +--
 drivers/net/de600.c                           | 1 -
 drivers/net/de620.c                           | 1 -
 drivers/net/declance.c                        | 1 -
 drivers/net/depca.c                           | 1 -
 drivers/net/dgrs.c                            | 1 -
 drivers/net/dl2k.c                            | 4 ----
 drivers/net/dm9000.c                          | 1 -
 drivers/net/eepro.c                           | 1 -
 drivers/net/eepro100.c                        | 1 -
 drivers/net/eexpress.c                        | 1 -
 drivers/net/epic100.c                         | 3 ---
 drivers/net/eth16i.c                          | 1 -
 drivers/net/ewrk3.c                           | 1 -
 drivers/net/fealnx.c                          | 1 -
 drivers/net/fec.c                             | 1 -
 drivers/net/fec_8xx/fec_main.c                | 1 -
 drivers/net/forcedeth.c                       | 2 --
 drivers/net/fs_enet/fs_enet-main.c            | 3 ---
 drivers/net/gianfar.c                         | 2 --
 drivers/net/hamachi.c                         | 1 -
 drivers/net/hp100.c                           | 1 -
 drivers/net/ibm_emac/ibm_emac_core.c          | 1 -
 drivers/net/ibmlana.c                         | 1 -
 drivers/net/ibmveth.c                         | 1 -
 drivers/net/ioc3-eth.c                        | 3 ---
 drivers/net/iseries_veth.c                    | 1 -
 drivers/net/ixp2000/ixpdev.c                  | 3 +--
 drivers/net/lance.c                           | 1 -
 drivers/net/lasi_82596.c                      | 1 -
 drivers/net/lib8390.c                         | 1 -
 drivers/net/loopback.c                        | 1 -
 drivers/net/lp486e.c                          | 1 -
 drivers/net/mac89x0.c                         | 1 -
 drivers/net/macb.c                            | 1 -
 drivers/net/mace.c                            | 1 -
 drivers/net/macmace.c                         | 1 -
 drivers/net/meth.c                            | 1 -
 drivers/net/mipsnet.c                         | 1 -
 drivers/net/mv643xx_eth.c                     | 1 -
 drivers/net/myri10ge/myri10ge.c               | 1 -
 drivers/net/natsemi.c                         | 1 -
 drivers/net/netx-eth.c                        | 1 -
 drivers/net/netxen/netxen_nic_init.c          | 1 -
 drivers/net/ni5010.c                          | 1 -
 drivers/net/ni52.c                            | 1 -
 drivers/net/ni65.c                            | 2 --
 drivers/net/ns83820.c                         | 1 -
 drivers/net/pasemi_mac.c                      | 2 --
 drivers/net/pci-skeleton.c                    | 1 -
 drivers/net/pcmcia/3c574_cs.c                 | 1 -
 drivers/net/pcmcia/3c589_cs.c                 | 1 -
 drivers/net/pcmcia/axnet_cs.c                 | 1 -
 drivers/net/pcmcia/fmvj18x_cs.c               | 1 -
 drivers/net/pcmcia/nmclan_cs.c                | 2 --
 drivers/net/pcmcia/smc91c92_cs.c              | 1 -
 drivers/net/pcmcia/xirc2ps_cs.c               | 1 -
 drivers/net/pcnet32.c                         | 1 -
 drivers/net/qla3xxx.c                         | 2 --
 drivers/net/r8169.c                           | 1 -
 drivers/net/rionet.c                          | 1 -
 drivers/net/saa9730.c                         | 1 -
 drivers/net/sb1250-mac.c                      | 3 ---
 drivers/net/sc92031.c                         | 1 -
 drivers/net/seeq8005.c                        | 1 -
 drivers/net/sgiseeq.c                         | 1 -
 drivers/net/sis190.c                          | 1 -
 drivers/net/sis900.c                          | 3 ---
 drivers/net/sk98lin/skge.c                    | 2 --
 drivers/net/smc911x.c                         | 2 --
 drivers/net/smc9194.c                         | 1 -
 drivers/net/smc91x.c                          | 1 -
 drivers/net/sonic.c                           | 2 --
 drivers/net/spider_net.c                      | 1 -
 drivers/net/starfire.c                        | 1 -
 drivers/net/sun3_82586.c                      | 1 -
 drivers/net/sun3lance.c                       | 1 -
 drivers/net/sunbmac.c                         | 1 -
 drivers/net/sundance.c                        | 1 -
 drivers/net/sungem.c                          | 1 -
 drivers/net/sunhme.c                          | 1 -
 drivers/net/sunlance.c                        | 2 --
 drivers/net/sunqe.c                           | 1 -
 drivers/net/tc35815.c                         | 1 -
 drivers/net/tlan.c                            | 2 --
 drivers/net/tsi108_eth.c                      | 1 -
 drivers/net/tulip/de2104x.c                   | 1 -
 drivers/net/tulip/de4x5.c                     | 2 --
 drivers/net/tulip/dmfe.c                      | 6 ++----
 drivers/net/tulip/interrupt.c                 | 2 --
 drivers/net/tulip/uli526x.c                   | 6 ++----
 drivers/net/tulip/winbond-840.c               | 3 ---
 drivers/net/tulip/xircom_cb.c                 | 1 -
 drivers/net/tulip/xircom_tulip_cb.c           | 1 -
 drivers/net/tun.c                             | 2 +-
 drivers/net/typhoon.c                         | 1 -
 drivers/net/via-rhine.c                       | 1 -
 drivers/net/via-velocity.c                    | 3 +--
 drivers/net/wan/hdlc_fr.c                     | 1 -
 drivers/net/wan/sbni.c                        | 5 -----
 drivers/net/wireless/airo.c                   | 5 +----
 drivers/net/wireless/arlan-main.c             | 1 -
 drivers/net/wireless/atmel.c                  | 2 --
 drivers/net/wireless/hostap/hostap_80211_rx.c | 1 -
 drivers/net/wireless/netwave_cs.c             | 1 -
 drivers/net/wireless/orinoco.c                | 1 -
 drivers/net/wireless/prism54/islpci_eth.c     | 4 ----
 drivers/net/wireless/ray_cs.c                 | 1 -
 drivers/net/wireless/wavelan.c                | 2 --
 drivers/net/wireless/wavelan_cs.c             | 2 --
 drivers/net/wireless/zd1201.c                 | 2 --
 drivers/net/yellowfin.c                       | 1 -
 drivers/net/znet.c                            | 1 -
 drivers/usb/gadget/ether.c                    | 1 -
 drivers/usb/net/catc.c                        | 1 -
 drivers/usb/net/kaweth.c                      | 2 --
 drivers/usb/net/pegasus.c                     | 5 +++--
 drivers/usb/net/rtl8150.c                     | 1 -
 drivers/usb/net/usbnet.c                      | 1 -
 net/atm/lec.c                                 | 3 ---
 net/bluetooth/bnep/core.c                     | 1 -
 net/ethernet/eth.c                            | 1 +
 net/ieee80211/ieee80211_rx.c                  | 1 -
 net/irda/irlan/irlan_eth.c                    | 3 +--
 165 files changed, 15 insertions(+), 230 deletions(-)

diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c
index 424e9257c9a..edef008c2b4 100644
--- a/arch/ia64/hp/sim/simeth.c
+++ b/arch/ia64/hp/sim/simeth.c
@@ -427,7 +427,6 @@ make_new_skb(struct net_device *dev)
 		printk(KERN_NOTICE "%s: memory squeeze. dropping packet.\n", dev->name);
 		return NULL;
 	}
-	nskb->dev = dev;
 
 	skb_reserve(nskb, 2);	/* Align IP on 16 byte boundaries */
 
diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c
index c8173db0d84..68d59d912c9 100644
--- a/arch/ia64/sn/kernel/xpnet.c
+++ b/arch/ia64/sn/kernel/xpnet.c
@@ -267,7 +267,6 @@ xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg)
 		(void *) skb->data, (void *) skb->tail, (void *) skb->end,
 		skb->len);
 
-	skb->dev = xpnet_device;
 	skb->protocol = eth_type_trans(skb, xpnet_device);
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
diff --git a/arch/ppc/8260_io/enet.c b/arch/ppc/8260_io/enet.c
index a6056c29cf0..48ce84f5be9 100644
--- a/arch/ppc/8260_io/enet.c
+++ b/arch/ppc/8260_io/enet.c
@@ -477,7 +477,6 @@ for (;;) {
 			cep->stats.rx_dropped++;
 		}
 		else {
-			skb->dev = dev;
 			skb_put(skb,pkt_len-4);	/* Make room */
 			eth_copy_and_sum(skb,
 				(unsigned char *)__va(bdp->cbd_bufaddr),
diff --git a/arch/ppc/8260_io/fcc_enet.c b/arch/ppc/8260_io/fcc_enet.c
index 06b84c372e5..9db825fe37f 100644
--- a/arch/ppc/8260_io/fcc_enet.c
+++ b/arch/ppc/8260_io/fcc_enet.c
@@ -734,7 +734,6 @@ for (;;) {
 			cep->stats.rx_dropped++;
 		}
 		else {
-			skb->dev = dev;
 			skb_put(skb,pkt_len);	/* Make room */
 			eth_copy_and_sum(skb,
 				(unsigned char *)__va(bdp->cbd_bufaddr),
diff --git a/arch/ppc/8xx_io/enet.c b/arch/ppc/8xx_io/enet.c
index b23c45bc151..bfa3f52996d 100644
--- a/arch/ppc/8xx_io/enet.c
+++ b/arch/ppc/8xx_io/enet.c
@@ -506,7 +506,6 @@ for (;;) {
 			cep->stats.rx_dropped++;
 		}
 		else {
-			skb->dev = dev;
 			skb_put(skb,pkt_len-4);	/* Make room */
 			eth_copy_and_sum(skb,
 				cep->rx_vaddr[bdp - cep->rx_bd_base],
diff --git a/arch/ppc/8xx_io/fec.c b/arch/ppc/8xx_io/fec.c
index e6c28fb423b..57a9a61e54b 100644
--- a/arch/ppc/8xx_io/fec.c
+++ b/arch/ppc/8xx_io/fec.c
@@ -724,7 +724,6 @@ while (!(bdp->cbd_sc & BD_ENET_RX_EMPTY)) {
 		printk("%s: Memory squeeze, dropping packet.\n", dev->name);
 		fep->stats.rx_dropped++;
 	} else {
-		skb->dev = dev;
 		skb_put(skb,pkt_len-4);	/* Make room */
 		eth_copy_and_sum(skb, data, pkt_len-4, 0);
 		skb->protocol=eth_type_trans(skb,dev);
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 59243d9aedd..7698feafa6a 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -523,7 +523,6 @@ static void c2_rx_interrupt(struct net_device *netdev)
 		skb->data += sizeof(*rxp_hdr);
 		skb->tail = skb->data + buflen;
 		skb->len = buflen;
-		skb->dev = netdev;
 		skb->protocol = eth_type_trans(skb, netdev);
 
 		netif_rx(skb);
diff --git a/drivers/isdn/hysdn/hysdn_net.c b/drivers/isdn/hysdn/hysdn_net.c
index 557d96c78a6..cfa8fa5e44a 100644
--- a/drivers/isdn/hysdn/hysdn_net.c
+++ b/drivers/isdn/hysdn/hysdn_net.c
@@ -214,8 +214,6 @@ hysdn_rx_netpkt(hysdn_card * card, unsigned char *buf, unsigned short len)
 		lp->stats.rx_dropped++;
 		return;
 	}
-	skb->dev = &lp->netdev;
-
 	/* copy the data */
 	memcpy(skb_put(skb, len), buf, len);
 
diff --git a/drivers/net/3c501.c b/drivers/net/3c501.c
index 06e33786078..4bee99ba7db 100644
--- a/drivers/net/3c501.c
+++ b/drivers/net/3c501.c
@@ -735,7 +735,6 @@ static void el_receive(struct net_device *dev)
 	else
 	{
     		skb_reserve(skb,2);	/* Force 16 byte alignment */
-		skb->dev = dev;
 		/*
 		 *	The read increments through the bytes. The interrupt
 		 *	handler will fix the pointer when it returns to
diff --git a/drivers/net/3c505.c b/drivers/net/3c505.c
index 702bfb2a5e9..c693b5a7950 100644
--- a/drivers/net/3c505.c
+++ b/drivers/net/3c505.c
@@ -615,7 +615,6 @@ static void receive_packet(struct net_device *dev, int len)
 	if (test_and_set_bit(0, (void *) &adapter->dmaing))
 		printk(KERN_ERR "%s: rx blocked, DMA in progress, dir %d\n", dev->name, adapter->current_dma.direction);
 
-	skb->dev = dev;
 	adapter->current_dma.direction = 0;
 	adapter->current_dma.length = rlen;
 	adapter->current_dma.skb = skb;
diff --git a/drivers/net/3c507.c b/drivers/net/3c507.c
index 54e1d5aebed..eed4299dc42 100644
--- a/drivers/net/3c507.c
+++ b/drivers/net/3c507.c
@@ -873,7 +873,6 @@ static void el16_rx(struct net_device *dev)
 			}
 
 			skb_reserve(skb,2);
-			skb->dev = dev;
 
 			/* 'skb->data' points to the start of sk_buff data area. */
 			memcpy_fromio(skb_put(skb,pkt_len), data_frame + 10, pkt_len);
diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c
index f791bf026e5..c7511c4d3b6 100644
--- a/drivers/net/3c509.c
+++ b/drivers/net/3c509.c
@@ -1091,7 +1091,6 @@ el3_rx(struct net_device *dev)
 				printk("Receiving packet size %d status %4.4x.\n",
 					   pkt_len, rx_status);
 			if (skb != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);     /* Align IP on 16 byte */
 
 				/* 'skb->data' points to the start of sk_buff data area. */
diff --git a/drivers/net/3c515.c b/drivers/net/3c515.c
index c307ce66145..290166d5e7d 100644
--- a/drivers/net/3c515.c
+++ b/drivers/net/3c515.c
@@ -1292,7 +1292,6 @@ static int corkscrew_rx(struct net_device *dev)
 				printk("Receiving packet size %d status %4.4x.\n",
 				     pkt_len, rx_status);
 			if (skb != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 				/* 'skb_put()' points to the start of sk_buff data area. */
 				insl(ioaddr + RX_FIFO,
@@ -1363,7 +1362,6 @@ static int boomerang_rx(struct net_device *dev)
 			   copying to a properly sized skbuff. */
 			if (pkt_len < rx_copybreak
 			    && (skb = dev_alloc_skb(pkt_len + 4)) != 0) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 				/* 'skb_put()' points to the start of sk_buff data area. */
 				memcpy(skb_put(skb, pkt_len),
diff --git a/drivers/net/3c523.c b/drivers/net/3c523.c
index 17d61eb0a7e..6b2036df685 100644
--- a/drivers/net/3c523.c
+++ b/drivers/net/3c523.c
@@ -988,7 +988,6 @@ static void elmc_rcv_int(struct net_device *dev)
 				rbd->status = 0;
 				skb = (struct sk_buff *) dev_alloc_skb(totlen + 2);
 				if (skb != NULL) {
-					skb->dev = dev;
 					skb_reserve(skb, 2);	/* 16 byte alignment */
 					skb_put(skb,totlen);
 					eth_copy_and_sum(skb, (char *) p->base+(unsigned long) rbd->buffer,totlen,0);
diff --git a/drivers/net/3c527.c b/drivers/net/3c527.c
index 6c7437e60bd..c7b571be20e 100644
--- a/drivers/net/3c527.c
+++ b/drivers/net/3c527.c
@@ -1189,7 +1189,6 @@ static void mc32_rx_ring(struct net_device *dev)
 			}
 
 			skb->protocol=eth_type_trans(skb,dev);
-			skb->dev=dev;
 			dev->last_rx = jiffies;
  			lp->net_stats.rx_packets++;
  			lp->net_stats.rx_bytes += length;
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
index b406ecfa726..80924f76dee 100644
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -2414,7 +2414,6 @@ static int vortex_rx(struct net_device *dev)
 				printk(KERN_DEBUG "Receiving packet size %d status %4.4x.\n",
 					   pkt_len, rx_status);
 			if (skb != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 				/* 'skb_put()' points to the start of sk_buff data area. */
 				if (vp->bus_master &&
@@ -2491,7 +2490,6 @@ boomerang_rx(struct net_device *dev)
 			/* Check if the packet is long enough to just accept without
 			   copying to a properly sized skbuff. */
 			if (pkt_len < rx_copybreak && (skb = dev_alloc_skb(pkt_len + 2)) != 0) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 				pci_dma_sync_single_for_cpu(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
 				/* 'skb_put()' points to the start of sk_buff data area. */
diff --git a/drivers/net/7990.c b/drivers/net/7990.c
index 1b3d11ed6cf..c50264aea16 100644
--- a/drivers/net/7990.c
+++ b/drivers/net/7990.c
@@ -331,7 +331,6 @@ static int lance_rx (struct net_device *dev)
                                 return 0;
                         }
 
-                        skb->dev = dev;
                         skb_reserve (skb, 2);           /* 16 byte align */
                         skb_put (skb, len);             /* make room */
                         eth_copy_and_sum(skb,
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index 12c8453f44b..2f704cb06e7 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -573,7 +573,6 @@ rx_status_loop:
 		}
 
 		skb_reserve(new_skb, RX_OFFSET);
-		new_skb->dev = dev;
 
 		pci_unmap_single(cp->pdev, mapping,
 				 buflen, PCI_DMA_FROMDEVICE);
@@ -1082,7 +1081,6 @@ static int cp_refill_rx (struct cp_private *cp)
 		if (!skb)
 			goto err_out;
 
-		skb->dev = cp->dev;
 		skb_reserve(skb, RX_OFFSET);
 
 		mapping = pci_map_single(cp->pdev, skb->data, cp->rx_buf_sz,
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index 99304b2aa86..2101334a8ac 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -2013,7 +2013,6 @@ no_early_rx:
 
 		skb = dev_alloc_skb (pkt_size + 2);
 		if (likely(skb)) {
-			skb->dev = dev;
 			skb_reserve (skb, 2);	/* 16 byte align the IP fields. */
 #if RX_BUF_IDX == 3
 			wrap_copy(skb, rx_ring, ring_offset+4, pkt_size);
diff --git a/drivers/net/82596.c b/drivers/net/82596.c
index 640d7ca2ebc..3ff1155459a 100644
--- a/drivers/net/82596.c
+++ b/drivers/net/82596.c
@@ -830,7 +830,6 @@ memory_squeeze:
 				lp->stats.rx_dropped++;
 			}
 			else {
-				skb->dev = dev;
 				if (!rx_in_place) {
 					/* 16 byte align the data fields */
 					skb_reserve(skb, 2);
diff --git a/drivers/net/a2065.c b/drivers/net/a2065.c
index d76548e7535..b38fc65005e 100644
--- a/drivers/net/a2065.c
+++ b/drivers/net/a2065.c
@@ -320,7 +320,6 @@ static int lance_rx (struct net_device *dev)
 				return 0;
 			}
 
-			skb->dev = dev;
 			skb_reserve (skb, 2);		/* 16 byte align */
 			skb_put (skb, len);		/* make room */
 			eth_copy_and_sum(skb,
diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c
index 7138e0e025b..7122b7ba8d6 100644
--- a/drivers/net/acenic.c
+++ b/drivers/net/acenic.c
@@ -2027,7 +2027,6 @@ static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm)
 		 */
 		csum = retdesc->tcp_udp_csum;
 
-		skb->dev = dev;
 		skb->protocol = eth_type_trans(skb, dev);
 
 		/*
diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c
index 962c954c2d5..675fe918421 100644
--- a/drivers/net/amd8111e.c
+++ b/drivers/net/amd8111e.c
@@ -798,9 +798,7 @@ static int amd8111e_rx_poll(struct net_device *dev, int * budget)
 			pci_unmap_single(lp->pci_dev,lp->rx_dma_addr[rx_index],
 					 lp->rx_buff_len-2, PCI_DMA_FROMDEVICE);
 			skb_put(skb, pkt_len);
-			skb->dev = dev;
 			lp->rx_skbuff[rx_index] = new_skb;
-			new_skb->dev = dev;
 			lp->rx_dma_addr[rx_index] = pci_map_single(lp->pci_dev,
 								   new_skb->data,
 								   lp->rx_buff_len-2,
@@ -926,9 +924,7 @@ static int amd8111e_rx(struct net_device *dev)
 		pci_unmap_single(lp->pci_dev,lp->rx_dma_addr[rx_index],
 			lp->rx_buff_len-2, PCI_DMA_FROMDEVICE);
 		skb_put(skb, pkt_len);
-		skb->dev = dev;
 		lp->rx_skbuff[rx_index] = new_skb;
-		new_skb->dev = dev;
 		lp->rx_dma_addr[rx_index] = pci_map_single(lp->pci_dev,
 			new_skb->data, lp->rx_buff_len-2,PCI_DMA_FROMDEVICE);
 
diff --git a/drivers/net/ariadne.c b/drivers/net/ariadne.c
index 9dfc09b181c..a0e68e71853 100644
--- a/drivers/net/ariadne.c
+++ b/drivers/net/ariadne.c
@@ -743,7 +743,6 @@ static int ariadne_rx(struct net_device *dev)
 	    }
 
 
-	    skb->dev = dev;
 	    skb_reserve(skb,2);		/* 16 byte align */
 	    skb_put(skb,pkt_len);	/* Make room */
 	    eth_copy_and_sum(skb, (char *)priv->rx_buff[entry], pkt_len,0);
diff --git a/drivers/net/arm/am79c961a.c b/drivers/net/arm/am79c961a.c
index ddd12d44ff2..8f0d7ce503c 100644
--- a/drivers/net/arm/am79c961a.c
+++ b/drivers/net/arm/am79c961a.c
@@ -526,7 +526,6 @@ am79c961_rx(struct net_device *dev, struct dev_priv *priv)
 		skb = dev_alloc_skb(len + 2);
 
 		if (skb) {
-			skb->dev = dev;
 			skb_reserve(skb, 2);
 
 			am_readbuffer(dev, pktaddr, skb_put(skb, len), len);
diff --git a/drivers/net/arm/at91_ether.c b/drivers/net/arm/at91_ether.c
index 1621b8fe35c..152fa7a042b 100644
--- a/drivers/net/arm/at91_ether.c
+++ b/drivers/net/arm/at91_ether.c
@@ -858,7 +858,6 @@ static void at91ether_rx(struct net_device *dev)
 			skb_reserve(skb, 2);
 			memcpy(skb_put(skb, pktlen), p_recv, pktlen);
 
-			skb->dev = dev;
 			skb->protocol = eth_type_trans(skb, dev);
 			dev->last_rx = jiffies;
 			lp->stats.rx_bytes += pktlen;
diff --git a/drivers/net/arm/ep93xx_eth.c b/drivers/net/arm/ep93xx_eth.c
index dd698b033a6..2438c5bff23 100644
--- a/drivers/net/arm/ep93xx_eth.c
+++ b/drivers/net/arm/ep93xx_eth.c
@@ -255,7 +255,6 @@ static int ep93xx_rx(struct net_device *dev, int *budget)
 
 		skb = dev_alloc_skb(length + 2);
 		if (likely(skb != NULL)) {
-			skb->dev = dev;
 			skb_reserve(skb, 2);
 			dma_sync_single(NULL, ep->descs->rdesc[entry].buf_addr,
 						length, DMA_FROM_DEVICE);
diff --git a/drivers/net/arm/ether1.c b/drivers/net/arm/ether1.c
index a2921882eba..f075cebe84a 100644
--- a/drivers/net/arm/ether1.c
+++ b/drivers/net/arm/ether1.c
@@ -875,7 +875,6 @@ ether1_recv_done (struct net_device *dev)
 			skb = dev_alloc_skb (length + 2);
 
 			if (skb) {
-				skb->dev = dev;
 				skb_reserve (skb, 2);
 
 				ether1_readbuffer (dev, skb_put (skb, length), rbd.rbd_bufl, length);
diff --git a/drivers/net/arm/ether3.c b/drivers/net/arm/ether3.c
index 841178343a0..32da2eb9bce 100644
--- a/drivers/net/arm/ether3.c
+++ b/drivers/net/arm/ether3.c
@@ -661,7 +661,6 @@ if (next_ptr < RX_START || next_ptr >= RX_END) {
 			if (skb) {
 				unsigned char *buf;
 
-				skb->dev = dev;
 				skb_reserve(skb, 2);
 				buf = skb_put(skb, length);
 				ether3_readbuffer(dev, buf + 12, length - 12);
diff --git a/drivers/net/at1700.c b/drivers/net/at1700.c
index 56ae8babd91..bed8e0ebaf1 100644
--- a/drivers/net/at1700.c
+++ b/drivers/net/at1700.c
@@ -768,7 +768,6 @@ net_rx(struct net_device *dev)
 				lp->stats.rx_dropped++;
 				break;
 			}
-			skb->dev = dev;
 			skb_reserve(skb,2);
 
 			insw(ioaddr + DATAPORT, skb_put(skb,pkt_len), (pkt_len + 1) >> 1);
diff --git a/drivers/net/atari_bionet.c b/drivers/net/atari_bionet.c
index 4e3bf6a1f22..f52e7f22f63 100644
--- a/drivers/net/atari_bionet.c
+++ b/drivers/net/atari_bionet.c
@@ -544,7 +544,6 @@ bionet_poll_rx(struct net_device *dev) {
 				break;
 			}
 
-			skb->dev = dev;
 			skb_reserve( skb, 2 );		/* 16 Byte align  */
 			skb_put( skb, pkt_len );	/* make room */
 
diff --git a/drivers/net/atarilance.c b/drivers/net/atarilance.c
index 7e37ac86a69..dfa8b9ba4c8 100644
--- a/drivers/net/atarilance.c
+++ b/drivers/net/atarilance.c
@@ -1047,7 +1047,6 @@ static int lance_rx( struct net_device *dev )
 						   pkt_len );
 				}
 
-				skb->dev = dev;
 				skb_reserve( skb, 2 );	/* 16 byte align */
 				skb_put( skb, pkt_len );	/* Make room */
 				lp->memcpy_f( skb->data, PKTBUF_ADDR(head), pkt_len );
diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index 8606eac5bec..e3f181602e4 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -408,7 +408,6 @@ static void atl1_rx_checksum(struct atl1_adapter *adapter,
 static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter)
 {
 	struct atl1_rfd_ring *rfd_ring = &adapter->rfd_ring;
-	struct net_device *netdev = adapter->netdev;
 	struct pci_dev *pdev = adapter->pdev;
 	struct page *page;
 	unsigned long offset;
@@ -444,7 +443,6 @@ static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter)
 		 * the 14 byte MAC header is removed
 		 */
 		skb_reserve(skb, NET_IP_ALIGN);
-		skb->dev = netdev;
 
 		buffer_info->alloced = 1;
 		buffer_info->skb = skb;
diff --git a/drivers/net/atp.c b/drivers/net/atp.c
index 2d306fcb7f3..18aba838c1f 100644
--- a/drivers/net/atp.c
+++ b/drivers/net/atp.c
@@ -793,7 +793,6 @@ static void net_rx(struct net_device *dev)
 			lp->stats.rx_dropped++;
 			goto done;
 		}
-		skb->dev = dev;
 
 		skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 		read_block(ioaddr, pkt_len, skb_put(skb,pkt_len), dev->if_port);
diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c
index 69ae229b680..97b55f2546c 100644
--- a/drivers/net/au1000_eth.c
+++ b/drivers/net/au1000_eth.c
@@ -1205,7 +1205,6 @@ static int au1000_rx(struct net_device *dev)
 				aup->stats.rx_dropped++;
 				continue;
 			}
-			skb->dev = dev;
 			skb_reserve(skb, 2);	/* 16 byte IP header align */
 			eth_copy_and_sum(skb,
 				(unsigned char *)pDB->vaddr, frmlen, 0);
diff --git a/drivers/net/b44.c b/drivers/net/b44.c
index d742bfe2447..f67d97de97f 100644
--- a/drivers/net/b44.c
+++ b/drivers/net/b44.c
@@ -825,7 +825,6 @@ static int b44_rx(struct b44 *bp, int budget)
 			if (copy_skb == NULL)
 				goto drop_it_no_recycle;
 
-			copy_skb->dev = bp->dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			/* DMA sync done above, copy just the actual packet */
diff --git a/drivers/net/bmac.c b/drivers/net/bmac.c
index c143304dcff..4612725965d 100644
--- a/drivers/net/bmac.c
+++ b/drivers/net/bmac.c
@@ -715,7 +715,6 @@ static irqreturn_t bmac_rxdma_intr(int irq, void *dev_id)
 		if (skb != NULL) {
 			nb -= ETHERCRC;
 			skb_put(skb, nb);
-			skb->dev = dev;
 			skb->protocol = eth_type_trans(skb, dev);
 			netif_rx(skb);
 			dev->last_rx = jiffies;
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index c8126484c2b..68e37a655fe 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -1995,7 +1995,6 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
 		return -1;
 
 	*skbref = skb;
-	skb->dev = cp->dev;
 	skb_reserve(skb, swivel);
 
 	p = skb->data;
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 326d4a66512..47fa8dcf752 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1379,12 +1379,11 @@ static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len)
 	}
 	__skb_pull(skb, sizeof(*p));
 
-	skb->dev = adapter->port[p->iff].dev;
 	skb->dev->last_rx = jiffies;
 	st = per_cpu_ptr(sge->port_stats[p->iff], smp_processor_id());
 	st->rx_packets++;
 
-	skb->protocol = eth_type_trans(skb, skb->dev);
+	skb->protocol = eth_type_trans(skb, adapter->port[p->iff].dev);
 	if ((adapter->flags & RX_CSUM_ENABLED) && p->csum == 0xffff &&
 	    skb->protocol == htons(ETH_P_IP) &&
 	    (skb->data[9] == IPPROTO_TCP || skb->data[9] == IPPROTO_UDP)) {
diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c
index 8eb57127600..98643801a3b 100644
--- a/drivers/net/cris/eth_v10.c
+++ b/drivers/net/cris/eth_v10.c
@@ -1375,7 +1375,6 @@ e100_rx(struct net_device *dev)
 		myNextRxDesc->descr.buf = L1_CACHE_ALIGN(virt_to_phys(myNextRxDesc->skb->data));
 	}
 
-	skb->dev = dev;
 	skb->protocol = eth_type_trans(skb, dev);
 
 	/* Send the packet to the upper layers */
diff --git a/drivers/net/cs89x0.c b/drivers/net/cs89x0.c
index 4612f71a710..9774bb1b3e8 100644
--- a/drivers/net/cs89x0.c
+++ b/drivers/net/cs89x0.c
@@ -1004,7 +1004,6 @@ skip_this_frame:
 		return;
 	}
 	skb_reserve(skb, 2);	/* longword align L3 header */
-	skb->dev = dev;
 
 	if (bp + length > lp->end_dma_buff) {
 		int semi_cnt = lp->end_dma_buff - bp;
@@ -1702,7 +1701,6 @@ net_rx(struct net_device *dev)
 		return;
 	}
 	skb_reserve(skb, 2);	/* longword align L3 header */
-	skb->dev = dev;
 
 	readwords(ioaddr, RX_FRAME_PORT, skb_put(skb, length), length >> 1);
 	if (length & 1)
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 027ab2c3825..8946f7aa97c 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -1684,9 +1684,8 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
 	struct port_info *pi;
 
 	skb_pull(skb, sizeof(*p) + pad);
-	skb->dev = adap->port[p->iff];
 	skb->dev->last_rx = jiffies;
-	skb->protocol = eth_type_trans(skb, skb->dev);
+	skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
 	pi = netdev_priv(skb->dev);
 	if (pi->rx_csum_offload && p->csum_valid && p->csum == 0xffff &&
 	    !p->fragment) {
diff --git a/drivers/net/de600.c b/drivers/net/de600.c
index e547ce14eef..dae97b860da 100644
--- a/drivers/net/de600.c
+++ b/drivers/net/de600.c
@@ -359,7 +359,6 @@ static void de600_rx_intr(struct net_device *dev)
 	}
 	/* else */
 
-	skb->dev = dev;
 	skb_reserve(skb,2);	/* Align */
 
 	/* 'skb->data' points to the start of sk_buff data area. */
diff --git a/drivers/net/de620.c b/drivers/net/de620.c
index b6ad0cb5055..dc489242617 100644
--- a/drivers/net/de620.c
+++ b/drivers/net/de620.c
@@ -697,7 +697,6 @@ static int de620_rx_intr(struct net_device *dev)
 		}
 		else { /* Yep! Go get it! */
 			skb_reserve(skb,2);	/* Align */
-			skb->dev = dev;
 			/* skb->data points to the start of sk_buff data area */
 			buffer = skb_put(skb,size);
 			/* copy the packet into the buffer */
diff --git a/drivers/net/declance.c b/drivers/net/declance.c
index 9f7e1db8ce6..95d854e2295 100644
--- a/drivers/net/declance.c
+++ b/drivers/net/declance.c
@@ -616,7 +616,6 @@ static int lance_rx(struct net_device *dev)
 			}
 			lp->stats.rx_bytes += len;
 
-			skb->dev = dev;
 			skb_reserve(skb, 2);	/* 16 byte align */
 			skb_put(skb, len);	/* make room */
 
diff --git a/drivers/net/depca.c b/drivers/net/depca.c
index f3807aaf10a..183497020bf 100644
--- a/drivers/net/depca.c
+++ b/drivers/net/depca.c
@@ -1044,7 +1044,6 @@ static int depca_rx(struct net_device *dev)
 					unsigned char *buf;
 					skb_reserve(skb, 2);	/* 16 byte align the IP header */
 					buf = skb_put(skb, pkt_len);
-					skb->dev = dev;
 					if (entry < lp->rx_old) {	/* Wrapped buffer */
 						len = (lp->rxRingMask - lp->rx_old + 1) * RX_BUFF_SZ;
 						memcpy_fromio(buf, lp->rx_buff[lp->rx_old], len);
diff --git a/drivers/net/dgrs.c b/drivers/net/dgrs.c
index a79520295fd..d223c38966f 100644
--- a/drivers/net/dgrs.c
+++ b/drivers/net/dgrs.c
@@ -503,7 +503,6 @@ dgrs_rcv_frame(
 		/* discarding the frame */
 		goto out;
 	}
-	skb->dev = devN;
 	skb_reserve(skb, 2);	/* Align IP header */
 
 again:
diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c
index 9d446a0fe0b..74ec64a1625 100644
--- a/drivers/net/dl2k.c
+++ b/drivers/net/dl2k.c
@@ -504,7 +504,6 @@ rio_timer (unsigned long data)
 					break;
 				}
 				np->rx_skbuff[entry] = skb;
-				skb->dev = dev;
 				/* 16 byte align the IP header */
 				skb_reserve (skb, 2);
 				np->rx_ring[entry].fraginfo =
@@ -575,7 +574,6 @@ alloc_list (struct net_device *dev)
 				dev->name);
 			break;
 		}
-		skb->dev = dev;	/* Mark as being used by this device. */
 		skb_reserve (skb, 2);	/* 16 byte align the IP header. */
 		/* Rubicon now supports 40 bits of addressing space. */
 		np->rx_ring[i].fraginfo =
@@ -866,7 +864,6 @@ receive_packet (struct net_device *dev)
 							    	DMA_48BIT_MASK,
 							    np->rx_buf_sz,
 							    PCI_DMA_FROMDEVICE);
-				skb->dev = dev;
 				/* 16 byte align the IP header */
 				skb_reserve (skb, 2);
 				eth_copy_and_sum (skb,
@@ -910,7 +907,6 @@ receive_packet (struct net_device *dev)
 				break;
 			}
 			np->rx_skbuff[entry] = skb;
-			skb->dev = dev;
 			/* 16 byte align the IP header */
 			skb_reserve (skb, 2);
 			np->rx_ring[entry].fraginfo =
diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c
index 615d2b14efa..8cc1174e7f6 100644
--- a/drivers/net/dm9000.c
+++ b/drivers/net/dm9000.c
@@ -954,7 +954,6 @@ dm9000_rx(struct net_device *dev)
 		/* Move data from DM9000 */
 		if (GoodPacket
 		    && ((skb = dev_alloc_skb(RxLen + 4)) != NULL)) {
-			skb->dev = dev;
 			skb_reserve(skb, 2);
 			rdptr = (u8 *) skb_put(skb, RxLen - 4);
 
diff --git a/drivers/net/eepro.c b/drivers/net/eepro.c
index b4463094c93..39654e1e2be 100644
--- a/drivers/net/eepro.c
+++ b/drivers/net/eepro.c
@@ -1591,7 +1591,6 @@ eepro_rx(struct net_device *dev)
 
 				break;
 			}
-			skb->dev = dev;
 			skb_reserve(skb,2);
 
 			if (lp->version == LAN595)
diff --git a/drivers/net/eepro100.c b/drivers/net/eepro100.c
index e28bb1e38f8..db658bc491a 100644
--- a/drivers/net/eepro100.c
+++ b/drivers/net/eepro100.c
@@ -1793,7 +1793,6 @@ speedo_rx(struct net_device *dev)
 			   copying to a properly sized skbuff. */
 			if (pkt_len < rx_copybreak
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != 0) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 				/* 'skb_put()' points to the start of sk_buff data area. */
 				pci_dma_sync_single_for_cpu(sp->pdev, sp->rx_ring_dma[entry],
diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c
index 3868b803126..8aaf5ec0c36 100644
--- a/drivers/net/eexpress.c
+++ b/drivers/net/eexpress.c
@@ -976,7 +976,6 @@ static void eexp_hw_rx_pio(struct net_device *dev)
 					lp->stats.rx_dropped++;
 					break;
 				}
-				skb->dev = dev;
 				skb_reserve(skb, 2);
 				outw(pbuf+10, ioaddr+READ_PTR);
 			        insw(ioaddr+DATAPORT, skb_put(skb,pkt_len),(pkt_len+1)>>1);
diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c
index 3a6a83d3ee1..4e3f14c9c71 100644
--- a/drivers/net/epic100.c
+++ b/drivers/net/epic100.c
@@ -934,7 +934,6 @@ static void epic_init_ring(struct net_device *dev)
 		ep->rx_skbuff[i] = skb;
 		if (skb == NULL)
 			break;
-		skb->dev = dev;			/* Mark as being used by this device. */
 		skb_reserve(skb, 2);	/* 16 byte align the IP header. */
 		ep->rx_ring[i].bufaddr = pci_map_single(ep->pci_dev,
 			skb->data, ep->rx_buf_sz, PCI_DMA_FROMDEVICE);
@@ -1199,7 +1198,6 @@ static int epic_rx(struct net_device *dev, int budget)
 			   to a minimally-sized skbuff. */
 			if (pkt_len < rx_copybreak
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				pci_dma_sync_single_for_cpu(ep->pci_dev,
 							    ep->rx_ring[entry].bufaddr,
@@ -1236,7 +1234,6 @@ static int epic_rx(struct net_device *dev, int budget)
 			skb = ep->rx_skbuff[entry] = dev_alloc_skb(ep->rx_buf_sz);
 			if (skb == NULL)
 				break;
-			skb->dev = dev;			/* Mark as being used by this device. */
 			skb_reserve(skb, 2);	/* Align IP on 16 byte boundaries */
 			ep->rx_ring[entry].bufaddr = pci_map_single(ep->pci_dev,
 				skb->data, ep->rx_buf_sz, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/eth16i.c b/drivers/net/eth16i.c
index 93283e386f3..04abf59e500 100644
--- a/drivers/net/eth16i.c
+++ b/drivers/net/eth16i.c
@@ -1175,7 +1175,6 @@ static void eth16i_rx(struct net_device *dev)
 				break;
 			}
 
-			skb->dev = dev;
 			skb_reserve(skb,2);
 
 			/*
diff --git a/drivers/net/ewrk3.c b/drivers/net/ewrk3.c
index 714ea1176ec..cb0792c187b 100644
--- a/drivers/net/ewrk3.c
+++ b/drivers/net/ewrk3.c
@@ -993,7 +993,6 @@ static int ewrk3_rx(struct net_device *dev)
 
 					if ((skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
 						unsigned char *p;
-						skb->dev = dev;
 						skb_reserve(skb, 2);	/* Align to 16 bytes */
 						p = skb_put(skb, pkt_len);
 
diff --git a/drivers/net/fealnx.c b/drivers/net/fealnx.c
index 38a13f44053..abe9b089c61 100644
--- a/drivers/net/fealnx.c
+++ b/drivers/net/fealnx.c
@@ -1719,7 +1719,6 @@ static int netdev_rx(struct net_device *dev)
 			   to a minimally-sized skbuff. */
 			if (pkt_len < rx_copybreak &&
 			    (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				pci_dma_sync_single_for_cpu(np->pci_dev,
 							    np->cur_rx->buffer,
diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 6764281b453..255b09124e1 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -647,7 +647,6 @@ while (!((status = bdp->cbd_sc) & BD_ENET_RX_EMPTY)) {
 		printk("%s: Memory squeeze, dropping packet.\n", dev->name);
 		fep->stats.rx_dropped++;
 	} else {
-		skb->dev = dev;
 		skb_put(skb,pkt_len-4);	/* Make room */
 		eth_copy_and_sum(skb, data, pkt_len-4, 0);
 		skb->protocol=eth_type_trans(skb,dev);
diff --git a/drivers/net/fec_8xx/fec_main.c b/drivers/net/fec_8xx/fec_main.c
index 77f747a5afa..698dba8f2aa 100644
--- a/drivers/net/fec_8xx/fec_main.c
+++ b/drivers/net/fec_8xx/fec_main.c
@@ -561,7 +561,6 @@ static int fec_enet_rx_common(struct net_device *dev, int *budget)
 				skbn = dev_alloc_skb(ENET_RX_FRSIZE);
 
 			if (skbn != NULL) {
-				skb->dev = dev;
 				skb_put(skb, pkt_len);	/* Make room */
 				skb->protocol = eth_type_trans(skb, dev);
 				received++;
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index d04214e4e58..d5d458c3421 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -1385,7 +1385,6 @@ static int nv_alloc_rx(struct net_device *dev)
 	while (np->put_rx.orig != less_rx) {
 		struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
 		if (skb) {
-			skb->dev = dev;
 			np->put_rx_ctx->skb = skb;
 			np->put_rx_ctx->dma = pci_map_single(np->pci_dev, skb->data,
 							     skb->end-skb->data, PCI_DMA_FROMDEVICE);
@@ -1416,7 +1415,6 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
 	while (np->put_rx.ex != less_rx) {
 		struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
 		if (skb) {
-			skb->dev = dev;
 			np->put_rx_ctx->skb = skb;
 			np->put_rx_ctx->dma = pci_map_single(np->pci_dev, skb->data,
 							     skb->end-skb->data, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c
index 4a05c14bf7e..9f6ef315ce5 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -170,7 +170,6 @@ static int fs_enet_rx_napi(struct net_device *dev, int *budget)
 				skbn = dev_alloc_skb(ENET_RX_FRSIZE);
 
 			if (skbn != NULL) {
-				skb->dev = dev;
 				skb_put(skb, pkt_len);	/* Make room */
 				skb->protocol = eth_type_trans(skb, dev);
 				received++;
@@ -304,7 +303,6 @@ static int fs_enet_rx_non_napi(struct net_device *dev)
 				skbn = dev_alloc_skb(ENET_RX_FRSIZE);
 
 			if (skbn != NULL) {
-				skb->dev = dev;
 				skb_put(skb, pkt_len);	/* Make room */
 				skb->protocol = eth_type_trans(skb, dev);
 				received++;
@@ -516,7 +514,6 @@ void fs_init_bds(struct net_device *dev)
 			break;
 		}
 		fep->rx_skbuff[i] = skb;
-		skb->dev = dev;
 		CBDW_BUFADDR(bdp,
 			dma_map_single(fep->dev, skb->data,
 				L1_CACHE_ALIGN(PKT_MAXBUF_SIZE),
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index d981d4c41dd..1d019195a39 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -1295,8 +1295,6 @@ struct sk_buff * gfar_new_skb(struct net_device *dev, struct rxbd8 *bdp)
 	 */
 	skb_reserve(skb, alignamount);
 
-	skb->dev = dev;
-
 	bdp->bufPtr = dma_map_single(NULL, skb->data,
 			priv->rx_buffer_size, DMA_FROM_DEVICE);
 
diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c
index c3c0d67fc38..2521b111b3a 100644
--- a/drivers/net/hamachi.c
+++ b/drivers/net/hamachi.c
@@ -1568,7 +1568,6 @@ static int hamachi_rx(struct net_device *dev)
 				printk(KERN_ERR "%s: rx_copybreak non-zero "
 				  "not good with RX_CHECKSUM\n", dev->name);
 #endif
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				pci_dma_sync_single_for_cpu(hmp->pci_dev,
 							    hmp->rx_ring[entry].addr,
diff --git a/drivers/net/hp100.c b/drivers/net/hp100.c
index 7dc5185aa2c..8118a6750b6 100644
--- a/drivers/net/hp100.c
+++ b/drivers/net/hp100.c
@@ -1816,7 +1816,6 @@ static void hp100_rx(struct net_device *dev)
 			u_char *ptr;
 
 			skb_reserve(skb,2);
-			skb->dev = dev;
 
 			/* ptr to start of the sk_buff data area */
 			skb_put(skb, pkt_len);
diff --git a/drivers/net/ibm_emac/ibm_emac_core.c b/drivers/net/ibm_emac/ibm_emac_core.c
index dd8ad874682..b1ad62d89eb 100644
--- a/drivers/net/ibm_emac/ibm_emac_core.c
+++ b/drivers/net/ibm_emac/ibm_emac_core.c
@@ -1398,7 +1398,6 @@ static int emac_poll_rx(void *param, int budget)
 
 		skb_put(skb, len);
 	      push_packet:
-		skb->dev = dev->ndev;
 		skb->protocol = eth_type_trans(skb, dev->ndev);
 		emac_rx_csum(dev, skb, ctrl);
 
diff --git a/drivers/net/ibmlana.c b/drivers/net/ibmlana.c
index 3f946c81151..fe85d6fcba3 100644
--- a/drivers/net/ibmlana.c
+++ b/drivers/net/ibmlana.c
@@ -601,7 +601,6 @@ static void irqrx_handler(struct net_device *dev)
 
 				/* set up skb fields */
 
-				skb->dev = dev;
 				skb->protocol = eth_type_trans(skb, dev);
 				skb->ip_summed = CHECKSUM_NONE;
 
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index 458db0538a9..0573fcfcb2c 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -798,7 +798,6 @@ static int ibmveth_poll(struct net_device *netdev, int *budget)
 
 				skb_reserve(skb, offset);
 				skb_put(skb, length);
-				skb->dev = netdev;
 				skb->protocol = eth_type_trans(skb, netdev);
 
 				netif_receive_skb(skb);	/* send it up */
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index 4ad780719a8..ea07aa5ba51 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -633,8 +633,6 @@ static inline void ioc3_rx(struct ioc3_private *ip)
 
 			ip->rx_skbs[rx_entry] = NULL;	/* Poison  */
 
-			new_skb->dev = priv_netdev(ip);
-
 			/* Because we reserve afterwards. */
 			skb_put(new_skb, (1664 + RX_OFFSET));
 			rxb = (struct ioc3_erxbuf *) new_skb->data;
@@ -940,7 +938,6 @@ static void ioc3_alloc_rings(struct net_device *dev)
 			}
 
 			ip->rx_skbs[i] = skb;
-			skb->dev = dev;
 
 			/* Because we reserve afterwards. */
 			skb_put(skb, (1664 + RX_OFFSET));
diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c
index 0e9ba3c3faf..347d50cd77d 100644
--- a/drivers/net/iseries_veth.c
+++ b/drivers/net/iseries_veth.c
@@ -1540,7 +1540,6 @@ static void veth_receive(struct veth_lpar_connection *cnx,
 		}
 
 		skb_put(skb, length);
-		skb->dev = dev;
 		skb->protocol = eth_type_trans(skb, dev);
 		skb->ip_summed = CHECKSUM_NONE;
 		netif_rx(skb);	/* send it up */
diff --git a/drivers/net/ixp2000/ixpdev.c b/drivers/net/ixp2000/ixpdev.c
index a4eccb11d67..6683afc02aa 100644
--- a/drivers/net/ixp2000/ixpdev.c
+++ b/drivers/net/ixp2000/ixpdev.c
@@ -110,11 +110,10 @@ static int ixpdev_rx(struct net_device *dev, int *budget)
 
 		skb = dev_alloc_skb(desc->pkt_length + 2);
 		if (likely(skb != NULL)) {
-			skb->dev = nds[desc->channel];
 			skb_reserve(skb, 2);
 			eth_copy_and_sum(skb, buf, desc->pkt_length, 0);
 			skb_put(skb, desc->pkt_length);
-			skb->protocol = eth_type_trans(skb, skb->dev);
+			skb->protocol = eth_type_trans(skb, nds[desc->channel]);
 
 			skb->dev->last_rx = jiffies;
 
diff --git a/drivers/net/lance.c b/drivers/net/lance.c
index a3843320dbe..11cbcb946db 100644
--- a/drivers/net/lance.c
+++ b/drivers/net/lance.c
@@ -1184,7 +1184,6 @@ lance_rx(struct net_device *dev)
 					}
 					break;
 				}
-				skb->dev = dev;
 				skb_reserve(skb,2);	/* 16 byte align */
 				skb_put(skb,pkt_len);	/* Make room */
 				eth_copy_and_sum(skb,
diff --git a/drivers/net/lasi_82596.c b/drivers/net/lasi_82596.c
index 452863d5d49..0edcd125fd6 100644
--- a/drivers/net/lasi_82596.c
+++ b/drivers/net/lasi_82596.c
@@ -801,7 +801,6 @@ memory_squeeze:
 				lp->stats.rx_dropped++;
 			}
 			else {
-				skb->dev = dev;
 				if (!rx_in_place) {
 					/* 16 byte align the data fields */
 					dma_sync_single_for_cpu(lp->dev, (dma_addr_t)WSWAPchar(rbd->b_data), PKT_BUF_SZ, DMA_FROM_DEVICE);
diff --git a/drivers/net/lib8390.c b/drivers/net/lib8390.c
index e726c06b8dc..5c86e737f95 100644
--- a/drivers/net/lib8390.c
+++ b/drivers/net/lib8390.c
@@ -722,7 +722,6 @@ static void ei_receive(struct net_device *dev)
 			else
 			{
 				skb_reserve(skb,2);	/* IP headers on 16 byte boundaries */
-				skb->dev = dev;
 				skb_put(skb, pkt_len);	/* Make room */
 				ei_block_input(dev, pkt_len, skb, current_offset + sizeof(rx_frame));
 				skb->protocol=eth_type_trans(skb,dev);
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 2b739fd584f..4380e5e89dc 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -137,7 +137,6 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb_orphan(skb);
 
 	skb->protocol = eth_type_trans(skb,dev);
-	skb->dev = dev;
 #ifndef LOOPBACK_MUST_CHECKSUM
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 #endif
diff --git a/drivers/net/lp486e.c b/drivers/net/lp486e.c
index 177c502f738..5fc18da1873 100644
--- a/drivers/net/lp486e.c
+++ b/drivers/net/lp486e.c
@@ -676,7 +676,6 @@ i596_rx_one(struct net_device *dev, struct i596_private *lp,
 			return 1;
 		}
 
-		skb->dev = dev;
 		memcpy(skb_put(skb,pkt_len), rfd->data, pkt_len);
 
 		skb->protocol = eth_type_trans(skb,dev);
diff --git a/drivers/net/mac89x0.c b/drivers/net/mac89x0.c
index e960138011c..90e695d5326 100644
--- a/drivers/net/mac89x0.c
+++ b/drivers/net/mac89x0.c
@@ -530,7 +530,6 @@ net_rx(struct net_device *dev)
 		return;
 	}
 	skb_put(skb, length);
-	skb->dev = dev;
 
 	memcpy_fromio(skb->data, dev->mem_start + PP_RxFrame, length);
 
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index 2e9571bf073..0c3649be0d0 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -357,7 +357,6 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 	}
 
 	skb_reserve(skb, RX_OFFSET);
-	skb->dev = bp->dev;
 	skb->ip_summed = CHECKSUM_NONE;
 	skb_put(skb, len);
 
diff --git a/drivers/net/mace.c b/drivers/net/mace.c
index 9ec24f0d5d6..b3bd6239495 100644
--- a/drivers/net/mace.c
+++ b/drivers/net/mace.c
@@ -939,7 +939,6 @@ static irqreturn_t mace_rxdma_intr(int irq, void *dev_id)
 		else	/* Ethernet header; mace includes FCS */
 		    nb -= 8;
 		skb_put(skb, nb);
-		skb->dev = dev;
 		skb->protocol = eth_type_trans(skb, dev);
 		mp->stats.rx_bytes += skb->len;
 		netif_rx(skb);
diff --git a/drivers/net/macmace.c b/drivers/net/macmace.c
index 5d541e87304..8c07ffc9c24 100644
--- a/drivers/net/macmace.c
+++ b/drivers/net/macmace.c
@@ -621,7 +621,6 @@ static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf)
 	skb_reserve(skb,2);
 	memcpy(skb_put(skb, mf->len), mf->data, mf->len);
 
-	skb->dev = dev;
 	skb->protocol = eth_type_trans(skb, dev);
 	netif_rx(skb);
 	dev->last_rx = jiffies;
diff --git a/drivers/net/meth.c b/drivers/net/meth.c
index 7e69ca6edd9..fafe6783523 100644
--- a/drivers/net/meth.c
+++ b/drivers/net/meth.c
@@ -421,7 +421,6 @@ static void meth_rx(struct net_device* dev, unsigned long int_status)
 					/* Write metadata, and then pass to the receive level */
 					skb_put(skb_c, len);
 					priv->rx_skbs[priv->rx_write] = skb;
-					skb_c->dev = dev;
 					skb_c->protocol = eth_type_trans(skb_c, dev);
 					dev->last_rx = jiffies;
 					priv->stats.rx_packets++;
diff --git a/drivers/net/mipsnet.c b/drivers/net/mipsnet.c
index f42b9e20193..403f63afd20 100644
--- a/drivers/net/mipsnet.c
+++ b/drivers/net/mipsnet.c
@@ -101,7 +101,6 @@ static inline ssize_t mipsnet_get_fromdev(struct net_device *dev, size_t count)
 	if (ioiocpy_frommipsnet(dev, skb_put(skb, len), len))
 		return -EFAULT;
 
-	skb->dev = dev;
 	skb->protocol = eth_type_trans(skb, dev);
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index 8015a7c5b0c..cd9369a285e 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -434,7 +434,6 @@ static int mv643xx_eth_receive_queue(struct net_device *dev, int budget)
 			 * received packet
 			 */
 			skb_put(skb, pkt_info.byte_cnt - 4);
-			skb->dev = dev;
 
 			if (pkt_info.cmd_sts & ETH_LAYER_4_CHECKSUM_OK) {
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index f8efe0e70a6..7c04179c7b1 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -1020,7 +1020,6 @@ myri10ge_rx_done(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
 		skb_shinfo(skb)->nr_frags = 0;
 	}
 	skb->protocol = eth_type_trans(skb, dev);
-	skb->dev = dev;
 
 	if (mgp->csum_flag) {
 		if ((skb->protocol == htons(ETH_P_IP)) ||
diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c
index 349b96a3ec4..a8d7ff2c96a 100644
--- a/drivers/net/natsemi.c
+++ b/drivers/net/natsemi.c
@@ -2289,7 +2289,6 @@ static void netdev_rx(struct net_device *dev, int *work_done, int work_to_do)
 			 * without copying to a minimally-sized skbuff. */
 			if (pkt_len < rx_copybreak
 			    && (skb = dev_alloc_skb(pkt_len + RX_OFFSET)) != NULL) {
-				skb->dev = dev;
 				/* 16 byte align the IP header */
 				skb_reserve(skb, RX_OFFSET);
 				pci_dma_sync_single_for_cpu(np->pci_dev,
diff --git a/drivers/net/netx-eth.c b/drivers/net/netx-eth.c
index a53644f6a29..2b8da0a5499 100644
--- a/drivers/net/netx-eth.c
+++ b/drivers/net/netx-eth.c
@@ -168,7 +168,6 @@ static void netx_eth_receive(struct net_device *ndev)
 		FIFO_PTR_SEGMENT(seg) | FIFO_PTR_FRAMENO(frameno));
 
 	ndev->last_rx = jiffies;
-	skb->dev = ndev;
 	skb->protocol = eth_type_trans(skb, ndev);
 	netif_rx(skb);
 	priv->stats.rx_packets++;
diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c
index eff965dc5ff..5cd40562da7 100644
--- a/drivers/net/netxen/netxen_nic_init.c
+++ b/drivers/net/netxen/netxen_nic_init.c
@@ -1129,7 +1129,6 @@ netxen_process_rcv(struct netxen_adapter *adapter, int ctxid,
 		port->stats.csummed++;
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
-	skb->dev = netdev;
 	if (desc_ctx == RCV_DESC_LRO_CTXID) {
 		/* True length was only available on the last pkt */
 		skb_put(skb, buffer->lro_length);
diff --git a/drivers/net/ni5010.c b/drivers/net/ni5010.c
index 8be0d030d6f..3d5b4232f65 100644
--- a/drivers/net/ni5010.c
+++ b/drivers/net/ni5010.c
@@ -562,7 +562,6 @@ static void ni5010_rx(struct net_device *dev)
 		return;
 	}
 
-	skb->dev = dev;
 	skb_reserve(skb, 2);
 
 	/* Read packet into buffer */
diff --git a/drivers/net/ni52.c b/drivers/net/ni52.c
index a6f4b24b017..70b6812a8a7 100644
--- a/drivers/net/ni52.c
+++ b/drivers/net/ni52.c
@@ -934,7 +934,6 @@ static void ni52_rcv_int(struct net_device *dev)
 					skb = (struct sk_buff *) dev_alloc_skb(totlen+2);
 					if(skb != NULL)
 					{
-						skb->dev = dev;
 						skb_reserve(skb,2);
 						skb_put(skb,totlen);
 						eth_copy_and_sum(skb,(char *) p->base+(unsigned long) rbd->buffer,totlen,0);
diff --git a/drivers/net/ni65.c b/drivers/net/ni65.c
index 1578f4d9849..782201d12c2 100644
--- a/drivers/net/ni65.c
+++ b/drivers/net/ni65.c
@@ -610,7 +610,6 @@ static void *ni65_alloc_mem(struct net_device *dev,char *what,int size,int type)
 			printk(KERN_WARNING "%s: unable to allocate %s memory.\n",dev->name,what);
 			return NULL;
 		}
-		skb->dev = dev;
 		skb_reserve(skb,2+16);
 		skb_put(skb,R_BUF_SIZE);	 /* grab the whole space .. (not necessary) */
 		ptr = skb->data;
@@ -1094,7 +1093,6 @@ static void ni65_recv_intr(struct net_device *dev,int csr0)
 			if(skb)
 			{
 				skb_reserve(skb,2);
-	skb->dev = dev;
 #ifdef RCV_VIA_SKB
 				if( (unsigned long) (skb->data + R_BUF_SIZE) > 0x1000000) {
 					skb_put(skb,len);
diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c
index 9ec6e9e54f4..747988b12ec 100644
--- a/drivers/net/ns83820.c
+++ b/drivers/net/ns83820.c
@@ -607,7 +607,6 @@ static inline int rx_refill(struct net_device *ndev, gfp_t gfp)
 		res &= 0xf;
 		skb_reserve(skb, res);
 
-		skb->dev = ndev;
 		if (gfp != GFP_ATOMIC)
 			spin_lock_irqsave(&dev->rx_info.lock, flags);
 		res = ns83820_add_rx_skb(dev, skb);
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index d670ac74824..3f4213f3d5d 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -334,8 +334,6 @@ static void pasemi_mac_replenish_rx_ring(struct net_device *dev)
 			break;
 		}
 
-		skb->dev = dev;
-
 		dma = pci_map_single(mac->dma_pdev, skb->data, skb->len,
 				     PCI_DMA_FROMDEVICE);
 
diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c
index 6ca4e4fa6b8..099972c977e 100644
--- a/drivers/net/pci-skeleton.c
+++ b/drivers/net/pci-skeleton.c
@@ -1565,7 +1565,6 @@ static void netdrv_rx_interrupt (struct net_device *dev,
 
 		skb = dev_alloc_skb (pkt_size + 2);
 		if (skb) {
-			skb->dev = dev;
 			skb_reserve (skb, 2);	/* 16 byte align the IP fields. */
 
 			eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);
diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c
index c7bd9c1c7f3..2b395ee21f7 100644
--- a/drivers/net/pcmcia/3c574_cs.c
+++ b/drivers/net/pcmcia/3c574_cs.c
@@ -1056,7 +1056,6 @@ static int el3_rx(struct net_device *dev, int worklimit)
 			DEBUG(3, "  Receiving packet size %d status %4.4x.\n",
 				  pkt_len, rx_status);
 			if (skb != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);
 				insl(ioaddr+RX_FIFO, skb_put(skb, pkt_len),
 						((pkt_len+3)>>2));
diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c
index 461e8274ef6..143ae2ff309 100644
--- a/drivers/net/pcmcia/3c589_cs.c
+++ b/drivers/net/pcmcia/3c589_cs.c
@@ -883,7 +883,6 @@ static int el3_rx(struct net_device *dev)
 	    DEBUG(3, "    Receiving packet size %d status %4.4x.\n",
 		  pkt_len, rx_status);
 	    if (skb != NULL) {
-		skb->dev = dev;
 		skb_reserve(skb, 2);
 		insl(ioaddr+RX_FIFO, skb_put(skb, pkt_len),
 			(pkt_len+3)>>2);
diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c
index 6139048f811..fabbe95c7ef 100644
--- a/drivers/net/pcmcia/axnet_cs.c
+++ b/drivers/net/pcmcia/axnet_cs.c
@@ -1496,7 +1496,6 @@ static void ei_receive(struct net_device *dev)
 			else
 			{
 				skb_reserve(skb,2);	/* IP headers on 16 byte boundaries */
-				skb->dev = dev;
 				skb_put(skb, pkt_len);	/* Make room */
 				ei_block_input(dev, pkt_len, skb, current_offset + sizeof(rx_frame));
 				skb->protocol=eth_type_trans(skb,dev);
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index 0d7de617e53..3f93d493323 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -999,7 +999,6 @@ static void fjn_rx(struct net_device *dev)
 		lp->stats.rx_dropped++;
 		break;
 	    }
-	    skb->dev = dev;
 
 	    skb_reserve(skb, 2);
 	    insw(ioaddr + DATAPORT, skb_put(skb, pkt_len),
diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c
index 3b707747a81..ec0af65cd5d 100644
--- a/drivers/net/pcmcia/nmclan_cs.c
+++ b/drivers/net/pcmcia/nmclan_cs.c
@@ -1182,8 +1182,6 @@ static int mace_rx(struct net_device *dev, unsigned char RxCnt)
       skb = dev_alloc_skb(pkt_len+2);
 
       if (skb != NULL) {
-	skb->dev = dev;
-
 	skb_reserve(skb, 2);
 	insw(ioaddr + AM2150_RCV, skb_put(skb, pkt_len), pkt_len>>1);
 	if (pkt_len & 1)
diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c
index 2561f76033e..7912dbd1425 100644
--- a/drivers/net/pcmcia/smc91c92_cs.c
+++ b/drivers/net/pcmcia/smc91c92_cs.c
@@ -1669,7 +1669,6 @@ static void smc_rx(struct net_device *dev)
 	     (packet_length+1)>>1);
 	skb->protocol = eth_type_trans(skb, dev);
 	
-	skb->dev = dev;
 	netif_rx(skb);
 	dev->last_rx = jiffies;
 	smc->stats.rx_packets++;
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index 5879e7c3698..809ec440b8e 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -1226,7 +1226,6 @@ xirc2ps_interrupt(int irq, void *dev_id)
 			    (pktlen+1)>>1);
 		}
 		skb->protocol = eth_type_trans(skb, dev);
-		skb->dev = dev;
 		netif_rx(skb);
 		dev->last_rx = jiffies;
 		lp->stats.rx_packets++;
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c
index 4d94ba7899b..0791360a6a6 100644
--- a/drivers/net/pcnet32.c
+++ b/drivers/net/pcnet32.c
@@ -1206,7 +1206,6 @@ static void pcnet32_rx_entry(struct net_device *dev,
 					 PCI_DMA_FROMDEVICE);
 			skb_put(skb, pkt_len);
 			lp->rx_skbuff[entry] = newskb;
-			newskb->dev = dev;
 			lp->rx_dma_addr[entry] =
 					    pci_map_single(lp->pci_dev,
 							   newskb->data,
diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c
index a8246eb2f8d..40d2639eedc 100755
--- a/drivers/net/qla3xxx.c
+++ b/drivers/net/qla3xxx.c
@@ -1873,7 +1873,6 @@ static void ql_process_mac_rx_intr(struct ql3_adapter *qdev,
 			 pci_unmap_len(lrg_buf_cb2, maplen),
 			 PCI_DMA_FROMDEVICE);
 	prefetch(skb->data);
-	skb->dev = qdev->ndev;
 	skb->ip_summed = CHECKSUM_NONE;
 	skb->protocol = eth_type_trans(skb, qdev->ndev);
 
@@ -1946,7 +1945,6 @@ static void ql_process_macip_rx_intr(struct ql3_adapter *qdev,
 			skb2->ip_summed = CHECKSUM_UNNECESSARY;
 		}
 	}
-	skb2->dev = qdev->ndev;
 	skb2->protocol = eth_type_trans(skb2, qdev->ndev);
 
 	netif_receive_skb(skb2);
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 6a77b8a9224..34280f94e9f 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -2586,7 +2586,6 @@ rtl8169_rx_interrupt(struct net_device *dev, struct rtl8169_private *tp,
 			pci_action(tp->pci_dev, le64_to_cpu(desc->addr),
 				   tp->rx_buf_sz, PCI_DMA_FROMDEVICE);
 
-			skb->dev = dev;
 			skb_put(skb, pkt_size);
 			skb->protocol = eth_type_trans(skb, dev);
 
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index b7ff484af3e..df6b73872fd 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -115,7 +115,6 @@ static int rionet_rx_clean(struct net_device *ndev)
 
 		rnet->rx_skb[i]->data = data;
 		skb_put(rnet->rx_skb[i], RIO_MAX_MSG_SIZE);
-		rnet->rx_skb[i]->dev = ndev;
 		rnet->rx_skb[i]->protocol =
 		    eth_type_trans(rnet->rx_skb[i], ndev);
 		error = netif_rx(rnet->rx_skb[i]);
diff --git a/drivers/net/saa9730.c b/drivers/net/saa9730.c
index 143958f1ef0..ad94358ece8 100644
--- a/drivers/net/saa9730.c
+++ b/drivers/net/saa9730.c
@@ -688,7 +688,6 @@ static int lan_saa9730_rx(struct net_device *dev)
 			} else {
 				lp->stats.rx_bytes += len;
 				lp->stats.rx_packets++;
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align */
 				skb_put(skb, len);	/* make room */
 				eth_copy_and_sum(skb,
diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c
index 103c3174ab5..0a3a379b634 100644
--- a/drivers/net/sb1250-mac.c
+++ b/drivers/net/sb1250-mac.c
@@ -933,9 +933,6 @@ static int sbdma_add_rcvbuffer(sbmacdma_t *d,struct sk_buff *sb)
 		}
 
 		sbdma_align_skb(sb_new, SMP_CACHE_BYTES, ETHER_ALIGN);
-
-		/* mark skbuff owned by our device */
-		sb_new->dev = d->sbdma_eth->sbm_dev;
 	}
 	else {
 		sb_new = sb;
diff --git a/drivers/net/sc92031.c b/drivers/net/sc92031.c
index c32c21af3fd..5b7284c955d 100644
--- a/drivers/net/sc92031.c
+++ b/drivers/net/sc92031.c
@@ -814,7 +814,6 @@ static void _sc92031_rx_tasklet(struct net_device *dev)
 			memcpy(skb_put(skb, pkt_size), rx_ring + rx_ring_offset, pkt_size);
 		}
 
-		skb->dev = dev;
 		skb->protocol = eth_type_trans(skb, dev);
 		dev->last_rx = jiffies;
 		netif_rx(skb);
diff --git a/drivers/net/seeq8005.c b/drivers/net/seeq8005.c
index 0d6c95c7aed..4bce7c4f373 100644
--- a/drivers/net/seeq8005.c
+++ b/drivers/net/seeq8005.c
@@ -550,7 +550,6 @@ static void seeq8005_rx(struct net_device *dev)
 				lp->stats.rx_dropped++;
 				break;
 			}
-			skb->dev = dev;
 			skb_reserve(skb, 2);	/* align data on 16 byte */
 			buf = skb_put(skb,pkt_len);
 
diff --git a/drivers/net/sgiseeq.c b/drivers/net/sgiseeq.c
index 52ed522a234..5a891913218 100644
--- a/drivers/net/sgiseeq.c
+++ b/drivers/net/sgiseeq.c
@@ -318,7 +318,6 @@ static inline void sgiseeq_rx(struct net_device *dev, struct sgiseeq_private *sp
 			skb = dev_alloc_skb(len + 2);
 
 			if (skb) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);
 				skb_put(skb, len);
 
diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c
index 34463ce6f13..bc8de48da31 100644
--- a/drivers/net/sis190.c
+++ b/drivers/net/sis190.c
@@ -632,7 +632,6 @@ static int sis190_rx_interrupt(struct net_device *dev,
 			pci_action(tp->pci_dev, le32_to_cpu(desc->addr),
 				   tp->rx_buf_sz, PCI_DMA_FROMDEVICE);
 
-			skb->dev = dev;
 			skb_put(skb, pkt_size);
 			skb->protocol = eth_type_trans(skb, dev);
 
diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c
index b2a3b19d773..dea0126723d 100644
--- a/drivers/net/sis900.c
+++ b/drivers/net/sis900.c
@@ -1160,7 +1160,6 @@ sis900_init_rx_ring(struct net_device *net_dev)
 			   buffer */
 			break;
 		}
-		skb->dev = net_dev;
 		sis_priv->rx_skbuff[i] = skb;
 		sis_priv->rx_ring[i].cmdsts = RX_BUF_SIZE;
                 sis_priv->rx_ring[i].bufptr = pci_map_single(sis_priv->pci_dev,
@@ -1800,7 +1799,6 @@ static int sis900_rx(struct net_device *net_dev)
 			sis_priv->stats.rx_packets++;
 			sis_priv->dirty_rx++;
 refill_rx_ring:
-			skb->dev = net_dev;
 			sis_priv->rx_skbuff[entry] = skb;
 			sis_priv->rx_ring[entry].cmdsts = RX_BUF_SIZE;
                 	sis_priv->rx_ring[entry].bufptr =
@@ -1832,7 +1830,6 @@ refill_rx_ring:
 				sis_priv->stats.rx_dropped++;
 				break;
 			}
-			skb->dev = net_dev;
 			sis_priv->rx_skbuff[entry] = skb;
 			sis_priv->rx_ring[entry].cmdsts = RX_BUF_SIZE;
                 	sis_priv->rx_ring[entry].bufptr =
diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c
index e94ab256b54..9ac1fe659dc 100644
--- a/drivers/net/sk98lin/skge.c
+++ b/drivers/net/sk98lin/skge.c
@@ -2193,7 +2193,6 @@ rx_start:
 				SK_PNMI_CNT_RX_OCTETS_DELIVERED(pAC,
 					FrameLength, pRxPort->PortIndex);
 
-				pMsg->dev = pAC->dev[pRxPort->PortIndex];
 				pMsg->protocol = eth_type_trans(pMsg,
 					pAC->dev[pRxPort->PortIndex]);
 				netif_rx(pMsg);
@@ -2246,7 +2245,6 @@ rx_start:
 				(IFF_PROMISC | IFF_ALLMULTI)) != 0 ||
 				(ForRlmt & SK_RLMT_RX_PROTOCOL) ==
 				SK_RLMT_RX_PROTOCOL) {
-				pMsg->dev = pAC->dev[pRxPort->PortIndex];
 				pMsg->protocol = eth_type_trans(pMsg,
 					pAC->dev[pRxPort->PortIndex]);
 				netif_rx(pMsg);
diff --git a/drivers/net/smc911x.c b/drivers/net/smc911x.c
index c9561413198..8a2109a913b 100644
--- a/drivers/net/smc911x.c
+++ b/drivers/net/smc911x.c
@@ -502,7 +502,6 @@ static inline void	 smc911x_rcv(struct net_device *dev)
 		DBG(SMC_DEBUG_PKTS, "%s: Received packet\n", dev->name,);
 		PRINT_PKT(data, ((pkt_len - 4) <= 64) ? pkt_len - 4 : 64);
 		dev->last_rx = jiffies;
-		skb->dev = dev;
 		skb->protocol = eth_type_trans(skb, dev);
 		netif_rx(skb);
 		lp->stats.rx_packets++;
@@ -1307,7 +1306,6 @@ smc911x_rx_dma_irq(int dma, void *data)
 	lp->current_rx_skb = NULL;
 	PRINT_PKT(skb->data, skb->len);
 	dev->last_rx = jiffies;
-	skb->dev = dev;
 	skb->protocol = eth_type_trans(skb, dev);
 	netif_rx(skb);
 	lp->stats.rx_packets++;
diff --git a/drivers/net/smc9194.c b/drivers/net/smc9194.c
index bd6e84506c2..36c1ebadbf2 100644
--- a/drivers/net/smc9194.c
+++ b/drivers/net/smc9194.c
@@ -1262,7 +1262,6 @@ static void smc_rcv(struct net_device *dev)
 
 		skb_reserve( skb, 2 );   /* 16 bit alignment */
 
-		skb->dev = dev;
 		data = skb_put( skb, packet_length);
 
 #ifdef USE_32_BIT
diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c
index 49f4b7712eb..01cc3c742c3 100644
--- a/drivers/net/smc91x.c
+++ b/drivers/net/smc91x.c
@@ -568,7 +568,6 @@ static inline void  smc_rcv(struct net_device *dev)
 		PRINT_PKT(data, packet_len - 4);
 
 		dev->last_rx = jiffies;
-		skb->dev = dev;
 		skb->protocol = eth_type_trans(skb, dev);
 		netif_rx(skb);
 		lp->stats.rx_packets++;
diff --git a/drivers/net/sonic.c b/drivers/net/sonic.c
index ed7aa0a5acc..c6320c71993 100644
--- a/drivers/net/sonic.c
+++ b/drivers/net/sonic.c
@@ -85,7 +85,6 @@ static int sonic_open(struct net_device *dev)
 			       dev->name);
 			return -ENOMEM;
 		}
-		skb->dev = dev;
 		/* align IP header unless DMA requires otherwise */
 		if (SONIC_BUS_SCALE(lp->dma_bitmode) == 2)
 			skb_reserve(skb, 2);
@@ -451,7 +450,6 @@ static void sonic_rx(struct net_device *dev)
 				lp->stats.rx_dropped++;
 				break;
 			}
-			new_skb->dev = dev;
 			/* provide 16 byte IP header alignment unless DMA requires otherwise */
 			if(SONIC_BUS_SCALE(lp->dma_bitmode) == 2)
 				skb_reserve(new_skb, 2);
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index e3019d52c30..f7e0ac7f789 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -990,7 +990,6 @@ spider_net_pass_skb_up(struct spider_net_descr *descr,
 	netdev = card->netdev;
 
 	skb = descr->skb;
-	skb->dev = netdev;
 	skb_put(skb, hwdescr->valid_size);
 
 	/* the card seems to add 2 bytes of junk in front
diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c
index 8bba2e3da7e..9d6e454a8f9 100644
--- a/drivers/net/starfire.c
+++ b/drivers/net/starfire.c
@@ -1452,7 +1452,6 @@ static int __netdev_rx(struct net_device *dev, int *quota)
 		   to a minimally-sized skbuff. */
 		if (pkt_len < rx_copybreak
 		    && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-			skb->dev = dev;
 			skb_reserve(skb, 2);	/* 16 byte align the IP header */
 			pci_dma_sync_single_for_cpu(np->pci_dev,
 						    np->rx_info[entry].mapping,
diff --git a/drivers/net/sun3_82586.c b/drivers/net/sun3_82586.c
index 4757aa647c7..5bcc749bef1 100644
--- a/drivers/net/sun3_82586.c
+++ b/drivers/net/sun3_82586.c
@@ -775,7 +775,6 @@ static void sun3_82586_rcv_int(struct net_device *dev)
 					skb = (struct sk_buff *) dev_alloc_skb(totlen+2);
 					if(skb != NULL)
 					{
-						skb->dev = dev;
 						skb_reserve(skb,2);
 						skb_put(skb,totlen);
 						eth_copy_and_sum(skb,(char *) p->base+swab32((unsigned long) rbd->buffer),totlen,0);
diff --git a/drivers/net/sun3lance.c b/drivers/net/sun3lance.c
index 7bee45b42a2..0454827c8c2 100644
--- a/drivers/net/sun3lance.c
+++ b/drivers/net/sun3lance.c
@@ -851,7 +851,6 @@ static int lance_rx( struct net_device *dev )
 				}
 
 
-				skb->dev = dev;
 				skb_reserve( skb, 2 );	/* 16 byte align */
 				skb_put( skb, pkt_len );	/* Make room */
 //			        memcpy( skb->data, PKTBUF_ADDR(head), pkt_len );
diff --git a/drivers/net/sunbmac.c b/drivers/net/sunbmac.c
index 18f88853e1e..2ad8d58dee3 100644
--- a/drivers/net/sunbmac.c
+++ b/drivers/net/sunbmac.c
@@ -855,7 +855,6 @@ static void bigmac_rx(struct bigmac *bp)
 				drops++;
 				goto drop_it;
 			}
-			copy_skb->dev = bp->dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			sbus_dma_sync_single_for_cpu(bp->bigmac_sdev,
diff --git a/drivers/net/sundance.c b/drivers/net/sundance.c
index c06ecc8002b..f51ba31970a 100644
--- a/drivers/net/sundance.c
+++ b/drivers/net/sundance.c
@@ -1308,7 +1308,6 @@ static void rx_poll(unsigned long data)
 			   to a minimally-sized skbuff. */
 			if (pkt_len < rx_copybreak
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				pci_dma_sync_single_for_cpu(np->pci_dev,
 							    desc->frag[0].addr,
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 08ea61db46f..db2e1a6b723 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -845,7 +845,6 @@ static int gem_rx(struct gem *gp, int work_to_do)
 				goto drop_it;
 			}
 
-			copy_skb->dev = gp->dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			pci_dma_sync_single_for_cpu(gp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index 192bbc91c73..aca592bc032 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -2058,7 +2058,6 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev)
 				goto drop_it;
 			}
 
-			copy_skb->dev = dev;
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			hme_dma_sync_for_cpu(hp, dma_addr, len, DMA_FROMDEVICE);
diff --git a/drivers/net/sunlance.c b/drivers/net/sunlance.c
index b0929a457b6..8f53a1ef608 100644
--- a/drivers/net/sunlance.c
+++ b/drivers/net/sunlance.c
@@ -547,7 +547,6 @@ static void lance_rx_dvma(struct net_device *dev)
 
 			lp->stats.rx_bytes += len;
 
-			skb->dev = dev;
 			skb_reserve(skb, 2);		/* 16 byte align */
 			skb_put(skb, len);		/* make room */
 			eth_copy_and_sum(skb,
@@ -721,7 +720,6 @@ static void lance_rx_pio(struct net_device *dev)
 
 			lp->stats.rx_bytes += len;
 
-			skb->dev = dev;
 			skb_reserve (skb, 2);		/* 16 byte align */
 			skb_put(skb, len);		/* make room */
 			lance_piocopy_to_skb(skb, &(ib->rx_buf[entry][0]), len);
diff --git a/drivers/net/sunqe.c b/drivers/net/sunqe.c
index f3bad56d476..fbfb98284fd 100644
--- a/drivers/net/sunqe.c
+++ b/drivers/net/sunqe.c
@@ -437,7 +437,6 @@ static void qe_rx(struct sunqe *qep)
 				drops++;
 				qep->net_stats.rx_dropped++;
 			} else {
-				skb->dev = qep->dev;
 				skb_reserve(skb, 2);
 				skb_put(skb, len);
 				eth_copy_and_sum(skb, (unsigned char *) this_qbuf,
diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c
index e3a7e3ceab7..d7741e23f8d 100644
--- a/drivers/net/tc35815.c
+++ b/drivers/net/tc35815.c
@@ -1145,7 +1145,6 @@ tc35815_rx(struct net_device *dev)
 				break;
 			}
 			skb_reserve(skb, 2);   /* 16 bit alignment */
-			skb->dev = dev;
 
 			data = skb_put(skb, pkt_len);
 
diff --git a/drivers/net/tlan.c b/drivers/net/tlan.c
index f85f0025112..2ede3f58cf9 100644
--- a/drivers/net/tlan.c
+++ b/drivers/net/tlan.c
@@ -1577,7 +1577,6 @@ u32 TLan_HandleRxEOF( struct net_device *dev, u16 host_int )
 				printk(KERN_INFO "TLAN: Couldn't allocate memory for received data.\n");
 			else {
 				head_buffer = priv->rxBuffer + (priv->rxHead * TLAN_MAX_FRAME_SIZE);
-				skb->dev = dev;
 				skb_reserve(skb, 2);
 				t = (void *) skb_put(skb, frameSize);
 
@@ -1608,7 +1607,6 @@ u32 TLan_HandleRxEOF( struct net_device *dev, u16 host_int )
 				skb->protocol = eth_type_trans( skb, dev );
 				netif_rx( skb );
 
-				new_skb->dev = dev;
 				skb_reserve( new_skb, 2 );
 				t = (void *) skb_put( new_skb, TLAN_MAX_FRAME_SIZE );
 				head_list->buffer[0].address = pci_map_single(priv->pciDev, new_skb->data, TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/tsi108_eth.c b/drivers/net/tsi108_eth.c
index d92c5c597e1..0bfc2c9c1c0 100644
--- a/drivers/net/tsi108_eth.c
+++ b/drivers/net/tsi108_eth.c
@@ -788,7 +788,6 @@ static int tsi108_complete_rx(struct net_device *dev, int budget)
 			printk(".\n");
 		}
 
-		skb->dev = dev;
 		skb_put(skb, data->rxring[rx].len);
 		skb->protocol = eth_type_trans(skb, dev);
 		netif_receive_skb(skb);
diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c
index c82befa209a..8a7effa7090 100644
--- a/drivers/net/tulip/de2104x.c
+++ b/drivers/net/tulip/de2104x.c
@@ -435,7 +435,6 @@ static void de_rx (struct de_private *de)
 			rx_work = 100;
 			goto rx_next;
 		}
-		copy_skb->dev = de->dev;
 
 		if (!copying_skb) {
 			pci_unmap_single(de->pdev, mapping,
diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c
index 4b3cd3d8b62..e40ddb86958 100644
--- a/drivers/net/tulip/de4x5.c
+++ b/drivers/net/tulip/de4x5.c
@@ -3634,7 +3634,6 @@ de4x5_alloc_rx_buff(struct net_device *dev, int index, int len)
     p = dev_alloc_skb(IEEE802_3_SZ + DE4X5_ALIGN + 2);
     if (!p) return NULL;
 
-    p->dev = dev;
     tmp = virt_to_bus(p->data);
     i = ((tmp + DE4X5_ALIGN) & ~DE4X5_ALIGN) - tmp;
     skb_reserve(p, i);
@@ -3655,7 +3654,6 @@ de4x5_alloc_rx_buff(struct net_device *dev, int index, int len)
     p = dev_alloc_skb(len + 2);
     if (!p) return NULL;
 
-    p->dev = dev;
     skb_reserve(p, 2);	                               /* Align */
     if (index < lp->rx_old) {                          /* Wrapped buffer */
 	short tlen = (lp->rxRingSize - lp->rx_old) * RX_BUFF_SZ;
diff --git a/drivers/net/tulip/dmfe.c b/drivers/net/tulip/dmfe.c
index 9aeac76184f..a5e0237a653 100644
--- a/drivers/net/tulip/dmfe.c
+++ b/drivers/net/tulip/dmfe.c
@@ -988,14 +988,12 @@ static void dmfe_rx_packet(struct DEVICE *dev, struct dmfe_board_info * db)
 
 						skb = newskb;
 						/* size less than COPY_SIZE, allocate a rxlen SKB */
-						skb->dev = dev;
 						skb_reserve(skb, 2); /* 16byte align */
 						memcpy(skb_put(skb, rxlen), rxptr->rx_skb_ptr->data, rxlen);
 						dmfe_reuse_skb(db, rxptr->rx_skb_ptr);
-					} else {
-						skb->dev = dev;
+					} else
 						skb_put(skb, rxlen);
-					}
+
 					skb->protocol = eth_type_trans(skb, dev);
 					netif_rx(skb);
 					dev->last_rx = jiffies;
diff --git a/drivers/net/tulip/interrupt.c b/drivers/net/tulip/interrupt.c
index e3488d7b8ed..e86df07769a 100644
--- a/drivers/net/tulip/interrupt.c
+++ b/drivers/net/tulip/interrupt.c
@@ -192,7 +192,6 @@ int tulip_poll(struct net_device *dev, int *budget)
                                   to a minimally-sized skbuff. */
                                if (pkt_len < tulip_rx_copybreak
                                    && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-                                       skb->dev = dev;
                                        skb_reserve(skb, 2);    /* 16 byte align the IP header */
                                        pci_dma_sync_single_for_cpu(tp->pdev,
 								   tp->rx_buffers[entry].mapping,
@@ -416,7 +415,6 @@ static int tulip_rx(struct net_device *dev)
 			   to a minimally-sized skbuff. */
 			if (pkt_len < tulip_rx_copybreak
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				pci_dma_sync_single_for_cpu(tp->pdev,
 							    tp->rx_buffers[entry].mapping,
diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c
index 229158e8e4b..9a5850fa644 100644
--- a/drivers/net/tulip/uli526x.c
+++ b/drivers/net/tulip/uli526x.c
@@ -828,14 +828,12 @@ static void uli526x_rx_packet(struct net_device *dev, struct uli526x_board_info
 					( (skb = dev_alloc_skb(rxlen + 2) )
 					!= NULL) ) {
 					/* size less than COPY_SIZE, allocate a rxlen SKB */
-					skb->dev = dev;
 					skb_reserve(skb, 2); /* 16byte align */
 					memcpy(skb_put(skb, rxlen), rxptr->rx_skb_ptr->tail, rxlen);
 					uli526x_reuse_skb(db, rxptr->rx_skb_ptr);
-				} else {
-					skb->dev = dev;
+				} else
 					skb_put(skb, rxlen);
-				}
+
 				skb->protocol = eth_type_trans(skb, dev);
 				netif_rx(skb);
 				dev->last_rx = jiffies;
diff --git a/drivers/net/tulip/winbond-840.c b/drivers/net/tulip/winbond-840.c
index 002a05e0722..d74fa871de1 100644
--- a/drivers/net/tulip/winbond-840.c
+++ b/drivers/net/tulip/winbond-840.c
@@ -813,7 +813,6 @@ static void init_rxtx_rings(struct net_device *dev)
 		np->rx_skbuff[i] = skb;
 		if (skb == NULL)
 			break;
-		skb->dev = dev;			/* Mark as being used by this device. */
 		np->rx_addr[i] = pci_map_single(np->pci_dev,skb->data,
 					np->rx_buf_sz,PCI_DMA_FROMDEVICE);
 
@@ -1229,7 +1228,6 @@ static int netdev_rx(struct net_device *dev)
 			   to a minimally-sized skbuff. */
 			if (pkt_len < rx_copybreak
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				pci_dma_sync_single_for_cpu(np->pci_dev,np->rx_addr[entry],
 							    np->rx_skbuff[entry]->len,
@@ -1278,7 +1276,6 @@ static int netdev_rx(struct net_device *dev)
 			np->rx_skbuff[entry] = skb;
 			if (skb == NULL)
 				break;			/* Better luck next round. */
-			skb->dev = dev;			/* Mark as being used by this device. */
 			np->rx_addr[entry] = pci_map_single(np->pci_dev,
 							skb->data,
 							np->rx_buf_sz, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/tulip/xircom_cb.c b/drivers/net/tulip/xircom_cb.c
index 61d313049dd..1fe3734e155 100644
--- a/drivers/net/tulip/xircom_cb.c
+++ b/drivers/net/tulip/xircom_cb.c
@@ -1207,7 +1207,6 @@ static void investigate_read_descriptor(struct net_device *dev,struct xircom_pri
 				card->stats.rx_dropped++;
 				goto out;
 			}
-			skb->dev = dev;
 			skb_reserve(skb, 2);
 			eth_copy_and_sum(skb, (unsigned char*)&card->rx_buffer[bufferoffset / 4], pkt_len, 0);
 			skb_put(skb, pkt_len);
diff --git a/drivers/net/tulip/xircom_tulip_cb.c b/drivers/net/tulip/xircom_tulip_cb.c
index a998c5d0ae9..3f24c82755f 100644
--- a/drivers/net/tulip/xircom_tulip_cb.c
+++ b/drivers/net/tulip/xircom_tulip_cb.c
@@ -1238,7 +1238,6 @@ xircom_rx(struct net_device *dev)
 			   to a minimally-sized skbuff. */
 			if (pkt_len < rx_copybreak
 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 #if ! defined(__alpha__)
 				eth_copy_and_sum(skb, bus_to_virt(tp->rx_ring[entry].buffer1),
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 5643d1e84ed..a57aa010cb2 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -254,11 +254,11 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
 		return -EFAULT;
 	}
 
-	skb->dev = tun->dev;
 	switch (tun->flags & TUN_TYPE_MASK) {
 	case TUN_TUN_DEV:
 		skb->mac.raw = skb->data;
 		skb->protocol = pi.proto;
+		skb->dev = tun->dev;
 		break;
 	case TUN_TAP_DEV:
 		skb->protocol = eth_type_trans(skb, tun->dev);
diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c
index 0d91d094edd..f2dd7763cd0 100644
--- a/drivers/net/typhoon.c
+++ b/drivers/net/typhoon.c
@@ -1708,7 +1708,6 @@ typhoon_rx(struct typhoon *tp, struct basic_ring *rxRing, volatile u32 * ready,
 
 		if(pkt_len < rx_copybreak &&
 		   (new_skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-			new_skb->dev = tp->dev;
 			skb_reserve(new_skb, 2);
 			pci_dma_sync_single_for_cpu(tp->pdev, dma_addr,
 						    PKT_BUF_SZ,
diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c
index f3a972e74e9..adea290a9d5 100644
--- a/drivers/net/via-rhine.c
+++ b/drivers/net/via-rhine.c
@@ -1486,7 +1486,6 @@ static int rhine_rx(struct net_device *dev, int limit)
 			   copying to a minimally-sized skbuff. */
 			if (pkt_len < rx_copybreak &&
 				(skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				pci_dma_sync_single_for_cpu(rp->pdev,
 							    rp->rx_skbuff_dma[entry],
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 8e5d82051bd..9f6cc1569b3 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -1398,7 +1398,6 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx)
 		vptr->stats.multicast++;
 
 	skb = rd_info->skb;
-	skb->dev = vptr->dev;
 
 	pci_dma_sync_single_for_cpu(vptr->pdev, rd_info->skb_dma,
 				    vptr->rx_buf_sz, PCI_DMA_FROMDEVICE);
@@ -1428,7 +1427,7 @@ static int velocity_receive_frame(struct velocity_info *vptr, int idx)
 		   PCI_DMA_FROMDEVICE);
 
 	skb_put(skb, pkt_len - 4);
-	skb->protocol = eth_type_trans(skb, skb->dev);
+	skb->protocol = eth_type_trans(skb, vptr->dev);
 
 	stats->rx_bytes += pkt_len;
 	netif_rx(skb);
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index c6c3c757d6f..3240d10fc86 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -1011,7 +1011,6 @@ static int fr_rx(struct sk_buff *skb)
 		stats->rx_bytes += skb->len;
 		if (pvc->state.becn)
 			stats->rx_compressed++;
-		skb->dev = dev;
 		netif_rx(skb);
 		return NET_RX_SUCCESS;
 	} else {
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
index fc5c0c611ff..35eded7ffb2 100644
--- a/drivers/net/wan/sbni.c
+++ b/drivers/net/wan/sbni.c
@@ -999,11 +999,6 @@ get_rx_buf( struct net_device  *dev )
 	if( !skb )
 		return  NULL;
 
-#ifdef CONFIG_SBNI_MULTILINE
-	skb->dev = ((struct net_local *) dev->priv)->master;
-#else
-	skb->dev = dev;
-#endif
 	skb_reserve( skb, 2 );		/* Align IP on longword boundaries */
 	return  skb;
 }
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index 2ada76a93cb..e50b1482d79 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -3415,10 +3415,8 @@ badrx:
 				skb->pkt_type = PACKET_OTHERHOST;
 				skb->dev = apriv->wifidev;
 				skb->protocol = htons(ETH_P_802_2);
-			} else {
-				skb->dev = dev;
+			} else
 				skb->protocol = eth_type_trans(skb,dev);
-			}
 			skb->dev->last_rx = jiffies;
 			skb->ip_summed = CHECKSUM_NONE;
 
@@ -3641,7 +3639,6 @@ badmic:
 		}
 #endif /* WIRELESS_SPY */
 
-		skb->dev = ai->dev;
 		skb->ip_summed = CHECKSUM_NONE;
 		skb->protocol = eth_type_trans(skb, ai->dev);
 		skb->dev->last_rx = jiffies;
diff --git a/drivers/net/wireless/arlan-main.c b/drivers/net/wireless/arlan-main.c
index 4688e56b69c..498e8486d12 100644
--- a/drivers/net/wireless/arlan-main.c
+++ b/drivers/net/wireless/arlan-main.c
@@ -1500,7 +1500,6 @@ static void arlan_rx_interrupt(struct net_device *dev, u_char rxStatus, u_short
 				break;
 			}
 			skb_reserve(skb, 2);
-			skb->dev = dev;
 			skbtmp = skb_put(skb, pkt_len);
 
 			memcpy_fromio(skbtmp + ARLAN_FAKE_HDR_LEN, ((char __iomem *) arlan) + rxOffset, pkt_len - ARLAN_FAKE_HDR_LEN);
diff --git a/drivers/net/wireless/atmel.c b/drivers/net/wireless/atmel.c
index 23eba698aec..1c17cbe007b 100644
--- a/drivers/net/wireless/atmel.c
+++ b/drivers/net/wireless/atmel.c
@@ -920,7 +920,6 @@ static void fast_rx_path(struct atmel_private *priv,
 		memcpy(&skbp[6], header->addr2, 6); /* source address */
 
 	priv->dev->last_rx = jiffies;
-	skb->dev = priv->dev;
 	skb->protocol = eth_type_trans(skb, priv->dev);
 	skb->ip_summed = CHECKSUM_NONE;
 	netif_rx(skb);
@@ -1028,7 +1027,6 @@ static void frag_rx_path(struct atmel_private *priv,
 				       priv->rx_buf,
 				       priv->frag_len + 12);
 				priv->dev->last_rx = jiffies;
-				skb->dev = priv->dev;
 				skb->protocol = eth_type_trans(skb, priv->dev);
 				skb->ip_summed = CHECKSUM_NONE;
 				netif_rx(skb);
diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c
index 7e04dc94b3b..f78ee26d787 100644
--- a/drivers/net/wireless/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_rx.c
@@ -1083,7 +1083,6 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 	if (skb) {
 		skb->protocol = eth_type_trans(skb, dev);
 		memset(skb->cb, 0, sizeof(skb->cb));
-		skb->dev = dev;
 		netif_rx(skb);
 	}
 
diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c
index a009ab51771..45b00e13ab2 100644
--- a/drivers/net/wireless/netwave_cs.c
+++ b/drivers/net/wireless/netwave_cs.c
@@ -1283,7 +1283,6 @@ static int netwave_rx(struct net_device *dev)
 
 	skb_reserve( skb, 2);  /* Align IP on 16 byte */
 	skb_put( skb, rcvLen);
-	skb->dev = dev;
 
 	/* Copy packet fragments to the skb data area */
 	ptr = (u_char*) skb->data;
diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c
index 4e7f6cf5143..3f9d78d059b 100644
--- a/drivers/net/wireless/orinoco.c
+++ b/drivers/net/wireless/orinoco.c
@@ -915,7 +915,6 @@ static void __orinoco_ev_rx(struct net_device *dev, hermes_t *hw)
 		memcpy(hdr->h_source, desc.addr2, ETH_ALEN);
 
 	dev->last_rx = jiffies;
-	skb->dev = dev;
 	skb->protocol = eth_type_trans(skb, dev);
 	skb->ip_summed = CHECKSUM_NONE;
 	if (fc & IEEE80211_FCTL_TODS)
diff --git a/drivers/net/wireless/prism54/islpci_eth.c b/drivers/net/wireless/prism54/islpci_eth.c
index b1122912ee2..fc2e0f3a896 100644
--- a/drivers/net/wireless/prism54/islpci_eth.c
+++ b/drivers/net/wireless/prism54/islpci_eth.c
@@ -374,10 +374,6 @@ islpci_eth_receive(islpci_private *priv)
 	DEBUG(SHOW_BUFFER_CONTENTS, "\nrx %p ", skb->data);
 	display_buffer((char *) skb->data, skb->len);
 #endif
-
-	/* do some additional sk_buff and network layer parameters */
-	skb->dev = ndev;
-
 	/* take care of monitor mode and spy monitoring. */
 	if (unlikely(priv->iw_mode == IW_MODE_MONITOR))
 		discard = islpci_monitor_rx(priv, &skb);
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 47b2ccb6a63..9633b0457f8 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -2232,7 +2232,6 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, unsigned i
         return;
     }
     skb_reserve( skb, 2);   /* Align IP on 16 byte (TBD check this)*/
-    skb->dev = dev;
 
     DEBUG(4,"ray_cs rx_data total_len = %x, rx_len = %x\n",total_len,rx_len);
 
diff --git a/drivers/net/wireless/wavelan.c b/drivers/net/wireless/wavelan.c
index 2aa3c761dd8..69cb1471096 100644
--- a/drivers/net/wireless/wavelan.c
+++ b/drivers/net/wireless/wavelan.c
@@ -2512,8 +2512,6 @@ wv_packet_read(struct net_device * dev, u16 buf_off, int sksize)
 		return;
 	}
 
-	skb->dev = dev;
-
 	/* Copy the packet to the buffer. */
 	obram_read(ioaddr, buf_off, skb_put(skb, sksize), sksize);
 	skb->protocol = eth_type_trans(skb, dev);
diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c
index b04239792f6..9351ee77331 100644
--- a/drivers/net/wireless/wavelan_cs.c
+++ b/drivers/net/wireless/wavelan_cs.c
@@ -2884,8 +2884,6 @@ wv_packet_read(struct net_device *		dev,
       return;
     }
 
-  skb->dev = dev;
-
   skb_reserve(skb, 2);
   fd_p = read_ringbuf(dev, fd_p, (char *) skb_put(skb, sksize), sksize);
   skb->protocol = eth_type_trans(skb, dev);
diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c
index 6cb66a356c9..1fe013a7297 100644
--- a/drivers/net/wireless/zd1201.c
+++ b/drivers/net/wireless/zd1201.c
@@ -327,7 +327,6 @@ static void zd1201_usbrx(struct urb *urb)
 			memcpy(skb_put(skb, 6), &data[datalen-8], 6);
 			memcpy(skb_put(skb, 2), &data[datalen-24], 2);
 			memcpy(skb_put(skb, len), data, len);
-			skb->dev = zd->dev;
 			skb->dev->last_rx = jiffies;
 			skb->protocol = eth_type_trans(skb, zd->dev);
 			zd->stats.rx_packets++;
@@ -385,7 +384,6 @@ static void zd1201_usbrx(struct urb *urb)
 			memcpy(skb_put(skb, 2), &data[6], 2);
 			memcpy(skb_put(skb, len), data+8, len);
 		}
-		skb->dev = zd->dev;
 		skb->dev->last_rx = jiffies;
 		skb->protocol = eth_type_trans(skb, zd->dev);
 		zd->stats.rx_packets++;
diff --git a/drivers/net/yellowfin.c b/drivers/net/yellowfin.c
index 2412ce4917f..3f4a7cf9efe 100644
--- a/drivers/net/yellowfin.c
+++ b/drivers/net/yellowfin.c
@@ -1137,7 +1137,6 @@ static int yellowfin_rx(struct net_device *dev)
 				skb = dev_alloc_skb(pkt_len + 2);
 				if (skb == NULL)
 					break;
-				skb->dev = dev;
 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
 				eth_copy_and_sum(skb, rx_skb->data, pkt_len, 0);
 				skb_put(skb, pkt_len);
diff --git a/drivers/net/znet.c b/drivers/net/znet.c
index b24b0727108..4032e9f6f9b 100644
--- a/drivers/net/znet.c
+++ b/drivers/net/znet.c
@@ -774,7 +774,6 @@ static void znet_rx(struct net_device *dev)
 				znet->stats.rx_dropped++;
 				break;
 			}
-			skb->dev = dev;
 
 			if (&znet->rx_cur[(pkt_len+1)>>1] > znet->rx_end) {
 				int semi_cnt = (znet->rx_end - znet->rx_cur)<<1;
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c
index 04e6b8508fb..8f9f217e0a6 100644
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -1766,7 +1766,6 @@ static void rx_complete (struct usb_ep *ep, struct usb_request *req)
 			break;
 		}
 
-		skb->dev = dev->net;
 		skb->protocol = eth_type_trans (skb, dev->net);
 		dev->stats.rx_packets++;
 		dev->stats.rx_bytes += skb->len;
diff --git a/drivers/usb/net/catc.c b/drivers/usb/net/catc.c
index 4852012735f..d82022dd7f2 100644
--- a/drivers/usb/net/catc.c
+++ b/drivers/usb/net/catc.c
@@ -255,7 +255,6 @@ static void catc_rx_done(struct urb *urb)
 		if (!(skb = dev_alloc_skb(pkt_len)))
 			return;
 
-		skb->dev = catc->netdev;
 		eth_copy_and_sum(skb, pkt_start + pkt_offset, pkt_len, 0);
 		skb_put(skb, pkt_len);
 
diff --git a/drivers/usb/net/kaweth.c b/drivers/usb/net/kaweth.c
index de95268ae4b..a0cc05d21a6 100644
--- a/drivers/usb/net/kaweth.c
+++ b/drivers/usb/net/kaweth.c
@@ -636,8 +636,6 @@ static void kaweth_usb_receive(struct urb *urb)
 
 		skb_reserve(skb, 2);    /* Align IP on 16 byte boundaries */
 
-		skb->dev = net;
-
 		eth_copy_and_sum(skb, kaweth->rx_buf + 2, pkt_len, 0);
 
 		skb_put(skb, pkt_len);
diff --git a/drivers/usb/net/pegasus.c b/drivers/usb/net/pegasus.c
index 6d12961cf9f..13f70e09ea4 100644
--- a/drivers/usb/net/pegasus.c
+++ b/drivers/usb/net/pegasus.c
@@ -575,7 +575,6 @@ static void fill_skb_pool(pegasus_t * pegasus)
 		 */
 		if (pegasus->rx_pool[i] == NULL)
 			return;
-		pegasus->rx_pool[i]->dev = pegasus->net;
 		skb_reserve(pegasus->rx_pool[i], 2);
 	}
 }
@@ -1415,8 +1414,10 @@ static void pegasus_disconnect(struct usb_interface *intf)
 	unlink_all_urbs(pegasus);
 	free_all_urbs(pegasus);
 	free_skb_pool(pegasus);
-	if (pegasus->rx_skb)
+	if (pegasus->rx_skb != NULL) {
 		dev_kfree_skb(pegasus->rx_skb);
+		pegasus->rx_skb = NULL;
+	}
 	free_netdev(pegasus->net);
 }
 
diff --git a/drivers/usb/net/rtl8150.c b/drivers/usb/net/rtl8150.c
index ea153dc9b0a..fa598f0340c 100644
--- a/drivers/usb/net/rtl8150.c
+++ b/drivers/usb/net/rtl8150.c
@@ -646,7 +646,6 @@ static void fill_skb_pool(rtl8150_t *dev)
 		if (!skb) {
 			return;
 		}
-		skb->dev = dev->netdev;
 		skb_reserve(skb, 2);
 		dev->rx_skb_pool[i] = skb;
 	}
diff --git a/drivers/usb/net/usbnet.c b/drivers/usb/net/usbnet.c
index de69b183bd2..0c5465a7909 100644
--- a/drivers/usb/net/usbnet.c
+++ b/drivers/usb/net/usbnet.c
@@ -203,7 +203,6 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb)
 {
 	int	status;
 
-	skb->dev = dev->net;
 	skb->protocol = eth_type_trans (skb, dev->net);
 	dev->stats.rx_packets++;
 	dev->stats.rx_bytes += skb->len;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index e5ed878c882..d339645dc79 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -831,10 +831,7 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
 			skb->protocol = tr_type_trans(skb, dev);
 		else
 #endif
-		{
-			skb->dev = dev;
 			skb->protocol = eth_type_trans(skb, dev);
-		}
 		priv->stats.rx_packets++;
 		priv->stats.rx_bytes += skb->len;
 		memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index b85d1492c35..f7ade186bf9 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -383,7 +383,6 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
 	kfree_skb(skb);
 
 	s->stats.rx_packets++;
-	nskb->dev       = dev;
 	nskb->ip_summed = CHECKSUM_NONE;
 	nskb->protocol  = eth_type_trans(nskb, dev);
 	netif_rx_ni(nskb);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 41c5065f4a8..01ecbe42b1e 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -156,6 +156,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	struct ethhdr *eth;
 	unsigned char *rawp;
 
+	skb->dev = dev;
 	skb->mac.raw = skb->data;
 	skb_pull(skb, ETH_HLEN);
 	eth = eth_hdr(skb);
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 4084909f6f9..d5f5c661668 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -800,7 +800,6 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 	if (skb) {
 		skb->protocol = eth_type_trans(skb, dev);
 		memset(skb->cb, 0, sizeof(skb->cb));
-		skb->dev = dev;
 		skb->ip_summed = CHECKSUM_NONE;	/* 802.11 crc not sufficient */
 		if (netif_rx(skb) == NET_RX_DROP) {
 			/* netif_rx always succeeds, but it might drop
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 672ab3f6903..c421521c0a9 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -234,8 +234,7 @@ int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
 	 * might have been previously set by the low level IrDA network
 	 * device driver
 	 */
-	skb->dev = self->dev;
-	skb->protocol=eth_type_trans(skb, skb->dev); /* Remove eth header */
+	skb->protocol = eth_type_trans(skb, self->dev); /* Remove eth header */
 
 	self->stats.rx_packets++;
 	self->stats.rx_bytes += skb->len;
-- 
cgit v1.2.3


From 459a98ed881802dee55897441bc7f77af614368e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Mar 2007 15:30:44 -0700
Subject: [SK_BUFF]: Introduce skb_reset_mac_header(skb)

For the common, open coded 'skb->mac.raw = skb->data' operation, so that we can
later turn skb->mac.raw into a offset, reducing the size of struct sk_buff in
64bit land while possibly keeping it as a pointer on 32bit.

This one touches just the most simple case, next will handle the slightly more
"complex" cases.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/um/drivers/net_kern.c                    |  2 +-
 arch/xtensa/platform-iss/network.c            |  2 +-
 drivers/block/aoe/aoecmd.c                    |  3 ++-
 drivers/ieee1394/eth1394.c                    |  2 +-
 drivers/infiniband/ulp/ipoib/ipoib_cm.c       |  2 +-
 drivers/infiniband/ulp/ipoib/ipoib_ib.c       |  2 +-
 drivers/isdn/i4l/isdn_net.c                   |  4 ++--
 drivers/isdn/i4l/isdn_ppp.c                   |  2 +-
 drivers/message/fusion/mptlan.c               |  4 ++--
 drivers/net/appletalk/cops.c                  |  2 +-
 drivers/net/appletalk/ltpc.c                  |  2 +-
 drivers/net/arcnet/arc-rawmode.c              |  2 +-
 drivers/net/arcnet/capmode.c                  | 11 ++++-------
 drivers/net/arcnet/rfc1051.c                  |  2 +-
 drivers/net/arcnet/rfc1201.c                  |  2 +-
 drivers/net/bonding/bond_3ad.c                |  4 ++--
 drivers/net/bonding/bond_alb.c                |  4 ++--
 drivers/net/cxgb3/cxgb3_offload.c             |  2 +-
 drivers/net/cxgb3/sge.c                       |  3 ++-
 drivers/net/irda/ali-ircc.c                   |  2 +-
 drivers/net/irda/au1k_ir.c                    |  2 +-
 drivers/net/irda/donauboe.c                   |  2 +-
 drivers/net/irda/irda-usb.c                   |  2 +-
 drivers/net/irda/mcs7780.c                    |  4 ++--
 drivers/net/irda/nsc-ircc.c                   |  2 +-
 drivers/net/irda/pxaficp_ir.c                 |  2 +-
 drivers/net/irda/sa1100_ir.c                  |  2 +-
 drivers/net/irda/smsc-ircc2.c                 |  2 +-
 drivers/net/irda/stir4200.c                   |  2 +-
 drivers/net/irda/via-ircc.c                   |  8 ++++----
 drivers/net/irda/vlsi_ir.c                    |  2 +-
 drivers/net/irda/w83977af_ir.c                |  2 +-
 drivers/net/myri_sbus.c                       |  2 +-
 drivers/net/ppp_generic.c                     |  2 +-
 drivers/net/sb1000.c                          |  2 +-
 drivers/net/tun.c                             |  2 +-
 drivers/net/wan/cosa.c                        |  2 +-
 drivers/net/wan/cycx_x25.c                    |  2 +-
 drivers/net/wan/dlci.c                        |  2 +-
 drivers/net/wan/farsync.c                     |  2 +-
 drivers/net/wan/lmc/lmc_main.c                |  4 ++--
 drivers/net/wan/pc300_drv.c                   |  2 +-
 drivers/net/wan/pc300_tty.c                   |  2 +-
 drivers/net/wireless/airo.c                   |  4 ++--
 drivers/net/wireless/hostap/hostap_80211_rx.c |  7 ++++---
 drivers/net/wireless/hostap/hostap_80211_tx.c |  2 +-
 drivers/net/wireless/hostap/hostap_ap.c       |  3 ++-
 drivers/net/wireless/hostap/hostap_hw.c       |  2 +-
 drivers/net/wireless/hostap/hostap_main.c     |  3 ++-
 drivers/net/wireless/ipw2200.c                |  2 +-
 drivers/net/wireless/orinoco.c                |  2 +-
 drivers/net/wireless/prism54/islpci_eth.c     |  2 +-
 drivers/net/wireless/strip.c                  |  2 +-
 drivers/s390/net/ctcmain.c                    |  4 ++--
 drivers/s390/net/netiucv.c                    |  4 ++--
 drivers/s390/net/qeth_eddp.c                  |  2 +-
 drivers/s390/net/qeth_main.c                  |  4 ++--
 include/linux/hdlc.h                          |  4 ++--
 include/linux/skbuff.h                        |  5 +++++
 include/net/ax25.h                            |  2 +-
 include/net/x25device.h                       |  2 +-
 net/802/fddi.c                                |  2 +-
 net/802/hippi.c                               |  2 +-
 net/802/tr.c                                  |  2 +-
 net/atm/br2684.c                              |  2 +-
 net/atm/clip.c                                |  2 +-
 net/ax25/ax25_in.c                            |  2 +-
 net/bluetooth/bnep/core.c                     |  2 +-
 net/bridge/br_device.c                        |  2 +-
 net/core/dev.c                                |  4 ++--
 net/core/netpoll.c                            |  2 +-
 net/core/skbuff.c                             |  2 +-
 net/decnet/dn_route.c                         |  2 +-
 net/ethernet/eth.c                            |  2 +-
 net/ieee80211/ieee80211_rx.c                  |  7 ++++---
 net/ipv4/ip_gre.c                             |  2 +-
 net/ipv4/ip_output.c                          |  2 +-
 net/ipv4/route.c                              |  3 ++-
 net/ipv6/ip6_output.c                         |  2 +-
 net/ipv6/route.c                              |  2 +-
 net/irda/irlap_frame.c                        |  3 ++-
 net/irda/wrapper.c                            |  2 +-
 net/llc/llc_output.c                          |  2 +-
 net/netrom/nr_dev.c                           |  2 +-
 net/wanrouter/wanmain.c                       |  2 +-
 85 files changed, 119 insertions(+), 109 deletions(-)

diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 04e31f86c10..859303730b2 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -55,7 +55,7 @@ static int uml_net_rx(struct net_device *dev)
 
 	skb->dev = dev;
 	skb_put(skb, dev->mtu);
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	pkt_len = (*lp->read)(lp->fd, &skb, lp);
 
 	if (pkt_len > 0) {
diff --git a/arch/xtensa/platform-iss/network.c b/arch/xtensa/platform-iss/network.c
index 8ebfc876122..ab05bff4010 100644
--- a/arch/xtensa/platform-iss/network.c
+++ b/arch/xtensa/platform-iss/network.c
@@ -386,7 +386,7 @@ static int iss_net_rx(struct net_device *dev)
 	/* Setup skb */
 
 	skb->dev = dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	pkt_len = lp->tp.read(lp, &skb);
 	skb_put(skb, pkt_len);
 
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 4ab7b40e8c5..74062dc4e90 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -27,7 +27,8 @@ new_skb(ulong len)
 
 	skb = alloc_skb(len, GFP_ATOMIC);
 	if (skb) {
-		skb->nh.raw = skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
+		skb->nh.raw = skb->data;
 		skb->protocol = __constant_htons(ETH_P_AOE);
 		skb->priority = 0;
 		skb->next = skb->prev = NULL;
diff --git a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c
index 03e44b337eb..db2346f4d20 100644
--- a/drivers/ieee1394/eth1394.c
+++ b/drivers/ieee1394/eth1394.c
@@ -834,7 +834,7 @@ static inline u16 ether1394_type_trans(struct sk_buff *skb,
 	struct eth1394hdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull (skb, ETH1394_HLEN);
 	eth = eth1394_hdr(skb);
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 2b242a4823f..c722e5c141b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -408,7 +408,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len, newskb);
 
 	skb->protocol = ((struct ipoib_header *) skb->data)->proto;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, IPOIB_ENCAP_LEN);
 
 	dev->last_rx = jiffies;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index ba0ee5cf2ad..93f74567897 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -216,7 +216,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	if (wc->slid != priv->local_lid ||
 	    wc->src_qp != priv->qp->qp_num) {
 		skb->protocol = ((struct ipoib_header *) skb->data)->proto;
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		skb_pull(skb, IPOIB_ENCAP_LEN);
 
 		dev->last_rx = jiffies;
diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index 838b3734e2b..fadb9291bc1 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -1366,7 +1366,7 @@ isdn_net_type_trans(struct sk_buff *skb, struct net_device *dev)
 	struct ethhdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN);
 	eth = eth_hdr(skb);
 
@@ -1786,7 +1786,7 @@ isdn_net_receive(struct net_device *ndev, struct sk_buff *skb)
 	}
 	skb->dev = ndev;
 	skb->pkt_type = PACKET_HOST;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 #ifdef ISDN_DEBUG_NET_DUMP
 	isdn_dumppkt("R:", skb->data, skb->len, 40);
 #endif
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index 1b2df80c3bc..be915051cb2 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -1167,7 +1167,7 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff
 		mlp->huptimer = 0;
 #endif /* CONFIG_IPPP_FILTER */
 	skb->dev = dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	netif_rx(skb);
 	/* net_dev->local->stats.rx_packets++; done in isdn_net.c */
 	return;
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index b691292ff59..d5b878d5628 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -753,7 +753,7 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
 	/* Set the mac.raw pointer, since this apparently isn't getting
 	 * done before we get the skb. Pull the data pointer past the mac data.
 	 */
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, 12);
 
         dma = pci_map_single(mpt_dev->pcidev, skb->data, skb->len,
@@ -1549,7 +1549,7 @@ mpt_lan_type_trans(struct sk_buff *skb, struct net_device *dev)
 	struct mpt_lan_ohdr *fch = (struct mpt_lan_ohdr *)skb->data;
 	struct fcllc *fcllc;
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, sizeof(struct mpt_lan_ohdr));
 
 	if (fch->dtype == htons(0xffff)) {
diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c
index dba5e516545..28cb79cee91 100644
--- a/drivers/net/appletalk/cops.c
+++ b/drivers/net/appletalk/cops.c
@@ -853,7 +853,7 @@ static void cops_rx(struct net_device *dev)
                 return;
         }
 
-        skb->mac.raw    = skb->data;    /* Point to entire packet. */
+        skb_reset_mac_header(skb);    /* Point to entire packet. */
         skb_pull(skb,3);
         skb->h.raw      = skb->data;    /* Point to data (Skip header). */
 
diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index 2ea44ce4981..12682439f8b 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -770,7 +770,7 @@ static int sendup_buffer (struct net_device *dev)
 	skb->data[0] = dnode;
 	skb->data[1] = snode;
 	skb->data[2] = llaptype;
-	skb->mac.raw = skb->data;	/* save pointer to llap header */
+	skb_reset_mac_header(skb);	/* save pointer to llap header */
 	skb_pull(skb,3);
 
 	/* copy ddp(s,e)hdr + contents */
diff --git a/drivers/net/arcnet/arc-rawmode.c b/drivers/net/arcnet/arc-rawmode.c
index 6318814a11a..e0a18e7c73c 100644
--- a/drivers/net/arcnet/arc-rawmode.c
+++ b/drivers/net/arcnet/arc-rawmode.c
@@ -110,7 +110,7 @@ static void rx(struct net_device *dev, int bufnum,
 
 	pkt = (struct archdr *) skb->data;
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, ARC_HDR_SIZE);
 
 	/* up to sizeof(pkt->soft) has already been copied from the card */
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index 66485585ab3..6c764b66e9c 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -122,10 +122,8 @@ static void rx(struct net_device *dev, int bufnum,
 	}
 	skb_put(skb, length + ARC_HDR_SIZE + sizeof(int));
 	skb->dev = dev;
-
-	pkt = (struct archdr *) skb->data;
-
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
+	pkt = (struct archdr *)skb->mac.raw;
 	skb_pull(skb, ARC_HDR_SIZE);
 
 	/* up to sizeof(pkt->soft) has already been copied from the card */
@@ -270,9 +268,8 @@ static int ack_tx(struct net_device *dev, int acked)
   skb_put(ackskb, length + ARC_HDR_SIZE );
   ackskb->dev = dev;
 
-  ackpkt = (struct archdr *) ackskb->data;
-
-  ackskb->mac.raw = ackskb->data;
+  skb_reset_mac_header(ackskb);
+  ackpkt = (struct archdr *)ackskb->mac.raw;
   /* skb_pull(ackskb, ARC_HDR_SIZE); */
 
 
diff --git a/drivers/net/arcnet/rfc1051.c b/drivers/net/arcnet/rfc1051.c
index 6d6c69f036e..2de8877ece2 100644
--- a/drivers/net/arcnet/rfc1051.c
+++ b/drivers/net/arcnet/rfc1051.c
@@ -94,7 +94,7 @@ static unsigned short type_trans(struct sk_buff *skb, struct net_device *dev)
 	int hdr_size = ARC_HDR_SIZE + RFC1051_HDR_SIZE;
 
 	/* Pull off the arcnet header. */
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, hdr_size);
 
 	if (pkt->hard.dest == 0)
diff --git a/drivers/net/arcnet/rfc1201.c b/drivers/net/arcnet/rfc1201.c
index bee34226abf..460a095000c 100644
--- a/drivers/net/arcnet/rfc1201.c
+++ b/drivers/net/arcnet/rfc1201.c
@@ -96,7 +96,7 @@ static unsigned short type_trans(struct sk_buff *skb, struct net_device *dev)
 	int hdr_size = ARC_HDR_SIZE + RFC1201_HDR_SIZE;
 
 	/* Pull off the arcnet header. */
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, hdr_size);
 
 	if (pkt->hard.dest == 0)
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 3fb354d9c51..e3c9e2e56d1 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -884,7 +884,7 @@ static int ad_lacpdu_send(struct port *port)
 	}
 
 	skb->dev = slave->dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->nh.raw = skb->data + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 	skb->priority = TC_PRIO_CONTROL;
@@ -928,7 +928,7 @@ static int ad_marker_send(struct port *port, struct marker *marker)
 	skb_reserve(skb, 16);
 
 	skb->dev = slave->dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->nh.raw = skb->data + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 217a2eedee0..916162ca0c9 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -890,7 +890,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
 		data = skb_put(skb, size);
 		memcpy(data, &pkt, size);
 
-		skb->mac.raw = data;
+		skb_reset_mac_header(skb);
 		skb->nh.raw = data + ETH_HLEN;
 		skb->protocol = pkt.type;
 		skb->priority = TC_PRIO_CONTROL;
@@ -1266,7 +1266,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 	u8 *hash_start = NULL;
 	int res = 1;
 
-	skb->mac.raw = (unsigned char *)skb->data;
+	skb_reset_mac_header(skb);
 	eth_data = eth_hdr(skb);
 
 	/* make sure that the curr_active_slave and the slaves list do
diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c
index 199e5066acf..ebcf35e4cf5 100644
--- a/drivers/net/cxgb3/cxgb3_offload.c
+++ b/drivers/net/cxgb3/cxgb3_offload.c
@@ -783,7 +783,7 @@ static int do_trace(struct t3cdev *dev, struct sk_buff *skb)
 	skb->protocol = htons(0xffff);
 	skb->dev = dev->lldev;
 	skb_pull(skb, sizeof(*p));
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	netif_receive_skb(skb);
 	return 0;
 }
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 8946f7aa97c..b5cf2a60834 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -1620,7 +1620,8 @@ static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
 			     unsigned int gather_idx)
 {
 	rq->offload_pkts++;
-	skb->mac.raw = skb->nh.raw = skb->h.raw = skb->data;
+	skb_reset_mac_header(skb);
+	skb->nh.raw = skb->h.raw = skb->data;
 
 	if (rq->polling) {
 		rx_gather[gather_idx++] = skb;
diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c
index cebf8c374bc..0f10758226f 100644
--- a/drivers/net/irda/ali-ircc.c
+++ b/drivers/net/irda/ali-ircc.c
@@ -1932,7 +1932,7 @@ static int  ali_ircc_dma_receive_complete(struct ali_ircc_cb *self)
 			self->stats.rx_packets++;
 
 			skb->dev = self->netdev;
-			skb->mac.raw  = skb->data;
+			skb_reset_mac_header(skb);
 			skb->protocol = htons(ETH_P_IRDA);
 			netif_rx(skb);
 			self->netdev->last_rx = jiffies;
diff --git a/drivers/net/irda/au1k_ir.c b/drivers/net/irda/au1k_ir.c
index 37914dc5b90..27afd0f367d 100644
--- a/drivers/net/irda/au1k_ir.c
+++ b/drivers/net/irda/au1k_ir.c
@@ -606,7 +606,7 @@ static int au1k_irda_rx(struct net_device *dev)
 				skb_put(skb, count-2);
 			memcpy(skb->data, (void *)pDB->vaddr, count-2);
 			skb->dev = dev;
-			skb->mac.raw = skb->data;
+			skb_reset_mac_header(skb);
 			skb->protocol = htons(ETH_P_IRDA);
 			netif_rx(skb);
 			prxd->count_0 = 0;
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index 11af0ae7510..ddfa6c38a16 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -1286,7 +1286,7 @@ dumpbufs(self->rx_bufs[self->rxs],len,'<');
 
                       self->stats.rx_packets++;
                       skb->dev = self->netdev;
-                      skb->mac.raw = skb->data;
+                      skb_reset_mac_header(skb);
                       skb->protocol = htons (ETH_P_IRDA);
                     }
                   else
diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c
index 1d510bdc9b8..6ef375a095f 100644
--- a/drivers/net/irda/irda-usb.c
+++ b/drivers/net/irda/irda-usb.c
@@ -921,7 +921,7 @@ static void irda_usb_receive(struct urb *urb)
 
 	/* Ask the networking layer to queue the packet for the IrDA stack */
 	dataskb->dev = self->netdev;
-	dataskb->mac.raw  = dataskb->data;
+	skb_reset_mac_header(dataskb);
 	dataskb->protocol = htons(ETH_P_IRDA);
 	len = dataskb->len;
 	netif_rx(dataskb);
diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c
index f0c61f3b2a8..3ff1f4b33c0 100644
--- a/drivers/net/irda/mcs7780.c
+++ b/drivers/net/irda/mcs7780.c
@@ -428,7 +428,7 @@ static void mcs_unwrap_mir(struct mcs_cb *mcs, __u8 *buf, int len)
 	skb_reserve(skb, 1);
 	memcpy(skb->data, buf, new_len);
 	skb_put(skb, new_len);
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol = htons(ETH_P_IRDA);
 	skb->dev = mcs->netdev;
 
@@ -481,7 +481,7 @@ static void mcs_unwrap_fir(struct mcs_cb *mcs, __u8 *buf, int len)
 	skb_reserve(skb, 1);
 	memcpy(skb->data, buf, new_len);
 	skb_put(skb, new_len);
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol = htons(ETH_P_IRDA);
 	skb->dev = mcs->netdev;
 
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index 29b5ccd29d0..8ce7dad582f 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -1881,7 +1881,7 @@ static int nsc_ircc_dma_receive_complete(struct nsc_ircc_cb *self, int iobase)
 			self->stats.rx_packets++;
 
 			skb->dev = self->netdev;
-			skb->mac.raw  = skb->data;
+			skb_reset_mac_header(skb);
 			skb->protocol = htons(ETH_P_IRDA);
 			netif_rx(skb);
 			self->netdev->last_rx = jiffies;
diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c
index 2272156af31..f35d7d42624 100644
--- a/drivers/net/irda/pxaficp_ir.c
+++ b/drivers/net/irda/pxaficp_ir.c
@@ -391,7 +391,7 @@ static void pxa_irda_fir_irq_eif(struct pxa_irda *si, struct net_device *dev, in
 
 		/* Feed it to IrLAP  */
 		skb->dev = dev;
-		skb->mac.raw  = skb->data;
+		skb_reset_mac_header(skb);
 		skb->protocol = htons(ETH_P_IRDA);
 		netif_rx(skb);
 
diff --git a/drivers/net/irda/sa1100_ir.c b/drivers/net/irda/sa1100_ir.c
index 937372d0039..056639f72be 100644
--- a/drivers/net/irda/sa1100_ir.c
+++ b/drivers/net/irda/sa1100_ir.c
@@ -504,7 +504,7 @@ static void sa1100_irda_fir_error(struct sa1100_irda *si, struct net_device *dev
 
 		skb_put(skb, len);
 		skb->dev = dev;
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		skb->protocol = htons(ETH_P_IRDA);
 		si->stats.rx_packets++;
 		si->stats.rx_bytes += len;
diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c
index 31c623381ea..103a2d18ed2 100644
--- a/drivers/net/irda/smsc-ircc2.c
+++ b/drivers/net/irda/smsc-ircc2.c
@@ -1412,7 +1412,7 @@ static void smsc_ircc_dma_receive_complete(struct smsc_ircc_cb *self)
 	self->stats.rx_bytes += len;
 
 	skb->dev = self->netdev;
-	skb->mac.raw  = skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol = htons(ETH_P_IRDA);
 	netif_rx(skb);
 }
diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c
index 20d306fea4c..a22175f4ea8 100644
--- a/drivers/net/irda/stir4200.c
+++ b/drivers/net/irda/stir4200.c
@@ -364,7 +364,7 @@ static void fir_eof(struct stir_cb *stir)
 
 	skb_put(skb, len);
 
-	skb->mac.raw  = skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol = htons(ETH_P_IRDA);
 	skb->dev = stir->netdev;
 
diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c
index c3ed9b3067e..5ff41631460 100644
--- a/drivers/net/irda/via-ircc.c
+++ b/drivers/net/irda/via-ircc.c
@@ -1125,7 +1125,7 @@ static int via_ircc_dma_receive_complete(struct via_ircc_cb *self,
 		self->stats.rx_bytes += len;
 		self->stats.rx_packets++;
 		skb->dev = self->netdev;
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		skb->protocol = htons(ETH_P_IRDA);
 		netif_rx(skb);
 		return TRUE;
@@ -1198,7 +1198,7 @@ F01_E */
 		self->stats.rx_bytes += len;
 		self->stats.rx_packets++;
 		skb->dev = self->netdev;
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		skb->protocol = htons(ETH_P_IRDA);
 		netif_rx(skb);
 
@@ -1244,7 +1244,7 @@ static int upload_rxdata(struct via_ircc_cb *self, int iobase)
 	self->stats.rx_bytes += len;
 	self->stats.rx_packets++;
 	skb->dev = self->netdev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol = htons(ETH_P_IRDA);
 	netif_rx(skb);
 	if (st_fifo->len < (MAX_RX_WINDOW + 2)) {
@@ -1313,7 +1313,7 @@ static int RxTimerHandler(struct via_ircc_cb *self, int iobase)
 			self->stats.rx_bytes += len;
 			self->stats.rx_packets++;
 			skb->dev = self->netdev;
-			skb->mac.raw = skb->data;
+			skb_reset_mac_header(skb);
 			skb->protocol = htons(ETH_P_IRDA);
 			netif_rx(skb);
 		}		//while
diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c
index 3457e9d8b66..79b407f3a49 100644
--- a/drivers/net/irda/vlsi_ir.c
+++ b/drivers/net/irda/vlsi_ir.c
@@ -595,7 +595,7 @@ static int vlsi_process_rx(struct vlsi_ring *r, struct ring_descr *rd)
 	rd->skb = NULL;
 	skb->dev = ndev;
 	memcpy(skb_put(skb,len), rd->buf, len);
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	if (in_interrupt())
 		netif_rx(skb);
 	else
diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c
index 4212657fa4f..bee44513095 100644
--- a/drivers/net/irda/w83977af_ir.c
+++ b/drivers/net/irda/w83977af_ir.c
@@ -919,7 +919,7 @@ int w83977af_dma_receive_complete(struct w83977af_ir *self)
 			self->stats.rx_packets++;
 			
 			skb->dev = self->netdev;
-			skb->mac.raw  = skb->data;
+			skb_reset_mac_header(skb);
 			skb->protocol = htons(ETH_P_IRDA);
 			netif_rx(skb);
 			self->netdev->last_rx = jiffies;
diff --git a/drivers/net/myri_sbus.c b/drivers/net/myri_sbus.c
index ee26ef52289..de092658db6 100644
--- a/drivers/net/myri_sbus.c
+++ b/drivers/net/myri_sbus.c
@@ -368,7 +368,7 @@ static __be16 myri_type_trans(struct sk_buff *skb, struct net_device *dev)
 	struct ethhdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw = (((unsigned char *)skb->data) + MYRI_PAD_LEN);
+	skb->mac.raw = skb->data + MYRI_PAD_LEN;
 	skb_pull(skb, dev->hard_header_len);
 	eth = eth_hdr(skb);
 
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index ef58e412878..18f1790aab9 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -1685,7 +1685,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
 			skb_pull_rcsum(skb, 2);
 			skb->dev = ppp->dev;
 			skb->protocol = htons(npindex_to_ethertype[npi]);
-			skb->mac.raw = skb->data;
+			skb_reset_mac_header(skb);
 			netif_rx(skb);
 			ppp->dev->last_rx = jiffies;
 		}
diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c
index b9fa4fbb139..1de3eec1a79 100644
--- a/drivers/net/sb1000.c
+++ b/drivers/net/sb1000.c
@@ -834,7 +834,7 @@ printk("cm0: IP identification: %02x%02x  fragment offset: %02x%02x\n", buffer[3
 			goto dropped_frame;
 		}
 		skb->dev = dev;
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		skb->protocol = (unsigned short) buffer[NewDatagramHeaderSkip + 16];
 		insw(ioaddr, skb_put(skb, NewDatagramDataSize),
 			NewDatagramDataSize / 2);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a57aa010cb2..288d8559f8c 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -256,7 +256,7 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
 
 	switch (tun->flags & TUN_TYPE_MASK) {
 	case TUN_TUN_DEV:
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		skb->protocol = pi.proto;
 		skb->dev = tun->dev;
 		break;
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index 5b82e4fd0d7..c198511ec3f 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -773,7 +773,7 @@ static int sppp_rx_done(struct channel_data *chan)
 	}
 	chan->rx_skb->protocol = htons(ETH_P_WAN_PPP);
 	chan->rx_skb->dev = chan->pppdev.dev;
-	chan->rx_skb->mac.raw = chan->rx_skb->data;
+	skb_reset_mac_header(chan->rx_skb)
 	chan->stats.rx_packets++;
 	chan->stats.rx_bytes += chan->cosa->rxsize;
 	netif_rx(chan->rx_skb);
diff --git a/drivers/net/wan/cycx_x25.c b/drivers/net/wan/cycx_x25.c
index a631d1c2fa1..016b3ff3ea5 100644
--- a/drivers/net/wan/cycx_x25.c
+++ b/drivers/net/wan/cycx_x25.c
@@ -834,7 +834,7 @@ static void cycx_x25_irq_rx(struct cycx_device *card, struct cycx_x25_cmd *cmd)
 	++chan->ifstats.rx_packets;
 	chan->ifstats.rx_bytes += pktlen;
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	netif_rx(skb);
 	dev->last_rx = jiffies;		/* timestamp */
 }
diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c
index 73698755943..66be20c292b 100644
--- a/drivers/net/wan/dlci.c
+++ b/drivers/net/wan/dlci.c
@@ -176,7 +176,7 @@ static void dlci_receive(struct sk_buff *skb, struct net_device *dev)
 	if (process)
 	{
 		/* we've set up the protocol, so discard the header */
-		skb->mac.raw = skb->data; 
+		skb_reset_mac_header(skb);
 		skb_pull(skb, header);
 		dlp->stats.rx_bytes += skb->len;
 		netif_rx(skb);
diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index c45d6a83339..58a53b6d9b4 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -864,7 +864,7 @@ fst_tx_dma_complete(struct fst_card_info *card, struct fst_port_info *port,
 static __be16 farsync_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	skb->dev = dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->pkt_type = PACKET_HOST;
 	return htons(ETH_P_CUST);
 }
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 2b54f1bc3a0..6d288839dda 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -1667,7 +1667,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
             skb_put (skb, len);
             skb->protocol = lmc_proto_type(sc, skb);
             skb->protocol = htons(ETH_P_WAN_PPP);
-            skb->mac.raw = skb->data;
+            skb_reset_mac_header(skb);
 //            skb->nh.raw = skb->data;
             skb->dev = dev;
             lmc_proto_netif(sc, skb);
@@ -1705,7 +1705,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
             memcpy(skb_put(nsb, len), skb->data, len);
             
             nsb->protocol = lmc_proto_type(sc, skb);
-            nsb->mac.raw = nsb->data;
+            skb_reset_mac_header(nsb);
 //            nsb->nh.raw = nsb->data;
             nsb->dev = dev;
             lmc_proto_netif(sc, nsb);
diff --git a/drivers/net/wan/pc300_drv.c b/drivers/net/wan/pc300_drv.c
index 62184dee377..edbc55528be 100644
--- a/drivers/net/wan/pc300_drv.c
+++ b/drivers/net/wan/pc300_drv.c
@@ -1755,7 +1755,7 @@ cpc_trace(struct net_device *dev, struct sk_buff *skb_main, char rx_tx)
 
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_CUST);
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->pkt_type = PACKET_HOST;
 	skb->len = 10 + skb_main->len;
 
diff --git a/drivers/net/wan/pc300_tty.c b/drivers/net/wan/pc300_tty.c
index 5873c346e7e..de02a07259c 100644
--- a/drivers/net/wan/pc300_tty.c
+++ b/drivers/net/wan/pc300_tty.c
@@ -1003,7 +1003,7 @@ static void cpc_tty_trace(pc300dev_t *dev, char* buf, int len, char rxtx)
 	skb_put (skb, 10 + len); 
 	skb->dev = dev->dev; 
 	skb->protocol = htons(ETH_P_CUST); 
-	skb->mac.raw = skb->data; 
+	skb_reset_mac_header(skb);
 	skb->pkt_type = PACKET_HOST; 
 	skb->len = 10 + len; 
 
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index e50b1482d79..692a23f9834 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -3411,7 +3411,7 @@ badrx:
 			OUT4500( apriv, EVACK, EV_RX);
 
 			if (test_bit(FLAG_802_11, &apriv->flags)) {
-				skb->mac.raw = skb->data;
+				skb_reset_mac_header(skb);
 				skb->pkt_type = PACKET_OTHERHOST;
 				skb->dev = apriv->wifidev;
 				skb->protocol = htons(ETH_P_802_2);
@@ -3746,7 +3746,7 @@ void mpi_receive_802_11 (struct airo_info *ai)
 		wireless_spy_update(ai->dev, sa, &wstats);
 	}
 #endif /* IW_WIRELESS_SPY */
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->pkt_type = PACKET_OTHERHOST;
 	skb->dev = ai->wifidev;
 	skb->protocol = htons(ETH_P_802_2);
diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c
index f78ee26d787..e4082f9d766 100644
--- a/drivers/net/wireless/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_rx.c
@@ -167,7 +167,7 @@ hdr->f.status = s; hdr->f.len = l; hdr->f.data = d
 
 	ret = skb->len - phdrlen;
 	skb->dev = dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, hdrlen);
 	if (prism_header)
 		skb_pull(skb, phdrlen);
@@ -1073,10 +1073,11 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 
 	if (skb2 != NULL) {
 		/* send to wireless media */
+		skb2->dev = dev;
 		skb2->protocol = __constant_htons(ETH_P_802_3);
-		skb2->mac.raw = skb2->nh.raw = skb2->data;
+		skb_reset_mac_header(skb2);
+		skb2->nh.raw = skb2->data;
 		/* skb2->nh.raw = skb2->data + ETH_HLEN; */
-		skb2->dev = dev;
 		dev_queue_xmit(skb2);
 	}
 
diff --git a/drivers/net/wireless/hostap/hostap_80211_tx.c b/drivers/net/wireless/hostap/hostap_80211_tx.c
index 4a5be70c041..159baef18e4 100644
--- a/drivers/net/wireless/hostap/hostap_80211_tx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_tx.c
@@ -237,7 +237,7 @@ int hostap_data_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	iface->stats.tx_packets++;
 	iface->stats.tx_bytes += skb->len;
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	meta = (struct hostap_skb_tx_data *) skb->cb;
 	memset(meta, 0, sizeof(*meta));
 	meta->magic = HOSTAP_SKB_TX_DATA_MAGIC;
diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c
index efb8cf3bd8a..cc18f9686d2 100644
--- a/drivers/net/wireless/hostap/hostap_ap.c
+++ b/drivers/net/wireless/hostap/hostap_ap.c
@@ -982,7 +982,8 @@ static void prism2_send_mgmt(struct net_device *dev,
 	meta->tx_cb_idx = tx_cb_idx;
 
 	skb->dev = dev;
-	skb->mac.raw = skb->nh.raw = skb->data;
+	skb_reset_mac_header(skb);
+	skb->nh.raw = skb->data;
 	dev_queue_xmit(skb);
 }
 #endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */
diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c
index 3079378fb8c..9003ff7d151 100644
--- a/drivers/net/wireless/hostap/hostap_hw.c
+++ b/drivers/net/wireless/hostap/hostap_hw.c
@@ -2217,7 +2217,7 @@ static void hostap_tx_callback(local_info_t *local,
 		memcpy(skb_put(skb, len), payload, len);
 
 	skb->dev = local->dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 
 	cb->func(skb, ok, cb->data);
 }
diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index 9077e6edde3..0e29ff76287 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c
@@ -1063,7 +1063,8 @@ int prism2_sta_send_mgmt(local_info_t *local, u8 *dst, u16 stype,
 	meta->iface = netdev_priv(dev);
 
 	skb->dev = dev;
-	skb->mac.raw = skb->nh.raw = skb->data;
+	skb_reset_mac_header(skb);
+	skb->nh.raw = skb->data;
 	dev_queue_xmit(skb);
 
 	return 0;
diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c
index c878a2f3239..b04c56a25cc 100644
--- a/drivers/net/wireless/ipw2200.c
+++ b/drivers/net/wireless/ipw2200.c
@@ -8133,7 +8133,7 @@ static void ipw_handle_mgmt_packet(struct ipw_priv *priv,
 		skb->dev = priv->ieee->dev;
 
 		/* Point raw at the ieee80211_stats */
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 
 		skb->pkt_type = PACKET_OTHERHOST;
 		skb->protocol = __constant_htons(ETH_P_80211_STATS);
diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c
index 3f9d78d059b..f1415bff527 100644
--- a/drivers/net/wireless/orinoco.c
+++ b/drivers/net/wireless/orinoco.c
@@ -770,7 +770,7 @@ static void orinoco_rx_monitor(struct net_device *dev, u16 rxfid,
 
 	/* Copy the 802.11 header to the skb */
 	memcpy(skb_put(skb, hdrlen), &(desc->frame_ctl), hdrlen);
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 
 	/* If any, copy the data from the card to the skb */
 	if (datalen > 0) {
diff --git a/drivers/net/wireless/prism54/islpci_eth.c b/drivers/net/wireless/prism54/islpci_eth.c
index fc2e0f3a896..6ebfff03424 100644
--- a/drivers/net/wireless/prism54/islpci_eth.c
+++ b/drivers/net/wireless/prism54/islpci_eth.c
@@ -303,7 +303,7 @@ islpci_monitor_rx(islpci_private *priv, struct sk_buff **skb)
 		skb_pull(*skb, sizeof (struct rfmon_header));
 
 	(*skb)->protocol = htons(ETH_P_802_2);
-	(*skb)->mac.raw = (*skb)->data;
+	skb_reset_mac_header(*skb);
 	(*skb)->pkt_type = PACKET_OTHERHOST;
 
 	return 0;
diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c
index f5ce1c6063d..2a299a0676a 100644
--- a/drivers/net/wireless/strip.c
+++ b/drivers/net/wireless/strip.c
@@ -2009,7 +2009,7 @@ static void deliver_packet(struct strip *strip_info, STRIP_Header * header,
 		       packetlen);
 		skb->dev = get_strip_dev(strip_info);
 		skb->protocol = header->protocol;
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 
 		/* Having put a fake header on the front of the sk_buff for the */
 		/* benefit of tools like tcpdump, skb_pull now 'consumes' that  */
diff --git a/drivers/s390/net/ctcmain.c b/drivers/s390/net/ctcmain.c
index 0d6d5fcc128..787c0131704 100644
--- a/drivers/s390/net/ctcmain.c
+++ b/drivers/s390/net/ctcmain.c
@@ -455,7 +455,7 @@ ctc_unpack_skb(struct channel *ch, struct sk_buff *pskb)
 			return;
 		}
 		skb_put(pskb, header->length);
-		pskb->mac.raw = pskb->data;
+		skb_reset_mac_header(pskb);
 		len -= header->length;
 		skb = dev_alloc_skb(pskb->len);
 		if (!skb) {
@@ -473,7 +473,7 @@ ctc_unpack_skb(struct channel *ch, struct sk_buff *pskb)
 			return;
 		}
 		memcpy(skb_put(skb, pskb->len), pskb->data, pskb->len);
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		skb->dev = pskb->dev;
 		skb->protocol = pskb->protocol;
 		pskb->ip_summed = CHECKSUM_UNNECESSARY;
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 594320ca1b7..82edf201440 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -635,7 +635,7 @@ static void netiucv_unpack_skb(struct iucv_connection *conn,
 			return;
 		}
 		skb_put(pskb, header->next);
-		pskb->mac.raw = pskb->data;
+		skb_reset_mac_header(pskb);
 		skb = dev_alloc_skb(pskb->len);
 		if (!skb) {
 			PRINT_WARN("%s Out of memory in netiucv_unpack_skb\n",
@@ -646,7 +646,7 @@ static void netiucv_unpack_skb(struct iucv_connection *conn,
 			return;
 		}
 		memcpy(skb_put(skb, pskb->len), pskb->data, pskb->len);
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 		skb->dev = pskb->dev;
 		skb->protocol = pskb->protocol;
 		pskb->ip_summed = CHECKSUM_UNNECESSARY;
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index 7c735e1fe06..910a8ab66b0 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -486,7 +486,7 @@ qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
 		return -ENOMEM;
 	}
 	if (qhdr->hdr.l2.id == QETH_HEADER_TYPE_LAYER2) {
-		skb->mac.raw = (skb->data) + sizeof(struct qeth_hdr);
+		skb->mac.raw = skb->data + sizeof(struct qeth_hdr);
 		memcpy(&eddp->mac, eth_hdr(skb), ETH_HLEN);
 #ifdef CONFIG_QETH_VLAN
 		if (eddp->mac.h_proto == __constant_htons(ETH_P_8021Q)) {
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index d502b77adf6..28822025b79 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -2278,7 +2278,7 @@ qeth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	    (card->info.link_type == QETH_LINK_TYPE_LANE_TR))
 	 	return tr_type_trans(skb,dev);
 #endif /* CONFIG_TR */
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN );
 	eth = eth_hdr(skb);
 
@@ -2461,7 +2461,7 @@ qeth_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
 	if (card->options.fake_ll)
 		qeth_rebuild_skb_fake_ll(card, skb, hdr);
 	else
-		skb->mac.raw = skb->data;
+		skb_reset_mac_header(skb);
 	skb->ip_summed = card->options.checksum_type;
 	if (card->options.checksum_type == HW_CHECKSUMMING){
 		if ( (hdr->hdr.l3.ext_flags &
diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h
index d4b333938f7..0fe562af9c8 100644
--- a/include/linux/hdlc.h
+++ b/include/linux/hdlc.h
@@ -132,8 +132,8 @@ static __inline__ __be16 hdlc_type_trans(struct sk_buff *skb,
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
 
-	skb->mac.raw  = skb->data;
-	skb->dev      = dev;
+	skb->dev = dev;
+	skb_reset_mac_header(skb);
 
 	if (hdlc->proto->type_trans)
 		return hdlc->proto->type_trans(skb, dev);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index df229bd5f1a..748f254b50c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -960,6 +960,11 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+static inline void skb_reset_mac_header(struct sk_buff *skb)
+{
+	skb->mac.raw = skb->data;
+}
+
 /*
  * CPUs often take a performance hit when accessing unaligned memory
  * locations. The actual performance hit varies, it can be small if the
diff --git a/include/net/ax25.h b/include/net/ax25.h
index 47ff2f46e90..99a4e364c74 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -263,8 +263,8 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25)
 static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	skb->dev      = dev;
+	skb_reset_mac_header(skb);
 	skb->pkt_type = PACKET_HOST;
-	skb->mac.raw  = skb->data;
 	return htons(ETH_P_AX25);
 }
 
diff --git a/include/net/x25device.h b/include/net/x25device.h
index 1d10c879f7e..1415bcf9398 100644
--- a/include/net/x25device.h
+++ b/include/net/x25device.h
@@ -7,8 +7,8 @@
 
 static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
-	skb->mac.raw = skb->data;
 	skb->dev = dev;
+	skb_reset_mac_header(skb);
 	skb->pkt_type = PACKET_HOST;
 	
 	return htons(ETH_P_X25);
diff --git a/net/802/fddi.c b/net/802/fddi.c
index f8a0c9f6fec..91dde41b548 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -131,7 +131,7 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
 	 */
 
 	skb->dev = dev;
-	skb->mac.raw = skb->data;	/* point to frame control (FC) */
+	skb_reset_mac_header(skb);	/* point to frame control (FC) */
 
 	if(fddi->hdr.llc_8022_1.dsap==0xe0)
 	{
diff --git a/net/802/hippi.c b/net/802/hippi.c
index 138302c14ee..d87190038ed 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -131,7 +131,7 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
 	 * set the raw address here.
 	 */
 	skb->dev = dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	hip = (struct hippi_hdr *)skb->mac.raw;
 	skb_pull(skb, HIPPI_HLEN);
 
diff --git a/net/802/tr.c b/net/802/tr.c
index 987d91559bc..eb2de0d1620 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -194,7 +194,7 @@ __be16 tr_type_trans(struct sk_buff *skb, struct net_device *dev)
 	unsigned riflen=0;
 
 	skb->dev = dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	trh = tr_hdr(skb);
 
 	if(trh->saddr[0] & TR_RII)
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index c444f5eda22..900d42ca8a5 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -458,7 +458,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 	/* FIXME: tcpdump shows that pointer to mac header is 2 bytes earlier,
 	   than should be. What else should I set? */
 	skb_pull(skb, plen);
-	skb->mac.raw = ((char *) (skb->data)) - ETH_HLEN;
+	skb->mac.raw = skb->data - ETH_HLEN;
 	skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_BR2684_FAST_TRANS
 	skb->protocol = ((u16 *) skb->data)[-1];
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 8c382581608..ccba24ffb96 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -213,7 +213,7 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
 		return;
 	}
 	ATM_SKB(skb)->vcc = vcc;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	if (!clip_vcc->encap
 	    || skb->len < RFC1483LLC_LEN
 	    || memcmp(skb->data, llc_oui, sizeof (llc_oui)))
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 4a6b26becad..6d11b0633d5 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -122,7 +122,7 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb)
 		}
 
 		skb_pull(skb, 1);	/* Remove PID */
-		skb->mac.raw  = skb->nh.raw;
+		skb_reset_mac_header(skb);
 		skb->nh.raw   = skb->data;
 		skb->dev      = ax25->ax25_dev->dev;
 		skb->pkt_type = PACKET_HOST;
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index f7ade186bf9..b1c2fa96c69 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -326,7 +326,7 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
 		return 0;
 	}
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 
 	/* Verify and pull out header */
 	if (!skb_pull(skb, __bnep_rx_hlen[type & BNEP_TYPE_MASK]))
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 905a39c33a1..b22ada529cc 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -37,7 +37,7 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	br->statistics.tx_packets++;
 	br->statistics.tx_bytes += skb->len;
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN);
 
 	if (dest[0] & 1)
diff --git a/net/core/dev.c b/net/core/dev.c
index 424d6d0e98f..2fcaf5bc4a9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1066,7 +1066,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 			   set by sender, so that the second statement is
 			   just protection against buggy protocols.
 			 */
-			skb2->mac.raw = skb2->data;
+			skb_reset_mac_header(skb2);
 
 			if (skb2->nh.raw < skb2->data ||
 			    skb2->nh.raw > skb2->tail) {
@@ -1206,7 +1206,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 
 	BUG_ON(skb_shinfo(skb)->frag_list);
 
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->mac_len = skb->nh.raw - skb->data;
 	__skb_pull(skb, skb->mac_len);
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 32a9f80b5f1..0ad3896bbf6 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -324,7 +324,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);
 
 	eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol = eth->h_proto = htons(ETH_P_IP);
 	memcpy(eth->h_source, np->local_mac, 6);
 	memcpy(eth->h_dest, np->remote_mac, 6);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 336958fbbcb..8f6ebd0d369 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1928,7 +1928,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		nskb->mac_len = skb->mac_len;
 
 		skb_reserve(nskb, headroom);
-		nskb->mac.raw = nskb->data;
+		skb_reset_mac_header(nskb);
 		nskb->nh.raw = nskb->data + skb->mac_len;
 		nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
 		memcpy(skb_put(nskb, doffset), skb->data, doffset);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index c1b5502f195..ef94ca56d7b 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1537,7 +1537,7 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (skb == NULL)
 		return -ENOBUFS;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	cb = DN_SKB_CB(skb);
 
 	if (rta[RTA_SRC-1])
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 01ecbe42b1e..0ac2524f3b6 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -157,7 +157,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	unsigned char *rawp;
 
 	skb->dev = dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN);
 	eth = eth_hdr(skb);
 
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index d5f5c661668..f39bf7c4101 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -42,7 +42,7 @@ static void ieee80211_monitor_rx(struct ieee80211_device *ieee,
 	u16 fc = le16_to_cpu(hdr->frame_ctl);
 
 	skb->dev = ieee->dev;
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb, ieee80211_get_hdrlen(fc));
 	skb->pkt_type = PACKET_OTHERHOST;
 	skb->protocol = __constant_htons(ETH_P_80211_RAW);
@@ -789,10 +789,11 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 
 	if (skb2 != NULL) {
 		/* send to wireless media */
+		skb2->dev = dev;
 		skb2->protocol = __constant_htons(ETH_P_802_3);
-		skb2->mac.raw = skb2->nh.raw = skb2->data;
+		skb_reset_mac_header(skb2);
+		skb2->nh.raw = skb2->data;
 		/* skb2->nh.raw = skb2->data + ETH_HLEN; */
-		skb2->dev = dev;
 		dev_queue_xmit(skb2);
 	}
 #endif
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 9151da64231..88f8aae873f 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -616,7 +616,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 				offset += 4;
 		}
 
-		skb->mac.raw = skb->nh.raw;
+		skb_reset_mac_header(skb);
 		skb->nh.raw = __pskb_pull(skb, offset);
 		skb_postpull_rcsum(skb, skb->h.raw, offset);
 		skb->pkt_type = PACKET_HOST;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5db301b3337..ddba857bd24 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -95,7 +95,7 @@ __inline__ void ip_send_check(struct iphdr *iph)
 /* dev_loopback_xmit for use with netfilter. */
 static int ip_dev_loopback_xmit(struct sk_buff *newskb)
 {
-	newskb->mac.raw = newskb->data;
+	skb_reset_mac_header(newskb);
 	__skb_pull(newskb, newskb->nh.raw - newskb->data);
 	newskb->pkt_type = PACKET_LOOPBACK;
 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0b3d7bf40f4..29ee7be45aa 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2747,7 +2747,8 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 	/* Reserve room for dummy headers, this skb can pass
 	   through good chunk of routing engine.
 	 */
-	skb->mac.raw = skb->nh.raw = skb->data;
+	skb_reset_mac_header(skb);
+	skb->nh.raw = skb->data;
 
 	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
 	skb->nh.iph->protocol = IPPROTO_ICMP;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 7e25043d826..a5f4562b5d2 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -88,7 +88,7 @@ static inline int ip6_output_finish(struct sk_buff *skb)
 /* dev_loopback_xmit for use with netfilter. */
 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
 {
-	newskb->mac.raw = newskb->data;
+	skb_reset_mac_header(newskb);
 	__skb_pull(newskb, newskb->nh.raw - newskb->data);
 	newskb->pkt_type = PACKET_LOOPBACK;
 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index cc08cc48e9e..0aa4762f53f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2218,7 +2218,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 	/* Reserve room for dummy headers, this skb can pass
 	   through good chunk of routing engine.
 	 */
-	skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
 
 	rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 0b04603e9c4..1b7e2490e2e 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -93,7 +93,8 @@ void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb)
 {
 	/* Some common init stuff */
 	skb->dev = self->netdev;
-	skb->h.raw = skb->nh.raw = skb->mac.raw = skb->data;
+	skb_reset_mac_header(skb);
+	skb->h.raw = skb->nh.raw = skb->data;
 	skb->protocol = htons(ETH_P_IRDA);
 	skb->priority = TC_PRIO_BESTEFFORT;
 
diff --git a/net/irda/wrapper.c b/net/irda/wrapper.c
index 5abfb71aae8..2acc66dfb55 100644
--- a/net/irda/wrapper.c
+++ b/net/irda/wrapper.c
@@ -256,7 +256,7 @@ async_bump(struct net_device *dev,
 
 	/* Feed it to IrLAP layer */
 	dataskb->dev = dev;
-	dataskb->mac.raw  = dataskb->data;
+	skb_reset_mac_header(dataskb);
 	dataskb->protocol = htons(ETH_P_IRDA);
 
 	netif_rx(dataskb);
diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c
index f4291f349e9..729e2510827 100644
--- a/net/llc/llc_output.c
+++ b/net/llc/llc_output.c
@@ -52,7 +52,7 @@ int llc_mac_hdr_init(struct sk_buff *skb,
 		if (da) {
 			memcpy(trh->daddr, da, dev->addr_len);
 			tr_source_route(skb, trh, dev);
-			skb->mac.raw = skb->data;
+			skb_reset_mac_header(skb);
 		}
 		break;
 	}
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 9a97ed6e691..17c3f1ef83e 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -56,7 +56,7 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev)
 
 	/* Spoof incoming device */
 	skb->dev      = dev;
-	skb->mac.raw  = skb->nh.raw;
+	skb_reset_mac_header(skb);
 	skb->nh.raw   = skb->data;
 	skb->pkt_type = PACKET_HOST;
 
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 5d2d93dc083..c49e223084f 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -339,7 +339,7 @@ __be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev)
 	skb->protocol = ethertype;
 	skb->pkt_type = PACKET_HOST;	/*	Physically point to point */
 	skb_pull(skb, cnt);
-	skb->mac.raw  = skb->data;
+	skb_reset_mac_header(skb);
 	return ethertype;
 }
 
-- 
cgit v1.2.3


From 0a1b0ad9ae27f918fd935c6da101083e11446f09 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 12:14:56 -0300
Subject: [LLC]: Use skb_reset_mac_header in llc_alloc_frame

skb->head is equal to skb->data after alloc_skb, so reset the mac header while
this is true, i.e. before skb_reserve.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_sap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 2615dc81aa3..5fa31117e46 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -36,11 +36,11 @@ struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev)
 	struct sk_buff *skb = alloc_skb(128, GFP_ATOMIC);
 
 	if (skb) {
+		skb_reset_mac_header(skb);
 		skb_reserve(skb, 50);
 		skb->nh.raw   = skb->h.raw = skb->data;
 		skb->protocol = htons(ETH_P_802_2);
 		skb->dev      = dev;
-		skb->mac.raw  = skb->head;
 		if (sk != NULL)
 			skb_set_owner_w(skb, sk);
 	}
-- 
cgit v1.2.3


From f64955eb117ad62480b858fd69a11e6f9e74f60b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 12:17:29 -0300
Subject: [LLC]: Use skb_reset_mac_header in llc_mac_hdr_init

skb_push updates and returns skb->data, so we can just call
skb_reset_mac_header after the call to skb_push.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_output.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/llc/llc_output.c b/net/llc/llc_output.c
index 729e2510827..754f4fedc85 100644
--- a/net/llc/llc_output.c
+++ b/net/llc/llc_output.c
@@ -41,7 +41,8 @@ int llc_mac_hdr_init(struct sk_buff *skb,
 		struct net_device *dev = skb->dev;
 		struct trh_hdr *trh;
 
-		skb->mac.raw = skb_push(skb, sizeof(*trh));
+		skb_push(skb, sizeof(*trh));
+		skb_reset_mac_header(skb);
 		trh = tr_hdr(skb);
 		trh->ac = AC;
 		trh->fc = LLC_FRAME;
@@ -62,7 +63,8 @@ int llc_mac_hdr_init(struct sk_buff *skb,
 		unsigned short len = skb->len;
 		struct ethhdr *eth;
 
-		skb->mac.raw = skb_push(skb, sizeof(*eth));
+		skb_push(skb, sizeof(*eth));
+		skb_reset_mac_header(skb);
 		eth = eth_hdr(skb);
 		eth->h_proto = htons(len);
 		memcpy(eth->h_dest, da, ETH_ALEN);
-- 
cgit v1.2.3


From 48d49d0ccdaa9caff4636ef9c3410973d28131b5 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 12:30:58 -0300
Subject: [SK_BUFF]: Introduce skb_set_mac_header()

For the cases where we want to set skb->mac.raw to an offset from skb->data.

Simple cases first, the memmove ones and specially pktgen will be left for later.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/loopback.c       | 2 +-
 drivers/net/myri_sbus.c      | 2 +-
 drivers/s390/net/qeth_eddp.c | 2 +-
 drivers/s390/net/qeth_main.c | 4 ++--
 include/linux/skbuff.h       | 5 +++++
 net/atm/br2684.c             | 2 +-
 6 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 4380e5e89dc..a71d8e0a9b5 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -90,7 +90,7 @@ static void emulate_large_send_offload(struct sk_buff *skb)
 		if (!nskb)
 			break;
 		skb_reserve(nskb, 32);
-		nskb->mac.raw = nskb->data - 14;
+		skb_set_mac_header(nskb, -ETH_HLEN);
 		nskb->nh.raw = nskb->data;
 		iph = nskb->nh.iph;
 		memcpy(nskb->data, skb->nh.raw, doffset);
diff --git a/drivers/net/myri_sbus.c b/drivers/net/myri_sbus.c
index de092658db6..e1f16fb0584 100644
--- a/drivers/net/myri_sbus.c
+++ b/drivers/net/myri_sbus.c
@@ -368,7 +368,7 @@ static __be16 myri_type_trans(struct sk_buff *skb, struct net_device *dev)
 	struct ethhdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw = skb->data + MYRI_PAD_LEN;
+	skb_set_mac_header(skb, MYRI_PAD_LEN);
 	skb_pull(skb, dev->hard_header_len);
 	eth = eth_hdr(skb);
 
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index 910a8ab66b0..893125403c6 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -486,7 +486,7 @@ qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
 		return -ENOMEM;
 	}
 	if (qhdr->hdr.l2.id == QETH_HEADER_TYPE_LAYER2) {
-		skb->mac.raw = skb->data + sizeof(struct qeth_hdr);
+		skb_set_mac_header(skb, sizeof(struct qeth_hdr));
 		memcpy(&eddp->mac, eth_hdr(skb), ETH_HLEN);
 #ifdef CONFIG_QETH_VLAN
 		if (eddp->mac.h_proto == __constant_htons(ETH_P_8021Q)) {
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index 28822025b79..c0ee6d94ea3 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -2306,7 +2306,7 @@ qeth_rebuild_skb_fake_ll_tr(struct qeth_card *card, struct sk_buff *skb,
 	struct iphdr *ip_hdr;
 
 	QETH_DBF_TEXT(trace,5,"skbfktr");
-	skb->mac.raw = skb->data - QETH_FAKE_LL_LEN_TR;
+	skb_set_mac_header(skb, -QETH_FAKE_LL_LEN_TR);
 	/* this is a fake ethernet header */
 	fake_hdr = tr_hdr(skb);
 
@@ -2359,7 +2359,7 @@ qeth_rebuild_skb_fake_ll_eth(struct qeth_card *card, struct sk_buff *skb,
 	struct iphdr *ip_hdr;
 
 	QETH_DBF_TEXT(trace,5,"skbfketh");
-	skb->mac.raw = skb->data - QETH_FAKE_LL_LEN_ETH;
+	skb_set_mac_header(skb, -QETH_FAKE_LL_LEN_ETH);
 	/* this is a fake ethernet header */
 	fake_hdr = eth_hdr(skb);
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 748f254b50c..43ab6cbf844 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -965,6 +965,11 @@ static inline void skb_reset_mac_header(struct sk_buff *skb)
 	skb->mac.raw = skb->data;
 }
 
+static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
+{
+	skb->mac.raw = skb->data + offset;
+}
+
 /*
  * CPUs often take a performance hit when accessing unaligned memory
  * locations. The actual performance hit varies, it can be small if the
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 900d42ca8a5..a1686dfcbb9 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -458,7 +458,7 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 	/* FIXME: tcpdump shows that pointer to mac header is 2 bytes earlier,
 	   than should be. What else should I set? */
 	skb_pull(skb, plen);
-	skb->mac.raw = skb->data - ETH_HLEN;
+	skb_set_mac_header(skb, -ETH_HLEN);
 	skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_BR2684_FAST_TRANS
 	skb->protocol = ((u16 *) skb->data)[-1];
-- 
cgit v1.2.3


From 39f69c6f922fbfb51e1ff24c9e196584a79f1484 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 12:40:27 -0300
Subject: [SK_BUFF] xfrm: Use skb_set_mac_header in the memmove cases

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_mode_tunnel.c | 6 ++++--
 net/ipv6/xfrm6_mode_beet.c   | 6 ++++--
 net/ipv6/xfrm6_mode_tunnel.c | 6 ++++--
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 4872b30ba5d..f09055d3a76 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -91,6 +91,7 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct iphdr *iph = skb->nh.iph;
+	const unsigned char *old_mac;
 	int err = -EINVAL;
 
 	switch (iph->protocol){
@@ -125,8 +126,9 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 		skb->protocol = htons(ETH_P_IPV6);
 	}
 #endif
-	skb->mac.raw = memmove(skb->data - skb->mac_len,
-			       skb->mac.raw, skb->mac_len);
+	old_mac = skb->mac.raw;
+	skb_set_mac_header(skb, -skb->mac_len);
+	memmove(skb->mac.raw, old_mac, skb->mac_len);
 	skb->nh.raw = skb->data;
 	err = 0;
 
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index edcfffa9e87..53cfe1a10cc 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -59,6 +59,7 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct ipv6hdr *ip6h;
+	const unsigned char *old_mac;
 	int size = sizeof(struct ipv6hdr);
 	int err = -EINVAL;
 
@@ -69,8 +70,9 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 	memmove(skb->data, skb->nh.raw, size);
 	skb->nh.raw = skb->data;
 
-	skb->mac.raw = memmove(skb->data - skb->mac_len,
-			       skb->mac.raw, skb->mac_len);
+	old_mac = skb->mac.raw;
+	skb_set_mac_header(skb, -skb->mac_len);
+	memmove(skb->mac.raw, old_mac, skb->mac_len);
 
 	ip6h = skb->nh.ipv6h;
 	ip6h->payload_len = htons(skb->len - size);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 0bc866c0d83..d2c560c181a 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -86,6 +86,7 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err = -EINVAL;
+	const unsigned char *old_mac;
 
 	if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6
 	    && skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPIP)
@@ -107,8 +108,9 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 			ip6ip_ecn_decapsulate(skb);
 		skb->protocol = htons(ETH_P_IP);
 	}
-	skb->mac.raw = memmove(skb->data - skb->mac_len,
-			       skb->mac.raw, skb->mac_len);
+	old_mac = skb->mac.raw;
+	skb_set_mac_header(skb, -skb->mac_len);
+	memmove(skb->mac.raw, old_mac, skb->mac_len);
 	skb->nh.raw = skb->data;
 	err = 0;
 
-- 
cgit v1.2.3


From c51957dafa6f960c5c6372aa3da6c8fa71c13730 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 12:47:22 -0300
Subject: [TCP]: Do the layer header setting in tcp_collapse relative to
 skb->data

That is equal to skb->head before skb_reserve, to help in the layer header
changes.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index fb025608594..e5d1c2c8cea 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3632,11 +3632,13 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		nskb = alloc_skb(copy+header, GFP_ATOMIC);
 		if (!nskb)
 			return;
+
+		nskb->mac.raw = nskb->data + (skb->mac.raw - skb->head);
+		nskb->nh.raw = nskb->data + (skb->nh.raw - skb->head);
+		nskb->h.raw = nskb->data + (skb->h.raw - skb->head);
+
 		skb_reserve(nskb, header);
 		memcpy(nskb->head, skb->head, header);
-		nskb->nh.raw = nskb->head + (skb->nh.raw-skb->head);
-		nskb->h.raw = nskb->head + (skb->h.raw-skb->head);
-		nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head);
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
 		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
 		__skb_insert(nskb, skb->prev, skb, list);
-- 
cgit v1.2.3


From 31713c333ddbb66d694829082620b69b71c4b09a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 12:48:37 -0300
Subject: [TCP]: Use skb_set_mac_header in tcp_collapse

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e5d1c2c8cea..1ec05bd673a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3633,7 +3633,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		if (!nskb)
 			return;
 
-		nskb->mac.raw = nskb->data + (skb->mac.raw - skb->head);
+		skb_set_mac_header(nskb, skb->mac.raw - skb->head);
 		nskb->nh.raw = nskb->data + (skb->nh.raw - skb->head);
 		nskb->h.raw = nskb->data + (skb->h.raw - skb->head);
 
-- 
cgit v1.2.3


From 98e399f82ab3a6d863d1d4a7ea48925cc91c830e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Mar 2007 15:33:04 -0700
Subject: [SK_BUFF]: Introduce skb_mac_header()

For the places where we need a pointer to the mac header, it is still legal to
touch skb->mac.raw directly if just adding to, subtracting from or setting it
to another layer header.

This one also converts some more cases to skb_reset_mac_header() that my
regex missed as it had no spaces before nor after '=', ugh.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/um/drivers/daemon_kern.c              |  2 +-
 arch/um/drivers/mcast_kern.c               |  2 +-
 arch/um/drivers/pcap_kern.c                |  2 +-
 arch/um/drivers/slip_kern.c                |  2 +-
 arch/um/drivers/slirp_kern.c               |  2 +-
 arch/um/os-Linux/drivers/ethertap_kern.c   |  2 +-
 arch/um/os-Linux/drivers/tuntap_kern.c     |  2 +-
 drivers/block/aoe/aoe.h                    |  2 +-
 drivers/ieee1394/eth1394.h                 |  2 +-
 drivers/media/dvb/dvb-core/dvb_net.c       |  2 +-
 drivers/message/fusion/mptlan.c            | 26 ++++++++++++++------------
 drivers/net/arcnet/capmode.c               |  4 ++--
 drivers/net/plip.c                         |  2 +-
 drivers/net/slip.c                         |  2 +-
 drivers/net/wan/hostess_sv11.c             |  2 +-
 drivers/net/wan/sealevel.c                 |  2 +-
 drivers/net/wan/syncppp.c                  |  2 +-
 drivers/net/wireless/airo.c                |  2 +-
 drivers/net/wireless/hostap/hostap_main.c  | 14 +++++++-------
 drivers/net/wireless/orinoco.c             |  2 +-
 drivers/net/wireless/wavelan.c             |  5 +++--
 drivers/net/wireless/wavelan_cs.c          |  4 ++--
 drivers/s390/net/claw.c                    |  2 +-
 include/linux/if_ether.h                   |  2 +-
 include/linux/if_tr.h                      |  2 +-
 include/linux/if_vlan.h                    |  2 +-
 include/linux/netfilter_bridge/ebt_802_3.h |  2 +-
 include/linux/skbuff.h                     | 10 ++++++++++
 net/802/hippi.c                            |  2 +-
 net/appletalk/ddp.c                        |  6 +++---
 net/ax25/af_ax25.c                         |  5 +++--
 net/bluetooth/bnep/core.c                  | 11 +++++++----
 net/bridge/br_netfilter.c                  |  5 +++--
 net/core/dev.c                             |  2 +-
 net/core/filter.c                          |  2 +-
 net/core/skbuff.c                          |  2 +-
 net/ipv4/netfilter/ipt_LOG.c               |  4 ++--
 net/ipv4/netfilter/ipt_ULOG.c              |  4 ++--
 net/ipv4/route.c                           |  4 ++--
 net/ipv4/tcp_input.c                       |  2 +-
 net/ipv4/xfrm4_mode_tunnel.c               |  4 ++--
 net/ipv6/ndisc.c                           |  3 ++-
 net/ipv6/netfilter/ip6t_LOG.c              |  5 +++--
 net/ipv6/netfilter/ip6t_eui64.c            |  4 ++--
 net/ipv6/xfrm6_mode_beet.c                 |  4 ++--
 net/ipv6/xfrm6_mode_tunnel.c               |  4 ++--
 net/netfilter/xt_mac.c                     |  4 ++--
 net/packet/af_packet.c                     |  8 ++++----
 net/tipc/eth_media.c                       |  4 ++--
 49 files changed, 108 insertions(+), 88 deletions(-)

diff --git a/arch/um/drivers/daemon_kern.c b/arch/um/drivers/daemon_kern.c
index 9c2e7a758f2..adeece11e59 100644
--- a/arch/um/drivers/daemon_kern.c
+++ b/arch/um/drivers/daemon_kern.c
@@ -46,7 +46,7 @@ static int daemon_read(int fd, struct sk_buff **skb,
 {
 	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
 	if(*skb == NULL) return(-ENOMEM);
-	return(net_recvfrom(fd, (*skb)->mac.raw, 
+	return(net_recvfrom(fd, skb_mac_header(*skb),
 			    (*skb)->dev->mtu + ETH_HEADER_OTHER));
 }
 
diff --git a/arch/um/drivers/mcast_kern.c b/arch/um/drivers/mcast_kern.c
index 52ccb7b53cd..e6b8e0dd72a 100644
--- a/arch/um/drivers/mcast_kern.c
+++ b/arch/um/drivers/mcast_kern.c
@@ -50,7 +50,7 @@ static int mcast_read(int fd, struct sk_buff **skb, struct uml_net_private *lp)
 {
 	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
 	if(*skb == NULL) return(-ENOMEM);
-	return(net_recvfrom(fd, (*skb)->mac.raw, 
+	return(net_recvfrom(fd, skb_mac_header(*skb),
 			    (*skb)->dev->mtu + ETH_HEADER_OTHER));
 }
 
diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c
index e67362acf0e..948849343ca 100644
--- a/arch/um/drivers/pcap_kern.c
+++ b/arch/um/drivers/pcap_kern.c
@@ -36,7 +36,7 @@ static int pcap_read(int fd, struct sk_buff **skb,
 {
 	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
 	if(*skb == NULL) return(-ENOMEM);
-	return(pcap_user_read(fd, (*skb)->mac.raw, 
+	return(pcap_user_read(fd, skb_mac_header(*skb),
 			      (*skb)->dev->mtu + ETH_HEADER_OTHER,
 			      (struct pcap_data *) &lp->user));
 }
diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c
index 25634bd1f58..125c44f7763 100644
--- a/arch/um/drivers/slip_kern.c
+++ b/arch/um/drivers/slip_kern.c
@@ -49,7 +49,7 @@ static unsigned short slip_protocol(struct sk_buff *skbuff)
 static int slip_read(int fd, struct sk_buff **skb, 
 		       struct uml_net_private *lp)
 {
-	return(slip_user_read(fd, (*skb)->mac.raw, (*skb)->dev->mtu, 
+	return(slip_user_read(fd, skb_mac_header(*skb), (*skb)->dev->mtu,
 			      (struct slip_data *) &lp->user));
 }
 
diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c
index b3ed8fb874a..0a0324a6d29 100644
--- a/arch/um/drivers/slirp_kern.c
+++ b/arch/um/drivers/slirp_kern.c
@@ -53,7 +53,7 @@ static unsigned short slirp_protocol(struct sk_buff *skbuff)
 static int slirp_read(int fd, struct sk_buff **skb, 
 		       struct uml_net_private *lp)
 {
-	return(slirp_user_read(fd, (*skb)->mac.raw, (*skb)->dev->mtu, 
+	return(slirp_user_read(fd, skb_mac_header(*skb), (*skb)->dev->mtu,
 			      (struct slirp_data *) &lp->user));
 }
 
diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c
index 70541821775..12689141414 100644
--- a/arch/um/os-Linux/drivers/ethertap_kern.c
+++ b/arch/um/os-Linux/drivers/ethertap_kern.c
@@ -43,7 +43,7 @@ static int etap_read(int fd, struct sk_buff **skb, struct uml_net_private *lp)
 
 	*skb = ether_adjust_skb(*skb, ETH_HEADER_ETHERTAP);
 	if(*skb == NULL) return(-ENOMEM);
-	len = net_recvfrom(fd, (*skb)->mac.raw, 
+	len = net_recvfrom(fd, skb_mac_header(*skb),
 			   (*skb)->dev->mtu + 2 * ETH_HEADER_ETHERTAP);
 	if(len <= 0) return(len);
 	skb_pull(*skb, 2);
diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c
index 76570a2c25c..f1714e7fb1d 100644
--- a/arch/um/os-Linux/drivers/tuntap_kern.c
+++ b/arch/um/os-Linux/drivers/tuntap_kern.c
@@ -43,7 +43,7 @@ static int tuntap_read(int fd, struct sk_buff **skb,
 {
 	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
 	if(*skb == NULL) return(-ENOMEM);
-	return(net_read(fd, (*skb)->mac.raw, 
+	return(net_read(fd, skb_mac_header(*skb),
 			(*skb)->dev->mtu + ETH_HEADER_OTHER));
 }
 
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 4c34f8d31cc..1d846681794 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -53,7 +53,7 @@ struct aoe_hdr {
 
 static inline struct aoe_hdr *aoe_hdr(const struct sk_buff *skb)
 {
-	return (struct aoe_hdr *)skb->mac.raw;
+	return (struct aoe_hdr *)skb_mac_header(skb);
 }
 #endif
 
diff --git a/drivers/ieee1394/eth1394.h b/drivers/ieee1394/eth1394.h
index c45cbff9138..1e835653514 100644
--- a/drivers/ieee1394/eth1394.h
+++ b/drivers/ieee1394/eth1394.h
@@ -90,7 +90,7 @@ struct eth1394hdr {
 
 static inline struct eth1394hdr *eth1394_hdr(const struct sk_buff *skb)
 {
-	return (struct eth1394hdr *)skb->mac.raw;
+	return (struct eth1394hdr *)skb_mac_header(skb);
 }
 #endif
 
diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
index 76e9c36597e..c6b004182d9 100644
--- a/drivers/media/dvb/dvb-core/dvb_net.c
+++ b/drivers/media/dvb/dvb-core/dvb_net.c
@@ -174,7 +174,7 @@ static unsigned short dvb_net_eth_type_trans(struct sk_buff *skb,
 	struct ethhdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb,dev->hard_header_len);
 	eth = eth_hdr(skb);
 
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index d5b878d5628..21fe1b66808 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -714,6 +714,7 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
 	LANSendRequest_t *pSendReq;
 	SGETransaction32_t *pTrans;
 	SGESimple64_t *pSimple;
+	const unsigned char *mac;
 	dma_addr_t dma;
 	unsigned long flags;
 	int ctx;
@@ -784,6 +785,7 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
 //			IOC_AND_NETDEV_NAMES_s_s(dev),
 //			ctx, skb, skb->data));
 
+	mac = skb_mac_header(skb);
 #ifdef QLOGIC_NAA_WORKAROUND
 {
 	struct NAA_Hosed *nh;
@@ -793,12 +795,12 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
 	   drops. */
 	read_lock_irq(&bad_naa_lock);
 	for (nh = mpt_bad_naa; nh != NULL; nh=nh->next) {
-		if ((nh->ieee[0] == skb->mac.raw[0]) &&
-		    (nh->ieee[1] == skb->mac.raw[1]) &&
-		    (nh->ieee[2] == skb->mac.raw[2]) &&
-		    (nh->ieee[3] == skb->mac.raw[3]) &&
-		    (nh->ieee[4] == skb->mac.raw[4]) &&
-		    (nh->ieee[5] == skb->mac.raw[5])) {
+		if ((nh->ieee[0] == mac[0]) &&
+		    (nh->ieee[1] == mac[1]) &&
+		    (nh->ieee[2] == mac[2]) &&
+		    (nh->ieee[3] == mac[3]) &&
+		    (nh->ieee[4] == mac[4]) &&
+		    (nh->ieee[5] == mac[5])) {
 			cur_naa = nh->NAA;
 			dlprintk ((KERN_INFO "mptlan/sdu_send: using NAA value "
 				  "= %04x.\n", cur_naa));
@@ -810,12 +812,12 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev)
 #endif
 
 	pTrans->TransactionDetails[0] = cpu_to_le32((cur_naa         << 16) |
-						    (skb->mac.raw[0] <<  8) |
-						    (skb->mac.raw[1] <<  0));
-	pTrans->TransactionDetails[1] = cpu_to_le32((skb->mac.raw[2] << 24) |
-						    (skb->mac.raw[3] << 16) |
-						    (skb->mac.raw[4] <<  8) |
-						    (skb->mac.raw[5] <<  0));
+						    (mac[0] <<  8) |
+						    (mac[1] <<  0));
+	pTrans->TransactionDetails[1] = cpu_to_le32((mac[2] << 24) |
+						    (mac[3] << 16) |
+						    (mac[4] <<  8) |
+						    (mac[5] <<  0));
 
 	pSimple = (SGESimple64_t *) &pTrans->TransactionDetails[2];
 
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index 6c764b66e9c..f6a87bd20ff 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -123,7 +123,7 @@ static void rx(struct net_device *dev, int bufnum,
 	skb_put(skb, length + ARC_HDR_SIZE + sizeof(int));
 	skb->dev = dev;
 	skb_reset_mac_header(skb);
-	pkt = (struct archdr *)skb->mac.raw;
+	pkt = (struct archdr *)skb_mac_header(skb);
 	skb_pull(skb, ARC_HDR_SIZE);
 
 	/* up to sizeof(pkt->soft) has already been copied from the card */
@@ -269,7 +269,7 @@ static int ack_tx(struct net_device *dev, int acked)
   ackskb->dev = dev;
 
   skb_reset_mac_header(ackskb);
-  ackpkt = (struct archdr *)ackskb->mac.raw;
+  ackpkt = (struct archdr *)skb_mac_header(ackskb);
   /* skb_pull(ackskb, ARC_HDR_SIZE); */
 
 
diff --git a/drivers/net/plip.c b/drivers/net/plip.c
index 6bb085f5443..8754cf3356b 100644
--- a/drivers/net/plip.c
+++ b/drivers/net/plip.c
@@ -546,7 +546,7 @@ static __be16 plip_type_trans(struct sk_buff *skb, struct net_device *dev)
 	struct ethhdr *eth;
 	unsigned char *rawp;
 
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 	skb_pull(skb,dev->hard_header_len);
 	eth = eth_hdr(skb);
 
diff --git a/drivers/net/slip.c b/drivers/net/slip.c
index 2f4b1de7a2b..65bd20fac82 100644
--- a/drivers/net/slip.c
+++ b/drivers/net/slip.c
@@ -363,7 +363,7 @@ sl_bump(struct slip *sl)
 	}
 	skb->dev = sl->dev;
 	memcpy(skb_put(skb,count), sl->rbuff, count);
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 	skb->protocol=htons(ETH_P_IP);
 	netif_rx(skb);
 	sl->dev->last_rx = jiffies;
diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c
index a02c5fb4056..9ba3e4ee6ec 100644
--- a/drivers/net/wan/hostess_sv11.c
+++ b/drivers/net/wan/hostess_sv11.c
@@ -59,7 +59,7 @@ static void hostess_input(struct z8530_channel *c, struct sk_buff *skb)
 	/* Drop the CRC - it's not a good idea to try and negotiate it ;) */
 	skb_trim(skb, skb->len-2);
 	skb->protocol=__constant_htons(ETH_P_WAN_PPP);
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 	skb->dev=c->netdevice;
 	/*
 	 *	Send it to the PPP layer. We don't have time to process
diff --git a/drivers/net/wan/sealevel.c b/drivers/net/wan/sealevel.c
index 70fb1b98b1d..131358108c5 100644
--- a/drivers/net/wan/sealevel.c
+++ b/drivers/net/wan/sealevel.c
@@ -61,7 +61,7 @@ static void sealevel_input(struct z8530_channel *c, struct sk_buff *skb)
 	/* Drop the CRC - it's not a good idea to try and negotiate it ;) */
 	skb_trim(skb, skb->len-2);
 	skb->protocol=htons(ETH_P_WAN_PPP);
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 	skb->dev=c->netdevice;
 	/*
 	 *	Send it to the PPP layer. We don't have time to process
diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c
index 218f7b574ab..67fc67cfd45 100644
--- a/drivers/net/wan/syncppp.c
+++ b/drivers/net/wan/syncppp.c
@@ -227,7 +227,7 @@ static void sppp_input (struct net_device *dev, struct sk_buff *skb)
 	unsigned long flags;
 
 	skb->dev=dev;
-	skb->mac.raw=skb->data;
+	skb_reset_mac_header(skb);
 
 	if (dev->flags & IFF_RUNNING)
 	{
diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c
index 692a23f9834..7fe0a61091a 100644
--- a/drivers/net/wireless/airo.c
+++ b/drivers/net/wireless/airo.c
@@ -2444,7 +2444,7 @@ static int add_airo_dev( struct net_device *dev );
 
 static int wll_header_parse(struct sk_buff *skb, unsigned char *haddr)
 {
-	memcpy(haddr, skb->mac.raw + 10, ETH_ALEN);
+	memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN);
 	return ETH_ALEN;
 }
 
diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index 0e29ff76287..c2616e7b005 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c
@@ -590,20 +590,20 @@ void hostap_dump_tx_header(const char *name, const struct hfa384x_tx_frame *tx)
 
 int hostap_80211_header_parse(struct sk_buff *skb, unsigned char *haddr)
 {
-	memcpy(haddr, skb->mac.raw + 10, ETH_ALEN); /* addr2 */
+	memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */
 	return ETH_ALEN;
 }
 
 
 int hostap_80211_prism_header_parse(struct sk_buff *skb, unsigned char *haddr)
 {
-	if (*(u32 *)skb->mac.raw == LWNG_CAP_DID_BASE) {
-		memcpy(haddr, skb->mac.raw +
-		       sizeof(struct linux_wlan_ng_prism_hdr) + 10,
+	const unsigned char *mac = skb_mac_header(skb);
+
+	if (*(u32 *)mac == LWNG_CAP_DID_BASE) {
+		memcpy(haddr, mac + sizeof(struct linux_wlan_ng_prism_hdr) + 10,
 		       ETH_ALEN); /* addr2 */
-	} else { /* (*(u32 *)skb->mac.raw == htonl(LWNG_CAPHDR_VERSION)) */
-		memcpy(haddr, skb->mac.raw +
-		       sizeof(struct linux_wlan_ng_cap_hdr) + 10,
+	} else { /* (*(u32 *)mac == htonl(LWNG_CAPHDR_VERSION)) */
+		memcpy(haddr, mac + sizeof(struct linux_wlan_ng_cap_hdr) + 10,
 		       ETH_ALEN); /* addr2 */
 	}
 	return ETH_ALEN;
diff --git a/drivers/net/wireless/orinoco.c b/drivers/net/wireless/orinoco.c
index f1415bff527..062286dc8e1 100644
--- a/drivers/net/wireless/orinoco.c
+++ b/drivers/net/wireless/orinoco.c
@@ -689,7 +689,7 @@ static void orinoco_stat_gather(struct net_device *dev,
 	/* Note : gcc will optimise the whole section away if
 	 * WIRELESS_SPY is not defined... - Jean II */
 	if (SPY_NUMBER(priv)) {
-		orinoco_spy_gather(dev, skb->mac.raw + ETH_ALEN,
+		orinoco_spy_gather(dev, skb_mac_header(skb) + ETH_ALEN,
 				   desc->signal, desc->silence);
 	}
 }
diff --git a/drivers/net/wireless/wavelan.c b/drivers/net/wireless/wavelan.c
index 69cb1471096..2bf77b1ee53 100644
--- a/drivers/net/wireless/wavelan.c
+++ b/drivers/net/wireless/wavelan.c
@@ -2517,7 +2517,8 @@ wv_packet_read(struct net_device * dev, u16 buf_off, int sksize)
 	skb->protocol = eth_type_trans(skb, dev);
 
 #ifdef DEBUG_RX_INFO
-	wv_packet_info(skb->mac.raw, sksize, dev->name, "wv_packet_read");
+	wv_packet_info(skb_mac_header(skb), sksize, dev->name,
+		       "wv_packet_read");
 #endif				/* DEBUG_RX_INFO */
 
 	/* Statistics-gathering and associated stuff.
@@ -2553,7 +2554,7 @@ wv_packet_read(struct net_device * dev, u16 buf_off, int sksize)
 
 		/* Spying stuff */
 #ifdef IW_WIRELESS_SPY
-		wl_spy_gather(dev, skb->mac.raw + WAVELAN_ADDR_SIZE,
+		wl_spy_gather(dev, skb_mac_header(skb) + WAVELAN_ADDR_SIZE,
 			      stats);
 #endif /* IW_WIRELESS_SPY */
 #ifdef HISTOGRAM
diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c
index 9351ee77331..67b867f837c 100644
--- a/drivers/net/wireless/wavelan_cs.c
+++ b/drivers/net/wireless/wavelan_cs.c
@@ -2889,7 +2889,7 @@ wv_packet_read(struct net_device *		dev,
   skb->protocol = eth_type_trans(skb, dev);
 
 #ifdef DEBUG_RX_INFO
-  wv_packet_info(skb->mac.raw, sksize, dev->name, "wv_packet_read");
+  wv_packet_info(skb_mac_header(skb), sksize, dev->name, "wv_packet_read");
 #endif	/* DEBUG_RX_INFO */
      
   /* Statistics gathering & stuff associated.
@@ -2923,7 +2923,7 @@ wv_packet_read(struct net_device *		dev,
 #endif	/* WAVELAN_ROAMING */
 	  
 #ifdef WIRELESS_SPY
-      wl_spy_gather(dev, skb->mac.raw + WAVELAN_ADDR_SIZE, stats);
+      wl_spy_gather(dev, skb_mac_header(skb) + WAVELAN_ADDR_SIZE, stats);
 #endif	/* WIRELESS_SPY */
 #ifdef HISTOGRAM
       wl_his_gather(dev, stats);
diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c
index 7809a79feec..6dd64d0c8d4 100644
--- a/drivers/s390/net/claw.c
+++ b/drivers/s390/net/claw.c
@@ -3525,8 +3525,8 @@ unpack_next:
                                 memcpy(skb_put(skb,len_of_data),
 					privptr->p_mtc_envelope,
 					len_of_data);
-                                skb->mac.raw=skb->data;
                                 skb->dev=dev;
+				skb_reset_mac_header(skb);
                                 skb->protocol=htons(ETH_P_IP);
                                 skb->ip_summed=CHECKSUM_UNNECESSARY;
                                 privptr->stats.rx_packets++;
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index ab08f35cbc3..f6863fbcf33 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -112,7 +112,7 @@ struct ethhdr {
 
 static inline struct ethhdr *eth_hdr(const struct sk_buff *skb)
 {
-	return (struct ethhdr *)skb->mac.raw;
+	return (struct ethhdr *)skb_mac_header(skb);
 }
 
 #ifdef CONFIG_SYSCTL
diff --git a/include/linux/if_tr.h b/include/linux/if_tr.h
index 2f94cf2c7ab..046e9d95ba9 100644
--- a/include/linux/if_tr.h
+++ b/include/linux/if_tr.h
@@ -47,7 +47,7 @@ struct trh_hdr {
 
 static inline struct trh_hdr *tr_hdr(const struct sk_buff *skb)
 {
-	return (struct trh_hdr *)skb->mac.raw;
+	return (struct trh_hdr *)skb_mac_header(skb);
 }
 #ifdef CONFIG_SYSCTL
 extern struct ctl_table tr_table[];
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index d103580c72d..544490d9d0b 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -51,7 +51,7 @@ struct vlan_ethhdr {
 
 static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
 {
-	return (struct vlan_ethhdr *)skb->mac.raw;
+	return (struct vlan_ethhdr *)skb_mac_header(skb);
 }
 
 struct vlan_hdr {
diff --git a/include/linux/netfilter_bridge/ebt_802_3.h b/include/linux/netfilter_bridge/ebt_802_3.h
index 07f044ff1a6..a11b0c2017f 100644
--- a/include/linux/netfilter_bridge/ebt_802_3.h
+++ b/include/linux/netfilter_bridge/ebt_802_3.h
@@ -54,7 +54,7 @@ struct ebt_802_3_hdr {
 
 static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
 {
-	return (struct ebt_802_3_hdr *)skb->mac.raw;
+	return (struct ebt_802_3_hdr *)skb_mac_header(skb);
 }
 #endif
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 43ab6cbf844..dff81af454b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -960,6 +960,16 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
+{
+	return skb->mac.raw;
+}
+
+static inline int skb_mac_header_was_set(const struct sk_buff *skb)
+{
+	return skb->mac.raw != NULL;
+}
+
 static inline void skb_reset_mac_header(struct sk_buff *skb)
 {
 	skb->mac.raw = skb->data;
diff --git a/net/802/hippi.c b/net/802/hippi.c
index d87190038ed..87ffc12b689 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -132,7 +132,7 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
 	 */
 	skb->dev = dev;
 	skb_reset_mac_header(skb);
-	hip = (struct hippi_hdr *)skb->mac.raw;
+	hip = (struct hippi_hdr *)skb_mac_header(skb);
 	skb_pull(skb, HIPPI_HLEN);
 
 	/*
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 32b82705b68..934f25993ce 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1484,7 +1484,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		     struct packet_type *pt, struct net_device *orig_dev)
 {
 	/* Expand any short form frames */
-	if (skb->mac.raw[2] == 1) {
+	if (skb_mac_header(skb)[2] == 1) {
 		struct ddpehdr *ddp;
 		/* Find our address */
 		struct atalk_addr *ap = atalk_find_dev_addr(dev);
@@ -1510,8 +1510,8 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		 * we write the network numbers !
 		 */
 
-		ddp->deh_dnode = skb->mac.raw[0];     /* From physical header */
-		ddp->deh_snode = skb->mac.raw[1];     /* From physical header */
+		ddp->deh_dnode = skb_mac_header(skb)[0];     /* From physical header */
+		ddp->deh_snode = skb_mac_header(skb)[1];     /* From physical header */
 
 		ddp->deh_dnet  = ap->s_net;	/* Network number */
 		ddp->deh_snet  = ap->s_net;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 62605dc5a2c..c89e4f6f902 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1645,9 +1645,10 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
 		struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name;
 		ax25_digi digi;
 		ax25_address src;
+		const unsigned char *mac = skb_mac_header(skb);
 
-		ax25_addr_parse(skb->mac.raw+1, skb->data-skb->mac.raw-1, &src, NULL, &digi, NULL, NULL);
-
+		ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL,
+				&digi, NULL, NULL);
 		sax->sax25_family = AF_AX25;
 		/* We set this correctly, even though we may not let the
 		   application know the digi calls further down (because it
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index b1c2fa96c69..97156c4abc8 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -364,17 +364,20 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
 
 	case BNEP_COMPRESSED_SRC_ONLY:
 		memcpy(__skb_put(nskb, ETH_ALEN), s->eh.h_dest, ETH_ALEN);
-		memcpy(__skb_put(nskb, ETH_ALEN), skb->mac.raw, ETH_ALEN);
+		memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb), ETH_ALEN);
 		put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
 		break;
 
 	case BNEP_COMPRESSED_DST_ONLY:
-		memcpy(__skb_put(nskb, ETH_ALEN), skb->mac.raw, ETH_ALEN);
-		memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source, ETH_ALEN + 2);
+		memcpy(__skb_put(nskb, ETH_ALEN), skb_mac_header(skb),
+		       ETH_ALEN);
+		memcpy(__skb_put(nskb, ETH_ALEN + 2), s->eh.h_source,
+		       ETH_ALEN + 2);
 		break;
 
 	case BNEP_GENERAL:
-		memcpy(__skb_put(nskb, ETH_ALEN * 2), skb->mac.raw, ETH_ALEN * 2);
+		memcpy(__skb_put(nskb, ETH_ALEN * 2), skb_mac_header(skb),
+		       ETH_ALEN * 2);
 		put_unaligned(s->eh.h_proto, (__be16 *) __skb_put(nskb, 2));
 		break;
 	}
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 5439a3c46c3..1163c4f6989 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -753,7 +753,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
 #ifdef CONFIG_NETFILTER_DEBUG
 	/* Be very paranoid. This probably won't happen anymore, but let's
 	 * keep the check just to be sure... */
-	if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) {
+	if (skb_mac_header(skb) < skb->head ||
+	    skb_mac_header(skb) + ETH_HLEN > skb->data) {
 		printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
 		       "bad mac.raw pointer.\n");
 		goto print_error;
@@ -808,7 +809,7 @@ print_error:
 		if (realoutdev)
 			printk("[%s]", realoutdev->name);
 	}
-	printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw,
+	printk(" head:%p, raw:%p, data:%p\n", skb->head, skb_mac_header(skb),
 	       skb->data);
 	dump_stack();
 	return NF_ACCEPT;
diff --git a/net/core/dev.c b/net/core/dev.c
index 2fcaf5bc4a9..560560fe306 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1232,7 +1232,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	}
 	rcu_read_unlock();
 
-	__skb_push(skb, skb->data - skb->mac.raw);
+	__skb_push(skb, skb->data - skb_mac_header(skb));
 
 	return segs;
 }
diff --git a/net/core/filter.c b/net/core/filter.c
index 8d185a089c5..1cc128d0542 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -44,7 +44,7 @@ static void *__load_pointer(struct sk_buff *skb, int k)
 	if (k >= SKF_NET_OFF)
 		ptr = skb->nh.raw + k - SKF_NET_OFF;
 	else if (k >= SKF_LL_OFF)
-		ptr = skb->mac.raw + k - SKF_LL_OFF;
+		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
 
 	if (ptr >= skb->head && ptr < skb->tail)
 		return ptr;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8f6ebd0d369..1493c95f633 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1878,7 +1878,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 	struct sk_buff *segs = NULL;
 	struct sk_buff *tail = NULL;
 	unsigned int mss = skb_shinfo(skb)->gso_size;
-	unsigned int doffset = skb->data - skb->mac.raw;
+	unsigned int doffset = skb->data - skb_mac_header(skb);
 	unsigned int offset = doffset;
 	unsigned int headroom;
 	unsigned int len;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index d9c37fd9422..c697971fe31 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -399,9 +399,9 @@ ipt_log_packet(unsigned int pf,
 		/* MAC logging for input chain only. */
 		printk("MAC=");
 		if (skb->dev && skb->dev->hard_header_len
-		    && skb->mac.raw != (void*)skb->nh.iph) {
+		    && skb->mac.raw != skb->nh.raw) {
 			int i;
-			unsigned char *p = skb->mac.raw;
+			const unsigned char *p = skb_mac_header(skb);
 			for (i = 0; i < skb->dev->hard_header_len; i++,p++)
 				printk("%02x%c", *p,
 				       i==skb->dev->hard_header_len - 1
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 9718b666a38..fae2a34d23d 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -251,9 +251,9 @@ static void ipt_ulog_packet(unsigned int hooknum,
 		*(pm->prefix) = '\0';
 
 	if (in && in->hard_header_len > 0
-	    && skb->mac.raw != (void *) skb->nh.iph
+	    && skb->mac.raw != skb->nh.raw
 	    && in->hard_header_len <= ULOG_MAC_LEN) {
-		memcpy(pm->mac, skb->mac.raw, in->hard_header_len);
+		memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len);
 		pm->mac_len = in->hard_header_len;
 	} else
 		pm->mac_len = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 29ee7be45aa..486ab93127c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1698,9 +1698,9 @@ static void ip_handle_martian_source(struct net_device *dev,
 		printk(KERN_WARNING "martian source %u.%u.%u.%u from "
 			"%u.%u.%u.%u, on dev %s\n",
 			NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
-		if (dev->hard_header_len && skb->mac.raw) {
+		if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
 			int i;
-			unsigned char *p = skb->mac.raw;
+			const unsigned char *p = skb_mac_header(skb);
 			printk(KERN_WARNING "ll header: ");
 			for (i = 0; i < dev->hard_header_len; i++, p++) {
 				printk("%02x", *p);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1ec05bd673a..f5e019cefc1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3633,7 +3633,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		if (!nskb)
 			return;
 
-		skb_set_mac_header(nskb, skb->mac.raw - skb->head);
+		skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
 		nskb->nh.raw = nskb->data + (skb->nh.raw - skb->head);
 		nskb->h.raw = nskb->data + (skb->h.raw - skb->head);
 
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index f09055d3a76..8e123e30cf6 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -126,9 +126,9 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 		skb->protocol = htons(ETH_P_IPV6);
 	}
 #endif
-	old_mac = skb->mac.raw;
+	old_mac = skb_mac_header(skb);
 	skb_set_mac_header(skb, -skb->mac_len);
-	memmove(skb->mac.raw, old_mac, skb->mac_len);
+	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
 	skb->nh.raw = skb->data;
 	err = 0;
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 053147a0027..a3e3d9e2f44 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -828,7 +828,8 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 		if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
 			if (dad) {
 				if (dev->type == ARPHRD_IEEE802_TR) {
-					unsigned char *sadr = skb->mac.raw;
+					const unsigned char *sadr;
+					sadr = skb_mac_header(skb);
 					if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
 					    sadr[9] == dev->dev_addr[1] &&
 					    sadr[10] == dev->dev_addr[2] &&
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index afaa039d0b7..fc9e51a7778 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -397,7 +397,7 @@ ip6t_log_packet(unsigned int pf,
 		printk("MAC=");
 		if (skb->dev && (len = skb->dev->hard_header_len) &&
 		    skb->mac.raw != skb->nh.raw) {
-			unsigned char *p = skb->mac.raw;
+			const unsigned char *p = skb_mac_header(skb);
 			int i;
 
 			if (skb->dev->type == ARPHRD_SIT &&
@@ -412,7 +412,8 @@ ip6t_log_packet(unsigned int pf,
 			printk(" ");
 
 			if (skb->dev->type == ARPHRD_SIT) {
-				struct iphdr *iph = (struct iphdr *)skb->mac.raw;
+				const struct iphdr *iph =
+					(struct iphdr *)skb_mac_header(skb);
 				printk("TUNNEL=%u.%u.%u.%u->%u.%u.%u.%u ",
 				       NIPQUAD(iph->saddr),
 				       NIPQUAD(iph->daddr));
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 967bed71d4a..c2676066a80 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -32,8 +32,8 @@ match(const struct sk_buff *skb,
 	unsigned char eui64[8];
 	int i = 0;
 
-	if (!(skb->mac.raw >= skb->head &&
-	      (skb->mac.raw + ETH_HLEN) <= skb->data) &&
+	if (!(skb_mac_header(skb) >= skb->head &&
+	      (skb_mac_header(skb) + ETH_HLEN) <= skb->data) &&
 	    offset != 0) {
 		*hotdrop = 1;
 		return 0;
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 53cfe1a10cc..79364b1e965 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -70,9 +70,9 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 	memmove(skb->data, skb->nh.raw, size);
 	skb->nh.raw = skb->data;
 
-	old_mac = skb->mac.raw;
+	old_mac = skb_mac_header(skb);
 	skb_set_mac_header(skb, -skb->mac_len);
-	memmove(skb->mac.raw, old_mac, skb->mac_len);
+	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
 
 	ip6h = skb->nh.ipv6h;
 	ip6h->payload_len = htons(skb->len - size);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index d2c560c181a..5bb0677d373 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -108,9 +108,9 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 			ip6ip_ecn_decapsulate(skb);
 		skb->protocol = htons(ETH_P_IP);
 	}
-	old_mac = skb->mac.raw;
+	old_mac = skb_mac_header(skb);
 	skb_set_mac_header(skb, -skb->mac_len);
-	memmove(skb->mac.raw, old_mac, skb->mac_len);
+	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
 	skb->nh.raw = skb->data;
 	err = 0;
 
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index d430d90d7b2..1d3a1d98b88 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -37,8 +37,8 @@ match(const struct sk_buff *skb,
     const struct xt_mac_info *info = matchinfo;
 
     /* Is mac pointer valid? */
-    return (skb->mac.raw >= skb->head
-	    && (skb->mac.raw + ETH_HLEN) <= skb->data
+    return (skb_mac_header(skb) >= skb->head &&
+	    (skb_mac_header(skb) + ETH_HLEN) <= skb->data
 	    /* If so, compare... */
 	    && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr))
 		^ info->invert));
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 6f8c72d2413..73cb2d3e27d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -284,7 +284,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct
 	 *	Incoming packets have ll header pulled,
 	 *	push it back.
 	 *
-	 *	For outgoing ones skb->data == skb->mac.raw
+	 *	For outgoing ones skb->data == skb_mac_header(skb)
 	 *	so that this procedure is noop.
 	 */
 
@@ -303,7 +303,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct
 
 	spkt = &PACKET_SKB_CB(skb)->sa.pkt;
 
-	skb_push(skb, skb->data-skb->mac.raw);
+	skb_push(skb, skb->data - skb_mac_header(skb));
 
 	/*
 	 *	The SOCK_PACKET socket receives _all_ frames.
@@ -488,7 +488,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 		   never delivered to user.
 		 */
 		if (sk->sk_type != SOCK_DGRAM)
-			skb_push(skb, skb->data - skb->mac.raw);
+			skb_push(skb, skb->data - skb_mac_header(skb));
 		else if (skb->pkt_type == PACKET_OUTGOING) {
 			/* Special case: outgoing packets have ll header at head */
 			skb_pull(skb, skb->nh.raw - skb->data);
@@ -592,7 +592,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 
 	if (dev->hard_header) {
 		if (sk->sk_type != SOCK_DGRAM)
-			skb_push(skb, skb->data - skb->mac.raw);
+			skb_push(skb, skb->data - skb_mac_header(skb));
 		else if (skb->pkt_type == PACKET_OUTGOING) {
 			/* Special case: outgoing packets have ll header at head */
 			skb_pull(skb, skb->nh.raw - skb->data);
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index f71ba9db611..03a9db36453 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -99,8 +99,8 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
 
 	if (likely(eb_ptr->bearer)) {
 	       if (likely(!dev->promiscuity) ||
-		   !memcmp(buf->mac.raw,dev->dev_addr,ETH_ALEN) ||
-		   !memcmp(buf->mac.raw,dev->broadcast,ETH_ALEN)) {
+		   !memcmp(skb_mac_header(buf), dev->dev_addr, ETH_ALEN) ||
+		   !memcmp(skb_mac_header(buf), dev->broadcast, ETH_ALEN)) {
 			size = msg_size((struct tipc_msg *)buf->data);
 			skb_trim(buf, size);
 			if (likely(buf->len == size)) {
-- 
cgit v1.2.3


From 37e6636669b0b996681586facee8034f7f674f6a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 15:34:36 -0300
Subject: [LLC]: Kill llc_set_pdu_hdr

We'll have skb_reset_network_header soon.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/llc_pdu.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h
index aa33a477c3f..4e620992c80 100644
--- a/include/net/llc_pdu.h
+++ b/include/net/llc_pdu.h
@@ -218,11 +218,6 @@ static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb)
 	return (struct llc_pdu_un *)skb->nh.raw;
 }
 
-static inline void *llc_set_pdu_hdr(struct sk_buff *skb, void *ptr)
-{
-	return skb->nh.raw = ptr;
-}
-
 /**
  *	llc_pdu_header_init - initializes pdu header
  *	@skb: input skb that header must be set into it.
@@ -237,7 +232,11 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type,
 				       u8 ssap, u8 dsap, u8 cr)
 {
 	const int hlen = type == LLC_PDU_TYPE_U ? 3 : 4;
-	struct llc_pdu_un *pdu = llc_set_pdu_hdr(skb, skb_push(skb, hlen));
+	struct llc_pdu_un *pdu;
+
+	skb_push(skb, hlen);
+	skb->nh.raw = skb->data;
+	pdu = llc_pdu_un_hdr(skb);
 	pdu->dsap = dsap;
 	pdu->ssap = ssap;
 	pdu->ssap |= cr;
-- 
cgit v1.2.3


From 797659fb4a4a511649cd71028141c32ad1698a12 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 15:56:08 -0300
Subject: [PPPOE]: Introduce pppoe_hdr()

For consistency with all the other skb->nh.raw accessors.

Also do some really obvious simplifications in pppoe_recvmsg, well the
kfree_skb one is not so obvious, but free() and kfree() have the same behaviour
(hint :-) ).

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pppoe.c      | 22 ++++++++--------------
 include/linux/if_pppox.h |  7 +++++++
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index ebfa2967cd6..3080a44b23a 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -347,7 +347,7 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb)
 	struct pppox_sock *relay_po = NULL;
 
 	if (sk->sk_state & PPPOX_BOUND) {
-		struct pppoe_hdr *ph = (struct pppoe_hdr *) skb->nh.raw;
+		struct pppoe_hdr *ph = pppoe_hdr(skb);
 		int len = ntohs(ph->length);
 		skb_pull_rcsum(skb, sizeof(struct pppoe_hdr));
 		if (pskb_trim_rcsum(skb, len))
@@ -401,7 +401,7 @@ static int pppoe_rcv(struct sk_buff *skb,
 	if (!(skb = skb_share_check(skb, GFP_ATOMIC)))
 		goto out;
 
-	ph = (struct pppoe_hdr *) skb->nh.raw;
+	ph = pppoe_hdr(skb);
 
 	po = get_item((unsigned long) ph->sid, eth_hdr(skb)->h_source, dev->ifindex);
 	if (po != NULL)
@@ -433,7 +433,7 @@ static int pppoe_disc_rcv(struct sk_buff *skb,
 	if (!(skb = skb_share_check(skb, GFP_ATOMIC)))
 		goto out;
 
-	ph = (struct pppoe_hdr *) skb->nh.raw;
+	ph = pppoe_hdr(skb);
 	if (ph->code != PADT_CODE)
 		goto abort;
 
@@ -931,8 +931,6 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock,
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb = NULL;
 	int error = 0;
-	int len;
-	struct pppoe_hdr *ph = NULL;
 
 	if (sk->sk_state & PPPOX_BOUND) {
 		error = -EIO;
@@ -949,19 +947,15 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock,
 	m->msg_namelen = 0;
 
 	if (skb) {
-		error = 0;
-		ph = (struct pppoe_hdr *) skb->nh.raw;
-		len = ntohs(ph->length);
+		struct pppoe_hdr *ph = pppoe_hdr(skb);
+		const int len = ntohs(ph->length);
 
 		error = memcpy_toiovec(m->msg_iov, (unsigned char *) &ph->tag[0], len);
-		if (error < 0)
-			goto do_skb_free;
-		error = len;
+		if (error == 0)
+			error = len;
 	}
 
-do_skb_free:
-	if (skb)
-		kfree_skb(skb);
+	kfree_skb(skb);
 end:
 	return error;
 }
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index e33ee763c05..7044f8ab30a 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -112,6 +112,13 @@ struct pppoe_hdr {
 } __attribute__ ((packed));
 
 #ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct pppoe_hdr *pppoe_hdr(const struct sk_buff *skb)
+{
+	return (struct pppoe_hdr *)skb->nh.raw;
+}
+
 struct pppoe_opt {
 	struct net_device      *dev;	  /* device associated with socket*/
 	int			ifindex;  /* ifindex of device associated with socket */
-- 
cgit v1.2.3


From a16aeb36239ce612699ed64a75a03c88cbc657e8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 16:07:19 -0300
Subject: [BONDING]: Introduce arp_pkt()

For consistency with all the other skb->nh.raw accessors.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 916162ca0c9..36b8e860107 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -104,6 +104,11 @@ struct arp_pkt {
 };
 #pragma pack()
 
+static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
+{
+	return (struct arp_pkt *)skb->nh.raw;
+}
+
 /* Forward declaration */
 static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
 
@@ -613,7 +618,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, u32 src_ip)
 static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
-	struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw;
+	struct arp_pkt *arp = arp_pkt(skb);
 	struct slave *assigned_slave;
 	struct rlb_client_info *client_info;
 	u32 hash_index = 0;
@@ -701,7 +706,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
  */
 static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
 {
-	struct arp_pkt *arp = (struct arp_pkt *)skb->nh.raw;
+	struct arp_pkt *arp = arp_pkt(skb);
 	struct slave *tx_slave = NULL;
 
 	if (arp->op_code == __constant_htons(ARPOP_REPLY)) {
-- 
cgit v1.2.3


From 57effc70a5be9f7804e9a99964eb7265367effca Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 16:21:45 -0300
Subject: [IPV6]: Use skb->nh.ipv6h instead of casting skb->nh.raw

nh.ipv6h is there exactly for this reason! Use it while it exists ;-)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/exthdrs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index bcc4820baa7..fce5abde554 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -200,7 +200,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 	struct sk_buff *skb = *skbp;
 	struct ipv6_destopt_hao *hao;
 	struct inet6_skb_parm *opt = IP6CB(skb);
-	struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->nh.raw;
+	struct ipv6hdr *ipv6h = skb->nh.ipv6h;
 	struct in6_addr tmp_addr;
 	int ret;
 
-- 
cgit v1.2.3


From c1d2bbe1cd6c7bbdc6d532cefebb66c7efb789ce Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 10 Apr 2007 20:45:18 -0700
Subject: [SK_BUFF]: Introduce skb_reset_network_header(skb)

For the common, open coded 'skb->nh.raw = skb->data' operation, so that we can
later turn skb->nh.raw into a offset, reducing the size of struct sk_buff in
64bit land while possibly keeping it as a pointer on 32bit.

This one touches just the most simple case, next will handle the slightly more
"complex" cases.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/block/aoe/aoecmd.c                    | 2 +-
 drivers/net/cxgb3/sge.c                       | 3 ++-
 drivers/net/hamradio/bpqether.c               | 2 +-
 drivers/net/loopback.c                        | 2 +-
 drivers/net/pppoe.c                           | 4 ++--
 drivers/net/wan/hdlc_cisco.c                  | 2 +-
 drivers/net/wan/hdlc_fr.c                     | 2 +-
 drivers/net/wan/lmc/lmc_main.c                | 4 ++--
 drivers/net/wireless/hostap/hostap_80211_rx.c | 4 ++--
 drivers/net/wireless/hostap/hostap_ap.c       | 2 +-
 drivers/net/wireless/hostap/hostap_main.c     | 2 +-
 include/linux/skbuff.h                        | 5 +++++
 include/net/llc_pdu.h                         | 2 +-
 net/appletalk/aarp.c                          | 2 +-
 net/atm/mpc.c                                 | 2 +-
 net/ax25/af_ax25.c                            | 2 +-
 net/ax25/ax25_ds_subr.c                       | 2 +-
 net/ax25/ax25_in.c                            | 8 ++++----
 net/ax25/ax25_ip.c                            | 2 +-
 net/ax25/ax25_out.c                           | 2 +-
 net/ax25/ax25_subr.c                          | 4 ++--
 net/core/dev.c                                | 5 +++--
 net/core/netpoll.c                            | 5 +++--
 net/decnet/dn_dev.c                           | 4 ++--
 net/decnet/dn_neigh.c                         | 6 +++---
 net/decnet/dn_route.c                         | 2 +-
 net/econet/af_econet.c                        | 2 +-
 net/ieee80211/ieee80211_rx.c                  | 2 +-
 net/ipv4/arp.c                                | 2 +-
 net/ipv4/ip_gre.c                             | 2 +-
 net/ipv4/ip_output.c                          | 4 ++--
 net/ipv4/ipip.c                               | 4 ++--
 net/ipv4/route.c                              | 2 +-
 net/ipv4/xfrm4_mode_tunnel.c                  | 2 +-
 net/ipv6/ip6_output.c                         | 4 ++--
 net/ipv6/ip6_tunnel.c                         | 6 +++---
 net/ipv6/sit.c                                | 4 ++--
 net/ipv6/xfrm6_mode_beet.c                    | 4 ++--
 net/ipv6/xfrm6_mode_tunnel.c                  | 4 ++--
 net/ipv6/xfrm6_output.c                       | 2 +-
 net/irda/irlap_frame.c                        | 3 ++-
 net/iucv/af_iucv.c                            | 2 +-
 net/llc/llc_sap.c                             | 3 ++-
 net/netrom/nr_dev.c                           | 2 +-
 net/packet/af_packet.c                        | 6 +++---
 net/tipc/eth_media.c                          | 2 +-
 net/x25/x25_dev.c                             | 2 +-
 47 files changed, 78 insertions(+), 68 deletions(-)

diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 74062dc4e90..1a6aeac5a1c 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -28,7 +28,7 @@ new_skb(ulong len)
 	skb = alloc_skb(len, GFP_ATOMIC);
 	if (skb) {
 		skb_reset_mac_header(skb);
-		skb->nh.raw = skb->data;
+		skb_reset_network_header(skb);
 		skb->protocol = __constant_htons(ETH_P_AOE);
 		skb->priority = 0;
 		skb->next = skb->prev = NULL;
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index b5cf2a60834..4dd712088bc 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -1621,7 +1621,8 @@ static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
 {
 	rq->offload_pkts++;
 	skb_reset_mac_header(skb);
-	skb->nh.raw = skb->h.raw = skb->data;
+	skb_reset_network_header(skb);
+	skb->h.raw = skb->data;
 
 	if (rq->polling) {
 		rx_gather[gather_idx++] = skb;
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index d2542697e29..656f2789c9b 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -282,7 +282,7 @@ static int bpq_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	skb->protocol = ax25_type_trans(skb, dev);
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 	dev->hard_header(skb, dev, ETH_P_BPQ, bpq->dest_addr, NULL, 0);
 	bpq->stats.tx_packets++;
 	bpq->stats.tx_bytes+=skb->len;
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index a71d8e0a9b5..af476d2a513 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -91,7 +91,7 @@ static void emulate_large_send_offload(struct sk_buff *skb)
 			break;
 		skb_reserve(nskb, 32);
 		skb_set_mac_header(nskb, -ETH_HLEN);
-		nskb->nh.raw = nskb->data;
+		skb_reset_network_header(nskb);
 		iph = nskb->nh.iph;
 		memcpy(nskb->data, skb->nh.raw, doffset);
 		if (skb_copy_bits(skb,
diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index 3080a44b23a..e94790632d5 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -799,7 +799,7 @@ static int pppoe_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	/* Reserve space for headers. */
 	skb_reserve(skb, dev->hard_header_len);
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	skb->dev = dev;
 
@@ -884,7 +884,7 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
 	memcpy(ph, &hdr, sizeof(struct pppoe_hdr));
 	skb2->protocol = __constant_htons(ETH_P_PPP_SES);
 
-	skb2->nh.raw = skb2->data;
+	skb_reset_network_header(skb2);
 
 	skb2->dev = dev;
 
diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index c9664fd8a91..00e0aaadabc 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -124,7 +124,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type,
 	skb_put(skb, sizeof(struct cisco_packet));
 	skb->priority = TC_PRIO_CONTROL;
 	skb->dev = dev;
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	dev_queue_xmit(skb);
 }
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 3240d10fc86..b747228c719 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -590,7 +590,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep)
 	skb_put(skb, i);
 	skb->priority = TC_PRIO_CONTROL;
 	skb->dev = dev;
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	dev_queue_xmit(skb);
 }
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 6d288839dda..d4851465c83 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -1668,7 +1668,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
             skb->protocol = lmc_proto_type(sc, skb);
             skb->protocol = htons(ETH_P_WAN_PPP);
             skb_reset_mac_header(skb);
-//            skb->nh.raw = skb->data;
+            /* skb_reset_network_header(skb); */
             skb->dev = dev;
             lmc_proto_netif(sc, skb);
 
@@ -1706,7 +1706,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
             
             nsb->protocol = lmc_proto_type(sc, skb);
             skb_reset_mac_header(nsb);
-//            nsb->nh.raw = nsb->data;
+            /* skb_reset_network_header(nsb); */
             nsb->dev = dev;
             lmc_proto_netif(sc, nsb);
         }
diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c
index e4082f9d766..7b7c1ca8f1f 100644
--- a/drivers/net/wireless/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_rx.c
@@ -1076,8 +1076,8 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 		skb2->dev = dev;
 		skb2->protocol = __constant_htons(ETH_P_802_3);
 		skb_reset_mac_header(skb2);
-		skb2->nh.raw = skb2->data;
-		/* skb2->nh.raw = skb2->data + ETH_HLEN; */
+		skb_reset_network_header(skb2);
+		/* skb2->nh.raw += ETH_HLEN; */
 		dev_queue_xmit(skb2);
 	}
 
diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c
index cc18f9686d2..797d950d5d6 100644
--- a/drivers/net/wireless/hostap/hostap_ap.c
+++ b/drivers/net/wireless/hostap/hostap_ap.c
@@ -983,7 +983,7 @@ static void prism2_send_mgmt(struct net_device *dev,
 
 	skb->dev = dev;
 	skb_reset_mac_header(skb);
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 	dev_queue_xmit(skb);
 }
 #endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */
diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index c2616e7b005..1f9edd91565 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c
@@ -1064,7 +1064,7 @@ int prism2_sta_send_mgmt(local_info_t *local, u8 *dst, u16 stype,
 
 	skb->dev = dev;
 	skb_reset_mac_header(skb);
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 	dev_queue_xmit(skb);
 
 	return 0;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index dff81af454b..6440c78fe62 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -960,6 +960,11 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+static inline void skb_reset_network_header(struct sk_buff *skb)
+{
+	skb->nh.raw = skb->data;
+}
+
 static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
 {
 	return skb->mac.raw;
diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h
index 4e620992c80..778f75a40b4 100644
--- a/include/net/llc_pdu.h
+++ b/include/net/llc_pdu.h
@@ -235,7 +235,7 @@ static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type,
 	struct llc_pdu_un *pdu;
 
 	skb_push(skb, hlen);
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 	pdu = llc_pdu_un_hdr(skb);
 	pdu->dsap = dsap;
 	pdu->ssap = ssap;
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index d89d62f3702..d4e5ba22e24 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -539,7 +539,7 @@ int aarp_send_ddp(struct net_device *dev, struct sk_buff *skb,
 	int hash;
 	struct aarp_entry *a;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	/* Check for LocalTalk first */
 	if (dev->type == ARPHRD_LOCALTLK) {
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index cb3c004ff02..bc15728fd84 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -713,7 +713,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
 	skb_push(new_skb, eg->ctrl_info.DH_length);     /* add MAC header */
 	memcpy(new_skb->data, eg->ctrl_info.DLL_header, eg->ctrl_info.DH_length);
 	new_skb->protocol = eth_type_trans(new_skb, dev);
-	new_skb->nh.raw = new_skb->data;
+	skb_reset_network_header(new_skb);
 
 	eg->latest_ip_addr = new_skb->nh.iph->saddr;
 	eg->packets_rcvd++;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index c89e4f6f902..b1a4d60ce9a 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1548,7 +1548,7 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 		goto out;
 	}
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	/* Add the PID if one is not supplied by the user in the skb */
 	if (!ax25->pidincl) {
diff --git a/net/ax25/ax25_ds_subr.c b/net/ax25/ax25_ds_subr.c
index 9569dd3fa46..a49773ff2b9 100644
--- a/net/ax25/ax25_ds_subr.c
+++ b/net/ax25/ax25_ds_subr.c
@@ -136,7 +136,7 @@ static void ax25_kiss_cmd(ax25_dev *ax25_dev, unsigned char cmd, unsigned char p
 	if ((skb = alloc_skb(2, GFP_ATOMIC)) == NULL)
 		return;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 	p = skb_put(skb, 2);
 
 	*p++ = cmd;
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 6d11b0633d5..3b2aac67026 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -61,8 +61,8 @@ static int ax25_rx_fragment(ax25_cb *ax25, struct sk_buff *skb)
 					skb_reserve(skbn, AX25_MAX_HEADER_LEN);
 
 					skbn->dev   = ax25->ax25_dev->dev;
+					skb_reset_network_header(skbn);
 					skbn->h.raw = skbn->data;
-					skbn->nh.raw = skbn->data;
 
 					/* Copy data from the fragments */
 					while ((skbo = skb_dequeue(&ax25->frag_queue)) != NULL) {
@@ -123,7 +123,7 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb)
 
 		skb_pull(skb, 1);	/* Remove PID */
 		skb_reset_mac_header(skb);
-		skb->nh.raw   = skb->data;
+		skb_reset_network_header(skb);
 		skb->dev      = ax25->ax25_dev->dev;
 		skb->pkt_type = PACKET_HOST;
 		skb->protocol = htons(ETH_P_IP);
@@ -247,7 +247,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
 		case AX25_P_IP:
 			skb_pull(skb,2);		/* drop PID/CTRL */
 			skb->h.raw    = skb->data;
-			skb->nh.raw   = skb->data;
+			skb_reset_network_header(skb);
 			skb->dev      = dev;
 			skb->pkt_type = PACKET_HOST;
 			skb->protocol = htons(ETH_P_IP);
@@ -257,7 +257,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
 		case AX25_P_ARP:
 			skb_pull(skb,2);
 			skb->h.raw    = skb->data;
-			skb->nh.raw   = skb->data;
+			skb_reset_network_header(skb);
 			skb->dev      = dev;
 			skb->pkt_type = PACKET_HOST;
 			skb->protocol = htons(ETH_P_ARP);
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 7f818bbcd1c..4d4ef35e178 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -171,7 +171,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
 			src_c = *(ax25_address *)(bp + 8);
 
 			skb_pull(ourskb, AX25_HEADER_LEN - 1);	/* Keep PID */
-			ourskb->nh.raw = ourskb->data;
+			skb_reset_network_header(ourskb);
 
 			ax25=ax25_send_frame(
 			    ourskb,
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 223835092b7..6e08dc8dee4 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -205,7 +205,7 @@ static void ax25_send_iframe(ax25_cb *ax25, struct sk_buff *skb, int poll_bit)
 	if (skb == NULL)
 		return;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	if (ax25->modulus == AX25_MODULUS) {
 		frame = skb_push(skb, 1);
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index b6c577e3c91..5fe9b2a6697 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -162,7 +162,7 @@ void ax25_send_control(ax25_cb *ax25, int frametype, int poll_bit, int type)
 
 	skb_reserve(skb, ax25->ax25_dev->dev->hard_header_len);
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	/* Assume a response - address structure for DTE */
 	if (ax25->modulus == AX25_MODULUS) {
@@ -205,7 +205,7 @@ void ax25_return_dm(struct net_device *dev, ax25_address *src, ax25_address *des
 		return;	/* Next SABM will get DM'd */
 
 	skb_reserve(skb, dev->hard_header_len);
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	ax25_digi_invert(digi, &retdigi);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 560560fe306..1b0758254ba 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1074,7 +1074,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 					printk(KERN_CRIT "protocol %04x is "
 					       "buggy, dev %s\n",
 					       skb2->protocol, dev->name);
-				skb2->nh.raw = skb2->data;
+				skb_reset_network_header(skb2);
 			}
 
 			skb2->h.raw = skb2->nh.raw;
@@ -1771,7 +1771,8 @@ int netif_receive_skb(struct sk_buff *skb)
 
 	__get_cpu_var(netdev_rx_stat).total++;
 
-	skb->h.raw = skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
+	skb->h.raw = skb->data;
 	skb->mac_len = skb->nh.raw - skb->mac.raw;
 
 	pt_prev = NULL;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 0ad3896bbf6..b4d1cdd58f1 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -359,7 +359,8 @@ static void arp_reply(struct sk_buff *skb)
 				 (2 * sizeof(u32)))))
 		return;
 
-	skb->h.raw = skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
+	skb->h.raw = skb->data;
 	arp = skb->nh.arph;
 
 	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
@@ -389,7 +390,7 @@ static void arp_reply(struct sk_buff *skb)
 	if (!send_skb)
 		return;
 
-	send_skb->nh.raw = send_skb->data;
+	skb_reset_network_header(send_skb);
 	arp = (struct arphdr *) skb_put(send_skb, size);
 	send_skb->dev = skb->dev;
 	send_skb->protocol = htons(ETH_P_ARP);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 060d725e294..95871a669dc 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -913,7 +913,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 	pktlen = (__le16 *)skb_push(skb,2);
 	*pktlen = dn_htons(skb->len - 2);
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	dn_rt_finish_output(skb, dn_rt_all_rt_mcast, msg->id);
 }
@@ -1005,7 +1005,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
 	pktlen = (__le16 *)skb_push(skb, 2);
 	*pktlen = dn_htons(skb->len - 2);
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	if (dn_am_i_a_router(dn, dn_db, ifa)) {
 		struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index bf701cf5a38..4bf066c416e 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -261,7 +261,7 @@ static int dn_long_output(struct sk_buff *skb)
 	lp->s_class  = 0;
 	lp->pt       = 0;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
 }
@@ -300,7 +300,7 @@ static int dn_short_output(struct sk_buff *skb)
 	sp->srcnode    = cb->src;
 	sp->forward    = cb->hops & 0x3f;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
 }
@@ -342,7 +342,7 @@ static int dn_phase3_output(struct sk_buff *skb)
 	sp->srcnode  = cb->src & dn_htons(0x03ff);
 	sp->forward  = cb->hops & 0x3f;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
 }
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index ef94ca56d7b..34079b7ba1d 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -615,7 +615,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
 		flags = *skb->data;
 	}
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	/*
 	 * Weed out future version DECnet
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 487f879f5a1..099543f5401 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -345,7 +345,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 			goto out_unlock;
 
 		skb_reserve(skb, LL_RESERVED_SPACE(dev));
-		skb->nh.raw = skb->data;
+		skb_reset_network_header(skb);
 
 		eb = (struct ec_cb *)&skb->cb;
 
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index f39bf7c4101..be5ffaf6e8a 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -792,7 +792,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 		skb2->dev = dev;
 		skb2->protocol = __constant_htons(ETH_P_802_3);
 		skb_reset_mac_header(skb2);
-		skb2->nh.raw = skb2->data;
+		skb_reset_network_header(skb2);
 		/* skb2->nh.raw = skb2->data + ETH_HLEN; */
 		dev_queue_xmit(skb2);
 	}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index e6e196cd3b8..8c533ceb970 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -578,7 +578,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
 		return NULL;
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 	arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4));
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_ARP);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 88f8aae873f..ced2c4baf17 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -474,7 +474,7 @@ out:
 	dst_release(skb2->dst);
 	skb2->dst = NULL;
 	skb_pull(skb2, skb->data - (u8*)eiph);
-	skb2->nh.raw = skb2->data;
+	skb_reset_network_header(skb2);
 
 	/* Try to guess incoming interface */
 	memset(&fl, 0, sizeof(fl));
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index ddba857bd24..32f1a23a80f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -593,7 +593,7 @@ slow_path:
 		ip_copy_metadata(skb2, skb);
 		skb_reserve(skb2, ll_rs);
 		skb_put(skb2, len + hlen);
-		skb2->nh.raw = skb2->data;
+		skb_reset_network_header(skb2);
 		skb2->h.raw = skb2->data + hlen;
 
 		/*
@@ -722,7 +722,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
 		skb_put(skb,fragheaderlen + transhdrlen);
 
 		/* initialize network header pointer */
-		skb->nh.raw = skb->data;
+		skb_reset_network_header(skb);
 
 		/* initialize protocol header pointer */
 		skb->h.raw = skb->data + fragheaderlen;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 3ec5ce0f549..5f886c89286 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -405,7 +405,7 @@ out:
 	dst_release(skb2->dst);
 	skb2->dst = NULL;
 	skb_pull(skb2, skb->data - (u8*)eiph);
-	skb2->nh.raw = skb2->data;
+	skb_reset_network_header(skb2);
 
 	/* Try to guess incoming interface */
 	memset(&fl, 0, sizeof(fl));
@@ -487,7 +487,7 @@ static int ipip_rcv(struct sk_buff *skb)
 		secpath_reset(skb);
 
 		skb->mac.raw = skb->nh.raw;
-		skb->nh.raw = skb->data;
+		skb_reset_network_header(skb);
 		skb->protocol = htons(ETH_P_IP);
 		skb->pkt_type = PACKET_HOST;
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 486ab93127c..d2986184490 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2748,7 +2748,7 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 	   through good chunk of routing engine.
 	 */
 	skb_reset_mac_header(skb);
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
 	skb->nh.iph->protocol = IPPROTO_ICMP;
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 8e123e30cf6..ffc6005d1d5 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -129,7 +129,7 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	old_mac = skb_mac_header(skb);
 	skb_set_mac_header(skb, -skb->mac_len);
 	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 	err = 0;
 
 out:
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a5f4562b5d2..4406546d3ce 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -735,7 +735,7 @@ slow_path:
 		ip6_copy_metadata(frag, skb);
 		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
 		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
-		frag->nh.raw = frag->data;
+		skb_reset_network_header(frag);
 		fh = (struct frag_hdr*)(frag->data + hlen);
 		frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
 
@@ -976,7 +976,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 		skb_put(skb,fragheaderlen + transhdrlen);
 
 		/* initialize network header pointer */
-		skb->nh.raw = skb->data;
+		skb_reset_network_header(skb);
 
 		/* initialize protocol header pointer */
 		skb->h.raw = skb->data + fragheaderlen;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index d8c84d8d7cf..30df8e6c42c 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -525,7 +525,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	dst_release(skb2->dst);
 	skb2->dst = NULL;
 	skb_pull(skb2, offset);
-	skb2->nh.raw = skb2->data;
+	skb_reset_network_header(skb2);
 	eiph = skb2->nh.iph;
 
 	/* Try to guess incoming interface */
@@ -599,7 +599,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		dst_release(skb2->dst);
 		skb2->dst = NULL;
 		skb_pull(skb2, offset);
-		skb2->nh.raw = skb2->data;
+		skb_reset_network_header(skb2);
 
 		/* Try to guess incoming interface */
 		rt = rt6_lookup(&skb2->nh.ipv6h->saddr, NULL, 0, 0);
@@ -704,7 +704,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
 		}
 		secpath_reset(skb);
 		skb->mac.raw = skb->nh.raw;
-		skb->nh.raw = skb->data;
+		skb_reset_network_header(skb);
 		skb->protocol = htons(protocol);
 		skb->pkt_type = PACKET_HOST;
 		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 08d6ed3396e..0477728578f 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -340,7 +340,7 @@ out:
 	dst_release(skb2->dst);
 	skb2->dst = NULL;
 	skb_pull(skb2, skb->data - (u8*)iph6);
-	skb2->nh.raw = skb2->data;
+	skb_reset_network_header(skb2);
 
 	/* Try to guess incoming interface */
 	rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0);
@@ -383,7 +383,7 @@ static int ipip6_rcv(struct sk_buff *skb)
 	if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
 		secpath_reset(skb);
 		skb->mac.raw = skb->nh.raw;
-		skb->nh.raw = skb->data;
+		skb_reset_network_header(skb);
 		IPCB(skb)->flags = 0;
 		skb->protocol = htons(ETH_P_IPV6);
 		skb->pkt_type = PACKET_HOST;
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 79364b1e965..c015bfde2b1 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -45,7 +45,7 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 	skb->h.raw = skb->data + hdr_len;
 	memmove(skb->data, iph, hdr_len);
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 	top_iph = skb->nh.ipv6h;
 	skb->nh.raw = &top_iph->nexthdr;
 	skb->h.ipv6h = top_iph + 1;
@@ -68,7 +68,7 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb_push(skb, size);
 	memmove(skb->data, skb->nh.raw, size);
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	old_mac = skb_mac_header(skb);
 	skb_set_mac_header(skb, -skb->mac_len);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 5bb0677d373..8ce5ef2d0b1 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -53,7 +53,7 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	skb_push(skb, x->props.header_len);
 	iph = skb->nh.ipv6h;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 	top_iph = skb->nh.ipv6h;
 	skb->nh.raw = &top_iph->nexthdr;
 	skb->h.ipv6h = top_iph + 1;
@@ -111,7 +111,7 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	old_mac = skb_mac_header(skb);
 	skb_set_mac_header(skb, -skb->mac_len);
 	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 	err = 0;
 
 out:
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index c52e9d6c75e..56364a5f676 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -82,7 +82,7 @@ static int xfrm6_output_one(struct sk_buff *skb)
 
 		spin_unlock_bh(&x->lock);
 
-		skb->nh.raw = skb->data;
+		skb_reset_network_header(skb);
 
 		if (!(skb->dst = dst_pop(dst))) {
 			err = -EHOSTUNREACH;
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 1b7e2490e2e..7c815de4a3b 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -94,7 +94,8 @@ void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb)
 	/* Some common init stuff */
 	skb->dev = self->netdev;
 	skb_reset_mac_header(skb);
-	skb->h.raw = skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
+	skb->h.raw = skb->data;
 	skb->protocol = htons(ETH_P_IRDA);
 	skb->priority = TC_PRIO_BESTEFFORT;
 
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index acc94214bde..a485496059c 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -954,7 +954,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
 		}
 
 		skb->h.raw = skb->data;
-		skb->nh.raw = skb->data;
+		skb_reset_network_header(skb);
 		skb->len = msg->length;
 	}
 
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 5fa31117e46..e76bbbfb64b 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -38,7 +38,8 @@ struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev)
 	if (skb) {
 		skb_reset_mac_header(skb);
 		skb_reserve(skb, 50);
-		skb->nh.raw   = skb->h.raw = skb->data;
+		skb_reset_network_header(skb);
+		skb->h.raw = skb->data;
 		skb->protocol = htons(ETH_P_802_2);
 		skb->dev      = dev;
 		if (sk != NULL)
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 17c3f1ef83e..c7b5d930e73 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -57,7 +57,7 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev)
 	/* Spoof incoming device */
 	skb->dev      = dev;
 	skb_reset_mac_header(skb);
-	skb->nh.raw   = skb->data;
+	skb_reset_network_header(skb);
 	skb->pkt_type = PACKET_HOST;
 
 	netif_rx(skb);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 73cb2d3e27d..1225e751b3f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -401,14 +401,14 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
 	 * notable one here. This should really be fixed at the driver level.
 	 */
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	/* Try to align data part correctly */
 	if (dev->hard_header) {
 		skb->data -= dev->hard_header_len;
 		skb->tail -= dev->hard_header_len;
 		if (len < dev->hard_header_len)
-			skb->nh.raw = skb->data;
+			skb_reset_network_header(skb);
 	}
 
 	/* Returns -EFAULT on error */
@@ -768,7 +768,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		goto out_unlock;
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	if (dev->hard_header) {
 		int res;
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 03a9db36453..67bb29b44d1 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -73,7 +73,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
 
 	clone = skb_clone(buf, GFP_ATOMIC);
 	if (clone) {
-		clone->nh.raw = clone->data;
+		skb_reset_network_header(clone);
 		dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev;
 		clone->dev = dev;
 		dev->hard_header(clone, dev, ETH_P_TIPC,
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index c7221de98a9..94fd12f3a90 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -191,7 +191,7 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb)
 {
 	unsigned char *dptr;
 
-	skb->nh.raw = skb->data;
+	skb_reset_network_header(skb);
 
 	switch (nb->dev->type) {
 		case ARPHRD_X25:
-- 
cgit v1.2.3


From e2d1bca7e6134671bcb19810d004a252aa6a644d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 10 Apr 2007 20:46:21 -0700
Subject: [SK_BUFF]: Use skb_reset_network_header in skb_push cases

skb_push updates and returns skb->data, so we can just call
skb_reset_network_header after the call to skb_push.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c              |  4 +++-
 net/ipv4/ip_gre.c               |  3 ++-
 net/ipv4/ip_output.c            |  3 ++-
 net/ipv4/ipip.c                 |  3 ++-
 net/ipv4/ipmr.c                 |  3 ++-
 net/ipv4/ipvs/ip_vs_xmit.c      |  3 ++-
 net/ipv4/xfrm4_mode_beet.c      |  3 ++-
 net/ipv4/xfrm4_mode_transport.c |  4 +++-
 net/ipv4/xfrm4_mode_tunnel.c    |  3 ++-
 net/ipv6/ip6_output.c           | 14 ++++++++++----
 net/ipv6/ip6_tunnel.c           |  3 ++-
 net/ipv6/sit.c                  |  3 ++-
 12 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index b4d1cdd58f1..44e030eb6e7 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -308,7 +308,9 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	if (udph->check == 0)
 		udph->check = CSUM_MANGLED_0;
 
-	skb->nh.iph = iph = (struct iphdr *)skb_push(skb, sizeof(*iph));
+	skb_push(skb, sizeof(*iph));
+	skb_reset_network_header(skb);
+	iph = skb->nh.iph;
 
 	/* iph->version = 4; iph->ihl = 5; */
 	put_unaligned(0x45, (unsigned char *)iph);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ced2c4baf17..f84ca6668fb 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -828,7 +828,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	skb->h.raw = skb->nh.raw;
-	skb->nh.raw = skb_push(skb, gre_hlen);
+	skb_push(skb, gre_hlen);
+	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 32f1a23a80f..d329718a8e8 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -503,7 +503,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 			if (frag) {
 				frag->ip_summed = CHECKSUM_NONE;
 				frag->h.raw = frag->data;
-				frag->nh.raw = __skb_push(frag, hlen);
+				__skb_push(frag, hlen);
+				skb_reset_network_header(frag);
 				memcpy(frag->nh.raw, iph, hlen);
 				iph = frag->nh.iph;
 				iph->tot_len = htons(frag->len);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 5f886c89286..843cc09f961 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -619,7 +619,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	skb->h.raw = skb->nh.raw;
-	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+	skb_push(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index f73f4e402f7..05c82ca45aa 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1593,7 +1593,8 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 			return -ENOMEM;
 		}
 
-		skb2->nh.raw = skb_push(skb2, sizeof(struct iphdr));
+		skb_push(skb2, sizeof(struct iphdr));
+		skb_reset_network_header(skb2);
 		skb2->nh.iph->ihl = sizeof(struct iphdr)>>2;
 		skb2->nh.iph->saddr = rt->rt_src;
 		skb2->nh.iph->daddr = rt->rt_dst;
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index f73c5acf5dd..d1403d0855e 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -385,7 +385,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* fix old IP header checksum */
 	ip_send_check(old_iph);
 
-	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+	skb_push(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 
 	/* drop old route */
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index d419e15d980..eaaf3565f3b 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -40,7 +40,8 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 	if (unlikely(optlen))
 		hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
 
-	skb->nh.raw = skb_push(skb, x->props.header_len + hdrlen);
+	skb_push(skb, x->props.header_len + hdrlen);
+	skb_reset_network_header(skb);
 	top_iph = skb->nh.iph;
 	skb->h.raw += sizeof(*iph) - hdrlen;
 
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index 92676b7e403..290c0f2e7c2 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -32,7 +32,9 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 	ihl = iph->ihl * 4;
 	skb->h.raw += ihl;
 
-	skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl);
+	skb_push(skb, x->props.header_len);
+	skb_reset_network_header(skb);
+	memmove(skb->nh.raw, iph, ihl);
 	return 0;
 }
 
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index ffc6005d1d5..bec851f278e 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -49,7 +49,8 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	iph = skb->nh.iph;
 	skb->h.ipiph = iph;
 
-	skb->nh.raw = skb_push(skb, x->props.header_len);
+	skb_push(skb, x->props.header_len);
+	skb_reset_network_header(skb);
 	top_iph = skb->nh.iph;
 
 	top_iph->ihl = 5;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4406546d3ce..8a7b5c76014 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -191,7 +191,9 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
 	}
 
-	hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
+	skb_push(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	hdr = skb->nh.ipv6h;
 
 	/*
 	 *	Fill in the IPv6 header
@@ -626,7 +628,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 		__skb_pull(skb, hlen);
 		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
-		skb->nh.raw = __skb_push(skb, hlen);
+		__skb_push(skb, hlen);
+		skb_reset_network_header(skb);
 		memcpy(skb->nh.raw, tmp_hdr, hlen);
 
 		ipv6_select_ident(skb, fh);
@@ -649,7 +652,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 				frag->ip_summed = CHECKSUM_NONE;
 				frag->h.raw = frag->data;
 				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
-				frag->nh.raw = __skb_push(frag, hlen);
+				__skb_push(frag, hlen);
+				skb_reset_network_header(frag);
 				memcpy(frag->nh.raw, tmp_hdr, hlen);
 				offset += skb->len - hlen - sizeof(struct frag_hdr);
 				fh->nexthdr = nexthdr;
@@ -1346,7 +1350,9 @@ int ip6_push_pending_frames(struct sock *sk)
 	if (opt && opt->opt_nflen)
 		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
 
-	skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
+	skb_push(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	hdr = skb->nh.ipv6h;
 
 	*(__be32*)hdr = fl->fl6_flowlabel |
 		     htonl(0x60000000 | ((int)np->cork.tclass << 20));
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 30df8e6c42c..a1e4f39c679 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -907,7 +907,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 		init_tel_txopt(&opt, encap_limit);
 		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
 	}
-	skb->nh.raw = skb_push(skb, sizeof(struct ipv6hdr));
+	skb_push(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
 	ipv6h = skb->nh.ipv6h;
 	*(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
 	dsfield = INET_ECN_encapsulate(0, dsfield);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 0477728578f..62883d41b6c 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -554,7 +554,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	skb->h.raw = skb->nh.raw;
-	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
+	skb_push(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags = 0;
 	dst_release(skb->dst);
-- 
cgit v1.2.3


From 7e28ecc282574a7d72ace365fc9bc86e27ba880f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 18:40:59 -0300
Subject: [SK_BUFF]: Use skb_reset_network_header where the skb_pull return was
 being used

But only in the cases where its a newly allocated skb, i.e. one where skb->tail
is equal to skb->data, or just after skb_reserve, where this requirement is
maintained.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/appletalk/aarp.c | 12 +++++++++---
 net/ipv4/igmp.c      |  8 ++++++--
 net/ipv4/raw.c       |  4 +++-
 net/ipx/af_ipx.c     |  4 +++-
 net/ipx/ipx_route.c  |  4 +++-
 5 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index d4e5ba22e24..de495c57aa9 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -118,7 +118,9 @@ static void __aarp_send_query(struct aarp_entry *a)
 
 	/* Set up the buffer */
 	skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
-	skb->nh.raw      = skb->h.raw = skb_put(skb, sizeof(*eah));
+	skb_reset_network_header(skb);
+	skb->h.raw	 = skb->data;
+	skb_put(skb, sizeof(*eah));
 	skb->protocol    = htons(ETH_P_ATALK);
 	skb->dev	 = dev;
 	eah		 = aarp_hdr(skb);
@@ -163,7 +165,9 @@ static void aarp_send_reply(struct net_device *dev, struct atalk_addr *us,
 
 	/* Set up the buffer */
 	skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
-	skb->nh.raw      = skb->h.raw = skb_put(skb, sizeof(*eah));
+	skb_reset_network_header(skb);
+	skb->h.raw	 = skb->data;
+	skb_put(skb, sizeof(*eah));
 	skb->protocol    = htons(ETH_P_ATALK);
 	skb->dev	 = dev;
 	eah		 = aarp_hdr(skb);
@@ -212,7 +216,9 @@ static void aarp_send_probe(struct net_device *dev, struct atalk_addr *us)
 
 	/* Set up the buffer */
 	skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
-	skb->nh.raw      = skb->h.raw = skb_put(skb, sizeof(*eah));
+	skb_reset_network_header(skb);
+	skb->h.raw	 = skb->data;
+	skb_put(skb, sizeof(*eah));
 	skb->protocol    = htons(ETH_P_ATALK);
 	skb->dev	 = dev;
 	eah		 = aarp_hdr(skb);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 8cedb2a2c9d..d17ad09a83e 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -314,7 +314,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
-	skb->nh.iph = pip =(struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4);
+	skb_reset_network_header(skb);
+	pip = skb->nh.iph;
+	skb_put(skb, sizeof(struct iphdr) + 4);
 
 	pip->version  = 4;
 	pip->ihl      = (sizeof(struct iphdr)+4)>>2;
@@ -664,7 +666,9 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
-	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4);
+	skb_reset_network_header(skb);
+	iph = skb->nh.iph;
+	skb_put(skb, sizeof(struct iphdr) + 4);
 
 	iph->version  = 4;
 	iph->ihl      = (sizeof(struct iphdr)+4)>>2;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 87e9c161810..67e5e3c035c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -291,7 +291,9 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 	skb->priority = sk->sk_priority;
 	skb->dst = dst_clone(&rt->u.dst);
 
-	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
+	skb_reset_network_header(skb);
+	iph = skb->nh.iph;
+	skb_put(skb, length);
 
 	skb->ip_summed = CHECKSUM_NONE;
 
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 6c6c0a3a0ab..9aa7b961b3e 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -576,7 +576,9 @@ static struct sk_buff *ipxitf_adjust_skbuff(struct ipx_interface *intrfc,
 	skb2 = alloc_skb(len, GFP_ATOMIC);
 	if (skb2) {
 		skb_reserve(skb2, out_offset);
-		skb2->nh.raw = skb2->h.raw = skb_put(skb2, skb->len);
+		skb_reset_network_header(skb2);
+		skb2->h.raw = skb2->data;
+		skb_put(skb2, skb->len);
 		memcpy(ipx_hdr(skb2), ipx_hdr(skb), skb->len);
 		memcpy(skb2->cb, skb->cb, sizeof(skb->cb));
 	}
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index 8e1cad971f1..e8277f544e7 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -203,7 +203,9 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
 	skb->sk = sk;
 
 	/* Fill in IPX header */
-	skb->h.raw = skb->nh.raw = skb_put(skb, sizeof(struct ipxhdr));
+	skb_reset_network_header(skb);
+	skb->h.raw = skb->data;
+	skb_put(skb, sizeof(struct ipxhdr));
 	ipx = ipx_hdr(skb);
 	ipx->ipx_pktsize = htons(len + sizeof(struct ipxhdr));
 	IPX_SKB_CB(skb)->ipx_tctrl = 0;
-- 
cgit v1.2.3


From 4209fb601c0a0e0a9d90c0008f350dd345c8b7de Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 18:42:03 -0300
Subject: [SK_BUFF]: Use skb_reset_network_header where the return of
 __pskb_pull was being used

It returns skb->data, so we can just use skb_reset_network_header after it.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index f84ca6668fb..7c6fda6fe84 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -617,7 +617,8 @@ static int ipgre_rcv(struct sk_buff *skb)
 		}
 
 		skb_reset_mac_header(skb);
-		skb->nh.raw = __pskb_pull(skb, offset);
+		__pskb_pull(skb, offset);
+		skb_reset_network_header(skb);
 		skb_postpull_rcsum(skb, skb->h.raw, offset);
 		skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_NET_IPGRE_BROADCAST
-- 
cgit v1.2.3


From 31c7711b509d470ab1e175e7bb98ea66a82aa916 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 19:04:55 -0300
Subject: [SK_BUFF]: Some more simple skb_reset_network_header conversions

This time of the type:

 skb->nh.iph = (struct iphdr *)skb->data;

That is completely equivalent to:

 skb->nh.raw = skb->data;

Wonder why people love casts... :-)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee80211/ieee80211_rx.c | 2 +-
 net/ipv4/ipmr.c              | 4 ++--
 net/sctp/input.c             | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index be5ffaf6e8a..7f5a352800a 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -793,7 +793,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 		skb2->protocol = __constant_htons(ETH_P_802_3);
 		skb_reset_mac_header(skb2);
 		skb_reset_network_header(skb2);
-		/* skb2->nh.raw = skb2->data + ETH_HLEN; */
+		/* skb2->nh.raw += ETH_HLEN; */
 		dev_queue_xmit(skb2);
 	}
 #endif
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 05c82ca45aa..45435f0a5d6 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1454,7 +1454,7 @@ int pim_rcv_v1(struct sk_buff * skb)
 
 	skb->mac.raw = skb->nh.raw;
 	skb_pull(skb, (u8*)encap - skb->data);
-	skb->nh.iph = (struct iphdr *)skb->data;
+	skb_reset_network_header(skb);
 	skb->dev = reg_dev;
 	skb->protocol = htons(ETH_P_IP);
 	skb->ip_summed = 0;
@@ -1509,7 +1509,7 @@ static int pim_rcv(struct sk_buff * skb)
 
 	skb->mac.raw = skb->nh.raw;
 	skb_pull(skb, (u8*)encap - skb->data);
-	skb->nh.iph = (struct iphdr *)skb->data;
+	skb_reset_network_header(skb);
 	skb->dev = reg_dev;
 	skb->protocol = htons(ETH_P_IP);
 	skb->ip_summed = 0;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 71db6687369..2b0863aba3f 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -528,7 +528,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
 	/* Fix up skb to look at the embedded net header. */
 	saveip = skb->nh.raw;
 	savesctp  = skb->h.raw;
-	skb->nh.iph = iph;
+	skb_reset_network_header(skb);
 	skb->h.raw = (char *)sh;
 	sk = sctp_err_lookup(AF_INET, skb, sh, &asoc, &transport);
 	/* Put back, the original pointers. */
-- 
cgit v1.2.3


From 2ca9e6f2c2a4117d21947e911ae1f5e5306b0df0 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 19:15:25 -0300
Subject: [SK_BUFF]: Some more skb_put cases converted to
 skb_reset_network_header

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c   | 3 ++-
 net/ipv4/ip_sockglue.c | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d329718a8e8..dde51fb72cd 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1126,7 +1126,8 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 			 *	Find where to start putting bytes.
 			 */
 			data = skb_put(skb, fragheaderlen + fraggap);
-			skb->nh.iph = iph = (struct iphdr *)data;
+			skb_reset_network_header(skb);
+			iph = skb->nh.iph;
 			data += fragheaderlen;
 			skb->h.raw = data;
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index c5e41644c80..f8ab654b6a3 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -296,8 +296,9 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
 	if (!skb)
 		return;
 
-	iph = (struct iphdr*)skb_put(skb, sizeof(struct iphdr));
-	skb->nh.iph = iph;
+	skb_put(skb, sizeof(struct iphdr));
+	skb_reset_network_header(skb);
+	iph = skb->nh.iph;
 	iph->daddr = daddr;
 
 	serr = SKB_EXT_ERR(skb);
-- 
cgit v1.2.3


From 04b964dbad25cbd6edd8ecbeca2efb40c9860865 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 19:27:27 -0300
Subject: [SK_BUFF] ipconfig: Another conversion to skb_reset_network_header
 related to skb_put

boot_pkt->iph is the first member, that is at skb->data, so just use
skb_reset_network_header().

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipconfig.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index c43699f374c..6e8998409cb 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -702,7 +702,8 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
 	memset(b, 0, sizeof(struct bootp_pkt));
 
 	/* Construct IP header */
-	skb->nh.iph = h = &b->iph;
+	skb_reset_network_header(skb);
+	h = skb->nh.iph;
 	h->version = 4;
 	h->ihl = 5;
 	h->tot_len = htons(sizeof(struct bootp_pkt));
-- 
cgit v1.2.3


From 8856dfa3e9b71ac2177016f66ace3a8978afecc1 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 19:40:39 -0300
Subject: [SK_BUFF]: Use skb_reset_network_header after skb_push

Some more cases where skb->nh.iph was being set that were converted
to using skb_reset_network_header.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 14 ++++++--------
 net/ipv4/ipmr.c      |  9 ++++++---
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index dde51fb72cd..99cd90c2231 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -125,11 +125,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	struct iphdr *iph;
 
 	/* Build the IP header. */
-	if (opt)
-		iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen);
-	else
-		iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));
-
+	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+	skb_reset_network_header(skb);
+	iph = skb->nh.iph;
 	iph->version  = 4;
 	iph->ihl      = 5;
 	iph->tos      = inet->tos;
@@ -143,7 +141,6 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	iph->protocol = sk->sk_protocol;
 	iph->tot_len  = htons(skb->len);
 	ip_select_ident(iph, &rt->u.dst, sk);
-	skb->nh.iph   = iph;
 
 	if (opt && opt->optlen) {
 		iph->ihl += opt->optlen>>2;
@@ -333,7 +330,9 @@ packet_routed:
 		goto no_route;
 
 	/* OK, we know where to send it, allocate and build IP header. */
-	iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
+	skb_reset_network_header(skb);
+	iph = skb->nh.iph;
 	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
 	iph->tot_len = htons(skb->len);
 	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
@@ -344,7 +343,6 @@ packet_routed:
 	iph->protocol = sk->sk_protocol;
 	iph->saddr    = rt->rt_src;
 	iph->daddr    = rt->rt_dst;
-	skb->nh.iph   = iph;
 	/* Transport layer set skb->h.foo itself. */
 
 	if (opt && opt->optlen) {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 45435f0a5d6..51528d3cc33 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1093,7 +1093,12 @@ static struct notifier_block ip_mr_notifier={
 
 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 {
-	struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
+	struct iphdr *iph;
+
+	skb_push(skb, sizeof(struct iphdr));
+	skb->h.ipiph = skb->nh.iph;
+	skb_reset_network_header(skb);
+	iph = skb->nh.iph;
 
 	iph->version	= 	4;
 	iph->tos	=	skb->nh.iph->tos;
@@ -1107,8 +1112,6 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 	ip_select_ident(iph, skb->dst, NULL);
 	ip_send_check(iph);
 
-	skb->h.ipiph = skb->nh.iph;
-	skb->nh.iph = iph;
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	nf_reset(skb);
 }
-- 
cgit v1.2.3


From 1ced98e81d1c2f1ce965ecf8d0032e02ffa07bf0 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 19:57:15 -0300
Subject: [SK_BUFF] ipv6: More skb_reset_network_header conversions related to
 skb_pull

Now related to this form:

skb->nh.ipv6h = (struct ipv6hdr *)skb_put(skb, length);

That, as the others, is done when skb->tail is still equal to skb->data, making
the conversion to skb_reset_network_header possible.

Also one more case equivalent to skb->nh.raw = skb->data, of this form:

iph = (struct ipv6hdr *)skb->data;
<SNIP>
skb->nh.ipv6h = iph;

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/datagram.c              | 5 +++--
 net/ipv6/netfilter/ip6t_REJECT.c | 5 +++--
 net/ipv6/raw.c                   | 4 +++-
 net/sctp/ipv6.c                  | 3 +--
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 3b4e8dcf4c8..31a20f17c85 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -251,8 +251,9 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 	if (!skb)
 		return;
 
-	iph = (struct ipv6hdr*)skb_put(skb, sizeof(struct ipv6hdr));
-	skb->nh.ipv6h = iph;
+	skb_put(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	iph = skb->nh.ipv6h;
 	ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
 
 	serr = SKB_EXT_ERR(skb);
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 6abee94c929..4441bed430f 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -120,8 +120,9 @@ static void send_reset(struct sk_buff *oldskb)
 
 	skb_reserve(nskb, hh_len + dst->header_len);
 
-	ip6h = nskb->nh.ipv6h = (struct ipv6hdr *)
-					skb_put(nskb, sizeof(struct ipv6hdr));
+	skb_put(nskb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(nskb);
+	ip6h = nskb->nh.ipv6h;
 	ip6h->version = 6;
 	ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT);
 	ip6h->nexthdr = IPPROTO_TCP;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 203e069e7fe..5f26645195d 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -575,7 +575,9 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 	skb->priority = sk->sk_priority;
 	skb->dst = dst_clone(&rt->u.dst);
 
-	skb->nh.ipv6h = iph = (struct ipv6hdr *)skb_put(skb, length);
+	skb_put(skb, length);
+	skb_reset_network_header(skb);
+	iph = skb->nh.ipv6h;
 
 	skb->ip_summed = CHECKSUM_NONE;
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0b9c49b3a10..5f9b145b0b9 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -122,7 +122,6 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			     int type, int code, int offset, __be32 info)
 {
 	struct inet6_dev *idev;
-	struct ipv6hdr *iph = (struct ipv6hdr *)skb->data;
 	struct sctphdr *sh = (struct sctphdr *)(skb->data + offset);
 	struct sock *sk;
 	struct sctp_association *asoc;
@@ -136,7 +135,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	/* Fix up skb to look at the embedded net header. */
 	saveip = skb->nh.raw;
 	savesctp  = skb->h.raw;
-	skb->nh.ipv6h = iph;
+	skb_reset_network_header(skb);
 	skb->h.raw = (char *)sh;
 	sk = sctp_err_lookup(AF_INET6, skb, sh, &asoc, &transport);
 	/* Put back, the original pointers. */
-- 
cgit v1.2.3


From 7f5c0cb05f158ee91414e1f99d3fe18349a80371 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 19:59:16 -0300
Subject: [SK_BUFF] xfrm4: use skb_reset_network_header

Setting it to skb->h.raw, which is valid, in the (to become) old pointer based
world order and in the new world of offset based layer headers.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_mode_transport.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index 290c0f2e7c2..a820dde2c86 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -50,8 +50,10 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int ihl = skb->data - skb->h.raw;
 
-	if (skb->h.raw != skb->nh.raw)
-		skb->nh.raw = memmove(skb->h.raw, skb->nh.raw, ihl);
+	if (skb->h.raw != skb->nh.raw) {
+		memmove(skb->h.raw, skb->nh.raw, ihl);
+		skb->nh.raw = skb->h.raw;
+	}
 	skb->nh.iph->tot_len = htons(skb->len + ihl);
 	skb->h.raw = skb->data;
 	return 0;
-- 
cgit v1.2.3


From e7dd65dafda5737a983c04d652a69ab8da78ee3f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 20:09:45 -0300
Subject: [SK_BUFF] bonding: Set skb->nh.raw relative to skb->mac.raw

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_3ad.c | 4 ++--
 drivers/net/bonding/bond_alb.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index e3c9e2e56d1..05c870d6f6c 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -885,7 +885,7 @@ static int ad_lacpdu_send(struct port *port)
 
 	skb->dev = slave->dev;
 	skb_reset_mac_header(skb);
-	skb->nh.raw = skb->data + ETH_HLEN;
+	skb->nh.raw = skb->mac.raw + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 	skb->priority = TC_PRIO_CONTROL;
 
@@ -929,7 +929,7 @@ static int ad_marker_send(struct port *port, struct marker *marker)
 
 	skb->dev = slave->dev;
 	skb_reset_mac_header(skb);
-	skb->nh.raw = skb->data + ETH_HLEN;
+	skb->nh.raw = skb->mac.raw + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 
 	marker_header = (struct marker_header *)skb_put(skb, length);
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 36b8e860107..5c2a12c2b99 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -896,7 +896,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
 		memcpy(data, &pkt, size);
 
 		skb_reset_mac_header(skb);
-		skb->nh.raw = data + ETH_HLEN;
+		skb->nh.raw = skb->mac.raw + ETH_HLEN;
 		skb->protocol = pkt.type;
 		skb->priority = TC_PRIO_CONTROL;
 		skb->dev = slave->dev;
-- 
cgit v1.2.3


From bbe735e4247dba32568a305553b010081c8dea99 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 10 Mar 2007 22:16:10 -0300
Subject: [SK_BUFF]: Introduce skb_network_offset()

For the quite common 'skb->nh.raw - skb->data' sequence.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/i4l/isdn_net.c        | 2 +-
 drivers/net/atl1/atl1_main.c       | 2 +-
 drivers/net/chelsio/sge.c          | 2 +-
 drivers/net/cxgb3/sge.c            | 2 +-
 drivers/net/e1000/e1000_main.c     | 2 +-
 drivers/net/gianfar.c              | 2 +-
 drivers/net/ixgb/ixgb_main.c       | 2 +-
 drivers/net/netxen/netxen_nic_hw.c | 2 +-
 include/linux/skbuff.h             | 5 +++++
 net/ax25/ax25_out.c                | 4 ++--
 net/core/neighbour.c               | 6 +++---
 net/ipv4/ip_output.c               | 4 ++--
 net/ipv6/icmp.c                    | 4 ++--
 net/ipv6/ip6_output.c              | 4 ++--
 net/netfilter/nf_conntrack_core.c  | 4 ++--
 net/packet/af_packet.c             | 8 ++++----
 net/sched/sch_teql.c               | 2 +-
 security/selinux/hooks.c           | 4 ++--
 18 files changed, 33 insertions(+), 28 deletions(-)

diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index fadb9291bc1..0c2b3752e46 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -1121,7 +1121,7 @@ isdn_net_adjust_hdr(struct sk_buff *skb, struct net_device *dev)
 	if (!skb)
 		return;
 	if (lp->p_encap == ISDN_NET_ENCAP_ETHER) {
-		int pullsize = (ulong)skb->nh.raw - (ulong)skb->data - ETH_HLEN;
+		const int pullsize = skb_network_offset(skb) - ETH_HLEN;
 		if (pullsize > 0) {
 			printk(KERN_DEBUG "isdn_net: Pull junk %d\n", pullsize);
 			skb_pull(skb, pullsize);
diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index e3f181602e4..793a61b2140 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -1300,7 +1300,7 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb,
 			    ~csum_tcpudp_magic(skb->nh.iph->saddr,
 					       skb->nh.iph->daddr, 0,
 					       IPPROTO_TCP, 0);
-			ipofst = skb->nh.raw - skb->data;
+			ipofst = skb_network_offset(skb);
 			if (ipofst != ENET_HEADER_SIZE) /* 802.3 frame */
 				tso->tsopl |= 1 << TSO_PARAM_ETHTYPE_SHIFT;
 
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 47fa8dcf752..8cdee67d582 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1865,7 +1865,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		++st->tx_tso;
 
-		eth_type = skb->nh.raw - skb->data == ETH_HLEN ?
+		eth_type = skb_network_offset(skb) == ETH_HLEN ?
 			CPL_ETH_II : CPL_ETH_II_VLAN;
 
 		hdr = (struct cpl_tx_pkt_lso *)skb_push(skb, sizeof(*hdr));
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 4dd712088bc..7e9e9db4fb9 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -897,7 +897,7 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
 		d->flit[2] = 0;
 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
 		hdr->cntrl = htonl(cntrl);
-		eth_type = skb->nh.raw - skb->data == ETH_HLEN ?
+		eth_type = skb_network_offset(skb) == ETH_HLEN ?
 		    CPL_ETH_II : CPL_ETH_II_VLAN;
 		tso_info |= V_LSO_ETH_TYPE(eth_type) |
 		    V_LSO_IPHDR_WORDS(skb->nh.iph->ihl) |
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index b28a915bd98..86161011b53 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2910,7 +2910,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 						 0);
 			ipcse = 0;
 		}
-		ipcss = skb->nh.raw - skb->data;
+		ipcss = skb_network_offset(skb);
 		ipcso = (void *)&(skb->nh.iph->check) - (void *)skb->data;
 		tucss = skb->h.raw - skb->data;
 		tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 1d019195a39..c7a70933c75 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -952,7 +952,7 @@ static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb)
 	 * frame (skb->data) and the start of the IP hdr.
 	 * l4os is the distance between the start of the
 	 * l3 hdr and the l4 hdr */
-	fcb->l3os = (u16)(skb->nh.raw - skb->data - GMAC_FCB_LEN);
+	fcb->l3os = (u16)(skb_network_offset(skb) - GMAC_FCB_LEN);
 	fcb->l4os = (u16)(skb->h.raw - skb->nh.raw);
 
 	fcb->flags = flags;
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index afc2ec72529..cfb791bb45e 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1195,7 +1195,7 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
 		skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr,
 						      skb->nh.iph->daddr,
 						      0, IPPROTO_TCP, 0);
-		ipcss = skb->nh.raw - skb->data;
+		ipcss = skb_network_offset(skb);
 		ipcso = (void *)&(skb->nh.iph->check) - (void *)skb->data;
 		ipcse = skb->h.raw - skb->data - 1;
 		tucss = skb->h.raw - skb->data;
diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c
index 6537574a9cd..625e11ed6aa 100644
--- a/drivers/net/netxen/netxen_nic_hw.c
+++ b/drivers/net/netxen/netxen_nic_hw.c
@@ -386,7 +386,7 @@ void netxen_tso_check(struct netxen_adapter *adapter,
 	}
 	adapter->stats.xmitcsummed++;
 	desc->tcp_hdr_offset = skb->h.raw - skb->data;
-	desc->ip_hdr_offset = skb->nh.raw - skb->data;
+	desc->ip_hdr_offset = skb_network_offset(skb);
 }
 
 int netxen_is_flash_supported(struct netxen_adapter *adapter)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6440c78fe62..47cc8b07c2b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -965,6 +965,11 @@ static inline void skb_reset_network_header(struct sk_buff *skb)
 	skb->nh.raw = skb->data;
 }
 
+static inline int skb_network_offset(const struct sk_buff *skb)
+{
+	return skb->nh.raw - skb->data;
+}
+
 static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
 {
 	return skb->mac.raw;
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 6e08dc8dee4..02dea851a11 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -148,7 +148,7 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
 
 			if (ka9qfrag == 1) {
 				skb_reserve(skbn, frontlen + 2);
-				skbn->nh.raw = skbn->data + (skb->nh.raw - skb->data);
+				skbn->nh.raw = skbn->data + skb_network_offset(skb);
 				memcpy(skb_put(skbn, len), skb->data, len);
 				p = skb_push(skbn, 2);
 
@@ -161,7 +161,7 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
 				}
 			} else {
 				skb_reserve(skbn, frontlen + 1);
-				skbn->nh.raw = skbn->data + (skb->nh.raw - skb->data);
+				skbn->nh.raw = skbn->data + skb_network_offset(skb);
 				memcpy(skb_put(skbn, len), skb->data, len);
 				p = skb_push(skbn, 1);
 				*p = AX25_P_TEXT;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 841e3f32cab..c5653c512b4 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1125,7 +1125,7 @@ int neigh_compat_output(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
 
-	__skb_pull(skb, skb->nh.raw - skb->data);
+	__skb_pull(skb, skb_network_offset(skb));
 
 	if (dev->hard_header &&
 	    dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
@@ -1147,7 +1147,7 @@ int neigh_resolve_output(struct sk_buff *skb)
 	if (!dst || !(neigh = dst->neighbour))
 		goto discard;
 
-	__skb_pull(skb, skb->nh.raw - skb->data);
+	__skb_pull(skb, skb_network_offset(skb));
 
 	if (!neigh_event_send(neigh, skb)) {
 		int err;
@@ -1190,7 +1190,7 @@ int neigh_connected_output(struct sk_buff *skb)
 	struct neighbour *neigh = dst->neighbour;
 	struct net_device *dev = neigh->dev;
 
-	__skb_pull(skb, skb->nh.raw - skb->data);
+	__skb_pull(skb, skb_network_offset(skb));
 
 	read_lock_bh(&neigh->lock);
 	err = dev->hard_header(skb, dev, ntohs(skb->protocol),
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 99cd90c2231..669f5d97c6e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -96,7 +96,7 @@ __inline__ void ip_send_check(struct iphdr *iph)
 static int ip_dev_loopback_xmit(struct sk_buff *newskb)
 {
 	skb_reset_mac_header(newskb);
-	__skb_pull(newskb, newskb->nh.raw - newskb->data);
+	__skb_pull(newskb, skb_network_offset(newskb));
 	newskb->pkt_type = PACKET_LOOPBACK;
 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
 	BUG_TRAP(newskb->dst);
@@ -1199,7 +1199,7 @@ int ip_push_pending_frames(struct sock *sk)
 
 	/* move skb->data to ip header from ext header */
 	if (skb->data < skb->nh.raw)
-		__skb_pull(skb, skb->nh.raw - skb->data);
+		__skb_pull(skb, skb_network_offset(skb));
 	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
 		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
 		*tail_skb = tmp_skb;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index a91dfbce843..aa4a0a59ffa 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -206,7 +206,7 @@ static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
 {
 	u8 _optval, *op;
 
-	offset += skb->nh.raw - skb->data;
+	offset += skb_network_offset(skb);
 	op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
 	if (op == NULL)
 		return 1;
@@ -431,7 +431,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 		tclass = 0;
 
 	msg.skb = skb;
-	msg.offset = skb->nh.raw - skb->data;
+	msg.offset = skb_network_offset(skb);
 	msg.type = type;
 
 	len = skb->len - msg.offset;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8a7b5c76014..47d00210cba 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -89,7 +89,7 @@ static inline int ip6_output_finish(struct sk_buff *skb)
 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
 {
 	skb_reset_mac_header(newskb);
-	__skb_pull(newskb, newskb->nh.raw - newskb->data);
+	__skb_pull(newskb, skb_network_offset(newskb));
 	newskb->pkt_type = PACKET_LOOPBACK;
 	newskb->ip_summed = CHECKSUM_UNNECESSARY;
 	BUG_TRAP(newskb->dst);
@@ -1330,7 +1330,7 @@ int ip6_push_pending_frames(struct sock *sk)
 
 	/* move skb->data to ip header from ext header */
 	if (skb->data < skb->nh.raw)
-		__skb_pull(skb, skb->nh.raw - skb->data);
+		__skb_pull(skb, skb_network_offset(skb));
 	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
 		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
 		*tail_skb = tmp_skb;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b3a70eb6d42..7694c51f125 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -768,7 +768,7 @@ resolve_normal_ct(struct sk_buff *skb,
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conn *ct;
 
-	if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
+	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
 			     dataoff, l3num, protonum, &tuple, l3proto,
 			     l4proto)) {
 		DEBUGP("resolve_normal_ct: Can't get tuple\n");
@@ -960,7 +960,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 	if (do_acct) {
 		ct->counters[CTINFO2DIR(ctinfo)].packets++;
 		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
-			skb->len - (unsigned int)(skb->nh.raw - skb->data);
+			skb->len - skb_network_offset(skb);
 
 		if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
 		    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 1225e751b3f..a059cc7be67 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -491,7 +491,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 			skb_push(skb, skb->data - skb_mac_header(skb));
 		else if (skb->pkt_type == PACKET_OUTGOING) {
 			/* Special case: outgoing packets have ll header at head */
-			skb_pull(skb, skb->nh.raw - skb->data);
+			skb_pull(skb, skb_network_offset(skb));
 		}
 	}
 
@@ -595,7 +595,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 			skb_push(skb, skb->data - skb_mac_header(skb));
 		else if (skb->pkt_type == PACKET_OUTGOING) {
 			/* Special case: outgoing packets have ll header at head */
-			skb_pull(skb, skb->nh.raw - skb->data);
+			skb_pull(skb, skb_network_offset(skb));
 		}
 	}
 
@@ -613,7 +613,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	if (sk->sk_type == SOCK_DGRAM) {
 		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
 	} else {
-		unsigned maclen = skb->nh.raw - skb->data;
+		unsigned maclen = skb_network_offset(skb);
 		netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
 		macoff = netoff - maclen;
 	}
@@ -1145,7 +1145,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
 		aux.tp_snaplen = skb->len;
 		aux.tp_mac = 0;
-		aux.tp_net = skb->nh.raw - skb->data;
+		aux.tp_net = skb_network_offset(skb);
 
 		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
 	}
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 587123c61af..d24914db786 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -323,7 +323,7 @@ restart:
 			nores = 1;
 			break;
 		}
-		__skb_pull(skb, skb->nh.raw - skb->data);
+		__skb_pull(skb, skb_network_offset(skb));
 	} while ((q = NEXT_SLAVE(q)) != start);
 
 	if (nores && skb_res == NULL) {
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index d41e24d6ae4..addb5850105 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2944,7 +2944,7 @@ static int selinux_parse_skb_ipv4(struct sk_buff *skb,
 	int offset, ihlen, ret = -EINVAL;
 	struct iphdr _iph, *ih;
 
-	offset = skb->nh.raw - skb->data;
+	offset = skb_network_offset(skb);
 	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
 	if (ih == NULL)
 		goto out;
@@ -3026,7 +3026,7 @@ static int selinux_parse_skb_ipv6(struct sk_buff *skb,
 	int ret = -EINVAL, offset;
 	struct ipv6hdr _ipv6h, *ip6;
 
-	offset = skb->nh.raw - skb->data;
+	offset = skb_network_offset(skb);
 	ip6 = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
 	if (ip6 == NULL)
 		goto out;
-- 
cgit v1.2.3


From d56f90a7c96da5187f0cdf07ee7434fe6aa78bbc Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 10 Apr 2007 20:50:43 -0700
Subject: [SK_BUFF]: Introduce skb_network_header()

For the places where we need a pointer to the network header, it is still legal
to touch skb->nh.raw directly if just adding to, subtracting from or setting it
to another layer header.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/i4l/isdn_net.c                    |  5 ++-
 drivers/net/bonding/bond_alb.c                 |  2 +-
 drivers/net/loopback.c                         |  7 ++--
 drivers/net/pasemi_mac.c                       |  6 ++-
 drivers/s390/net/qeth_main.c                   |  6 ++-
 include/linux/if_pppox.h                       |  2 +-
 include/linux/skbuff.h                         |  5 +++
 include/net/cipso_ipv4.h                       |  2 +-
 include/net/inet_ecn.h                         |  6 ++-
 include/net/llc_pdu.h                          |  4 +-
 include/net/pkt_cls.h                          |  2 +-
 net/bridge/br_netfilter.c                      | 12 +++---
 net/core/dev.c                                 |  9 +++--
 net/core/filter.c                              |  2 +-
 net/dccp/ipv6.c                                |  8 ++--
 net/decnet/dn_route.c                          |  4 +-
 net/ipv4/af_inet.c                             |  2 +-
 net/ipv4/ah4.c                                 |  5 ++-
 net/ipv4/esp4.c                                |  7 ++--
 net/ipv4/icmp.c                                |  4 +-
 net/ipv4/ip_fragment.c                         |  2 +-
 net/ipv4/ip_options.c                          | 12 +++---
 net/ipv4/ip_output.c                           |  6 +--
 net/ipv4/ip_sockglue.c                         |  8 ++--
 net/ipv4/ipmr.c                                |  2 +-
 net/ipv4/ipvs/ip_vs_app.c                      |  4 +-
 net/ipv4/ipvs/ip_vs_core.c                     |  3 +-
 net/ipv4/netfilter/arpt_mangle.c               |  2 +-
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  9 ++---
 net/ipv4/raw.c                                 |  2 +-
 net/ipv4/tcp_input.c                           |  2 +-
 net/ipv4/xfrm4_input.c                         |  2 +-
 net/ipv4/xfrm4_mode_beet.c                     |  4 +-
 net/ipv4/xfrm4_mode_transport.c                |  4 +-
 net/ipv4/xfrm4_policy.c                        |  2 +-
 net/ipv6/af_inet6.c                            |  3 +-
 net/ipv6/ah6.c                                 | 12 +++---
 net/ipv6/datagram.c                            | 31 ++++++++------
 net/ipv6/esp6.c                                |  4 +-
 net/ipv6/exthdrs.c                             | 56 +++++++++++++++-----------
 net/ipv6/icmp.c                                |  3 +-
 net/ipv6/ip6_input.c                           |  4 +-
 net/ipv6/ip6_output.c                          | 23 ++++++-----
 net/ipv6/ip6_tunnel.c                          |  5 ++-
 net/ipv6/ipcomp6.c                             |  4 +-
 net/ipv6/mip6.c                                | 29 +++++++------
 net/ipv6/netfilter/nf_conntrack_reasm.c        | 19 +++++----
 net/ipv6/raw.c                                 |  5 ++-
 net/ipv6/reassembly.c                          | 25 ++++++++----
 net/ipv6/tcp_ipv6.c                            |  8 +++-
 net/ipv6/xfrm6_input.c                         |  6 +--
 net/ipv6/xfrm6_mode_beet.c                     |  2 +-
 net/ipv6/xfrm6_mode_transport.c                |  6 ++-
 net/ipv6/xfrm6_mode_tunnel.c                   |  8 ++--
 net/ipv6/xfrm6_policy.c                        | 16 ++++----
 net/netfilter/xt_TCPMSS.c                      |  4 +-
 net/sched/act_pedit.c                          |  2 +-
 net/sched/cls_u32.c                            |  2 +-
 net/sched/em_u32.c                             |  2 +-
 59 files changed, 258 insertions(+), 185 deletions(-)

diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index 0c2b3752e46..cd3b1fa4a41 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -872,7 +872,8 @@ typedef struct {
 static void
 isdn_net_log_skb(struct sk_buff * skb, isdn_net_local * lp)
 {
-	u_char *p = skb->nh.raw; /* hopefully, this was set correctly */
+	/* hopefully, this was set correctly */
+	const u_char *p = skb_network_header(skb);
 	unsigned short proto = ntohs(skb->protocol);
 	int data_ofs;
 	ip_ports *ipp;
@@ -880,7 +881,7 @@ isdn_net_log_skb(struct sk_buff * skb, isdn_net_local * lp)
 
 	addinfo[0] = '\0';
 	/* This check stolen from 2.1.72 dev_queue_xmit_nit() */
-	if (skb->nh.raw < skb->data || skb->nh.raw >= skb->tail) {
+	if (p < skb->data || p >= skb->tail) {
 		/* fall back to old isdn_net_log_packet method() */
 		char * buf = skb->data;
 
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 5c2a12c2b99..86cfcb3f813 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -106,7 +106,7 @@ struct arp_pkt {
 
 static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
 {
-	return (struct arp_pkt *)skb->nh.raw;
+	return (struct arp_pkt *)skb_network_header(skb);
 }
 
 /* Forward declaration */
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index af476d2a513..9265c27b13b 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -76,7 +76,8 @@ static DEFINE_PER_CPU(struct pcpu_lstats, pcpu_lstats);
 static void emulate_large_send_offload(struct sk_buff *skb)
 {
 	struct iphdr *iph = skb->nh.iph;
-	struct tcphdr *th = (struct tcphdr*)(skb->nh.raw + (iph->ihl * 4));
+	struct tcphdr *th = (struct tcphdr *)(skb_network_header(skb) +
+					      (iph->ihl * 4));
 	unsigned int doffset = (iph->ihl + th->doff) * 4;
 	unsigned int mtu = skb_shinfo(skb)->gso_size + doffset;
 	unsigned int offset = 0;
@@ -93,7 +94,7 @@ static void emulate_large_send_offload(struct sk_buff *skb)
 		skb_set_mac_header(nskb, -ETH_HLEN);
 		skb_reset_network_header(nskb);
 		iph = nskb->nh.iph;
-		memcpy(nskb->data, skb->nh.raw, doffset);
+		memcpy(nskb->data, skb_network_header(skb), doffset);
 		if (skb_copy_bits(skb,
 				  doffset + offset,
 				  nskb->data + doffset,
@@ -108,7 +109,7 @@ static void emulate_large_send_offload(struct sk_buff *skb)
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
 		nskb->pkt_type = skb->pkt_type;
 
-		th = (struct tcphdr*)(nskb->nh.raw + iph->ihl*4);
+		th = (struct tcphdr *)(skb_network_header(nskb) + iph->ihl * 4);
 		iph->tot_len = htons(frag_size + doffset);
 		iph->id = htons(id);
 		iph->check = 0;
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index 3f4213f3d5d..82218720bc3 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -729,16 +729,18 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
 	dflags = XCT_MACTX_O | XCT_MACTX_ST | XCT_MACTX_SS | XCT_MACTX_CRC_PAD;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		const unsigned char *nh = skb_network_header(skb);
+
 		switch (skb->nh.iph->protocol) {
 		case IPPROTO_TCP:
 			dflags |= XCT_MACTX_CSUM_TCP;
 			dflags |= XCT_MACTX_IPH((skb->h.raw - skb->nh.raw) >> 2);
-			dflags |= XCT_MACTX_IPO(skb->nh.raw - skb->data);
+			dflags |= XCT_MACTX_IPO(nh - skb->data);
 			break;
 		case IPPROTO_UDP:
 			dflags |= XCT_MACTX_CSUM_UDP;
 			dflags |= XCT_MACTX_IPH((skb->h.raw - skb->nh.raw) >> 2);
-			dflags |= XCT_MACTX_IPO(skb->nh.raw - skb->data);
+			dflags |= XCT_MACTX_IPO(nh - skb->data);
 			break;
 		}
 	}
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index c0ee6d94ea3..0ff29e0628b 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -3778,9 +3778,11 @@ qeth_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
 	}
 	/* try something else */
 	if (skb->protocol == ETH_P_IPV6)
-		return (skb->nh.raw[24] == 0xff) ? RTN_MULTICAST : 0;
+		return (skb_network_header(skb)[24] == 0xff) ?
+				RTN_MULTICAST : 0;
 	else if (skb->protocol == ETH_P_IP)
-		return ((skb->nh.raw[16] & 0xf0) == 0xe0) ? RTN_MULTICAST : 0;
+		return ((skb_network_header(skb)[16] & 0xf0) == 0xe0) ?
+				RTN_MULTICAST : 0;
 	/* ... */
 	if (!memcmp(skb->data, skb->dev->broadcast, 6))
 		return RTN_BROADCAST;
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 7044f8ab30a..29d6579ff1a 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -116,7 +116,7 @@ struct pppoe_hdr {
 
 static inline struct pppoe_hdr *pppoe_hdr(const struct sk_buff *skb)
 {
-	return (struct pppoe_hdr *)skb->nh.raw;
+	return (struct pppoe_hdr *)skb_network_header(skb);
 }
 
 struct pppoe_opt {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 47cc8b07c2b..76d30f34b98 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -960,6 +960,11 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+static inline unsigned char *skb_network_header(const struct sk_buff *skb)
+{
+	return skb->nh.raw;
+}
+
 static inline void skb_reset_network_header(struct sk_buff *skb)
 {
 	skb->nh.raw = skb->data;
diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h
index 4c9522c5178..4f90f5554fa 100644
--- a/include/net/cipso_ipv4.h
+++ b/include/net/cipso_ipv4.h
@@ -120,7 +120,7 @@ extern int cipso_v4_rbm_strictvalid;
  */
 
 #define CIPSO_V4_OPTEXIST(x) (IPCB(x)->opt.cipso != 0)
-#define CIPSO_V4_OPTPTR(x) ((x)->nh.raw + IPCB(x)->opt.cipso)
+#define CIPSO_V4_OPTPTR(x) (skb_network_header(x) + IPCB(x)->opt.cipso)
 
 /*
  * DOI List Functions
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index 10117c8503e..b9ed3898e36 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -114,12 +114,14 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb)
 {
 	switch (skb->protocol) {
 	case __constant_htons(ETH_P_IP):
-		if (skb->nh.raw + sizeof(struct iphdr) <= skb->tail)
+		if (skb_network_header(skb) + sizeof(struct iphdr) <=
+		    skb->tail)
 			return IP_ECN_set_ce(skb->nh.iph);
 		break;
 
 	case __constant_htons(ETH_P_IPV6):
-		if (skb->nh.raw + sizeof(struct ipv6hdr) <= skb->tail)
+		if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
+		    skb->tail)
 			return IP6_ECN_set_ce(skb->nh.ipv6h);
 		break;
 	}
diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h
index 778f75a40b4..4a8f58b17e4 100644
--- a/include/net/llc_pdu.h
+++ b/include/net/llc_pdu.h
@@ -203,7 +203,7 @@ struct llc_pdu_sn {
 
 static inline struct llc_pdu_sn *llc_pdu_sn_hdr(struct sk_buff *skb)
 {
-	return (struct llc_pdu_sn *)skb->nh.raw;
+	return (struct llc_pdu_sn *)skb_network_header(skb);
 }
 
 /* Un-numbered PDU format (3 bytes in length) */
@@ -215,7 +215,7 @@ struct llc_pdu_un {
 
 static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb)
 {
-	return (struct llc_pdu_un *)skb->nh.raw;
+	return (struct llc_pdu_un *)skb_network_header(skb);
 }
 
 /**
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 02647fe3d74..8a6b0e7bded 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -326,7 +326,7 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
 		case TCF_LAYER_LINK:
 			return skb->data;
 		case TCF_LAYER_NETWORK:
-			return skb->nh.raw;
+			return skb_network_header(skb);
 		case TCF_LAYER_TRANSPORT:
 			return skb->h.raw;
 	}
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 1163c4f6989..8a56d896302 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -374,7 +374,8 @@ static int check_hbh_len(struct sk_buff *skb)
 {
 	unsigned char *raw = (u8 *) (skb->nh.ipv6h + 1);
 	u32 pkt_len;
-	int off = raw - skb->nh.raw;
+	const unsigned char *nh = skb_network_header(skb);
+	int off = raw - nh;
 	int len = (raw[1] + 1) << 3;
 
 	if ((raw + len) - skb->data > skb_headlen(skb))
@@ -384,9 +385,9 @@ static int check_hbh_len(struct sk_buff *skb)
 	len -= 2;
 
 	while (len > 0) {
-		int optlen = skb->nh.raw[off + 1] + 2;
+		int optlen = nh[off + 1] + 2;
 
-		switch (skb->nh.raw[off]) {
+		switch (nh[off]) {
 		case IPV6_TLV_PAD0:
 			optlen = 1;
 			break;
@@ -395,9 +396,9 @@ static int check_hbh_len(struct sk_buff *skb)
 			break;
 
 		case IPV6_TLV_JUMBO:
-			if (skb->nh.raw[off + 1] != 4 || (off & 3) != 2)
+			if (nh[off + 1] != 4 || (off & 3) != 2)
 				goto bad;
-			pkt_len = ntohl(*(__be32 *) (skb->nh.raw + off + 2));
+			pkt_len = ntohl(*(__be32 *) (nh + off + 2));
 			if (pkt_len <= IPV6_MAXPLEN ||
 			    skb->nh.ipv6h->payload_len)
 				goto bad;
@@ -406,6 +407,7 @@ static int check_hbh_len(struct sk_buff *skb)
 			if (pskb_trim_rcsum(skb,
 					    pkt_len + sizeof(struct ipv6hdr)))
 				goto bad;
+			nh = skb_network_header(skb);
 			break;
 		default:
 			if (optlen > len)
diff --git a/net/core/dev.c b/net/core/dev.c
index 1b0758254ba..54ffe9db9b0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1068,8 +1068,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 			 */
 			skb_reset_mac_header(skb2);
 
-			if (skb2->nh.raw < skb2->data ||
-			    skb2->nh.raw > skb2->tail) {
+			if (skb_network_header(skb2) < skb2->data ||
+			    skb_network_header(skb2) > skb2->tail) {
 				if (net_ratelimit())
 					printk(KERN_CRIT "protocol %04x is "
 					       "buggy, dev %s\n",
@@ -1207,7 +1207,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	BUG_ON(skb_shinfo(skb)->frag_list);
 
 	skb_reset_mac_header(skb);
-	skb->mac_len = skb->nh.raw - skb->data;
+	skb->mac_len = skb->nh.raw - skb->mac.raw;
 	__skb_pull(skb, skb->mac_len);
 
 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
@@ -1224,7 +1224,8 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 				segs = ERR_PTR(err);
 				if (err || skb_gso_ok(skb, features))
 					break;
-				__skb_push(skb, skb->data - skb->nh.raw);
+				__skb_push(skb, (skb->data -
+						 skb_network_header(skb)));
 			}
 			segs = ptype->gso_segment(skb, features);
 			break;
diff --git a/net/core/filter.c b/net/core/filter.c
index 1cc128d0542..d2358a5e633 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -42,7 +42,7 @@ static void *__load_pointer(struct sk_buff *skb, int k)
 	u8 *ptr = NULL;
 
 	if (k >= SKF_NET_OFF)
-		ptr = skb->nh.raw + k - SKF_NET_OFF;
+		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
 	else if (k >= SKF_LL_OFF)
 		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 7f51e8db396..627d0c3c51c 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -261,8 +261,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 
 			if (rxopt->srcrt)
 				opt = ipv6_invert_rthdr(sk,
-					(struct ipv6_rt_hdr *)(pktopts->nh.raw +
-							       rxopt->srcrt));
+			  (struct ipv6_rt_hdr *)(skb_network_header(pktopts) +
+						 rxopt->srcrt));
 		}
 
 		if (opt != NULL && opt->srcrt != NULL) {
@@ -573,8 +573,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 		if (rxopt->srcrt)
 			opt = ipv6_invert_rthdr(sk,
-				(struct ipv6_rt_hdr *)(ireq6->pktopts->nh.raw +
-						       rxopt->srcrt));
+		   (struct ipv6_rt_hdr *)(skb_network_header(ireq6->pktopts) +
+					  rxopt->srcrt));
 	}
 
 	if (dst == NULL) {
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 34079b7ba1d..32a7db36c9e 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -386,7 +386,7 @@ static int dn_return_short(struct sk_buff *skb)
 	__le16 tmp;
 
 	/* Add back headers */
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 
 	if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
 		return NET_RX_DROP;
@@ -425,7 +425,7 @@ static int dn_return_long(struct sk_buff *skb)
 	unsigned char tmp[ETH_ALEN];
 
 	/* Add back all headers */
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 
 	if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
 		return NET_RX_DROP;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6e5575b0abe..ab552a6098f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1184,7 +1184,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 		iph->id = htons(id++);
 		iph->tot_len = htons(skb->len - skb->mac_len);
 		iph->check = 0;
-		iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+		iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
 	} while ((skb = skb->next));
 
 out:
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 7194eb40b6d..95ddbbd1552 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -154,7 +154,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	ah = (struct ip_auth_hdr*)skb->data;
 	iph = skb->nh.iph;
 
-	ihl = skb->data - skb->nh.raw;
+	ihl = skb->data - skb_network_header(skb);
 	memcpy(work_buf, iph, ihl);
 
 	iph->ttl = 0;
@@ -181,7 +181,8 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 		}
 	}
 	((struct iphdr*)work_buf)->protocol = ah->nexthdr;
-	skb->h.raw = memcpy(skb->nh.raw += ah_hlen, work_buf, ihl);
+	skb->nh.raw += ah_hlen;
+	skb->h.raw = memcpy(skb_network_header(skb), work_buf, ihl);
 	__skb_pull(skb, ah_hlen + ihl);
 
 	return 0;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 31041127eeb..222d21e5bbe 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -57,9 +57,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	*(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
 	pskb_put(skb, trailer, clen - skb->len);
 
-	__skb_push(skb, skb->data - skb->nh.raw);
+	__skb_push(skb, skb->data - skb_network_header(skb));
 	top_iph = skb->nh.iph;
-	esph = (struct ip_esp_hdr *)(skb->nh.raw + top_iph->ihl*4);
+	esph = (struct ip_esp_hdr *)(skb_network_header(skb) +
+				     top_iph->ihl * 4);
 	top_iph->tot_len = htons(skb->len + alen);
 	*(u8*)(trailer->tail - 1) = top_iph->protocol;
 
@@ -222,7 +223,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	if (x->encap) {
 		struct xfrm_encap_tmpl *encap = x->encap;
-		struct udphdr *uh = (void *)(skb->nh.raw + ihl);
+		struct udphdr *uh = (void *)(skb_network_header(skb) + ihl);
 
 		/*
 		 * 1) if the NAT-T peer's IP or port changed then
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4b7a0d946a0..ff124d40c58 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -484,7 +484,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 			u8 _inner_type, *itp;
 
 			itp = skb_header_pointer(skb_in,
-						 skb_in->nh.raw +
+						 skb_network_header(skb_in) +
 						 (iph->ihl << 2) +
 						 offsetof(struct icmphdr,
 							  type) -
@@ -536,7 +536,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	icmp_param.data.icmph.un.gateway = info;
 	icmp_param.data.icmph.checksum	 = 0;
 	icmp_param.skb	  = skb_in;
-	icmp_param.offset = skb_in->nh.raw - skb_in->data;
+	icmp_param.offset = skb_network_offset(skb_in);
 	icmp_out_count(icmp_param.data.icmph.type);
 	inet_sk(icmp_socket->sk)->tos = tos;
 	ipc.addr = iph->saddr;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 3dfd7581cfc..268a6c7347f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -658,7 +658,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
 	}
 
 	skb_shinfo(head)->frag_list = head->next;
-	skb_push(head, head->data - head->nh.raw);
+	skb_push(head, head->data - skb_network_header(head));
 	atomic_sub(head->truesize, &ip_frag_mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index f906a80d5a8..f7e9db61256 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -40,7 +40,7 @@
 void ip_options_build(struct sk_buff * skb, struct ip_options * opt,
 			    __be32 daddr, struct rtable *rt, int is_frag)
 {
-	unsigned char * iph = skb->nh.raw;
+	unsigned char *iph = skb_network_header(skb);
 
 	memcpy(&(IPCB(skb)->opt), opt, sizeof(struct ip_options));
 	memcpy(iph+sizeof(struct iphdr), opt->__data, opt->optlen);
@@ -104,7 +104,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
 		return 0;
 	}
 
-	sptr = skb->nh.raw;
+	sptr = skb_network_header(skb);
 	dptr = dopt->__data;
 
 	if (skb->dst)
@@ -217,7 +217,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
 
 void ip_options_fragment(struct sk_buff * skb)
 {
-	unsigned char * optptr = skb->nh.raw + sizeof(struct iphdr);
+	unsigned char *optptr = skb_network_header(skb) + sizeof(struct iphdr);
 	struct ip_options * opt = &(IPCB(skb)->opt);
 	int  l = opt->optlen;
 	int  optlen;
@@ -264,7 +264,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
 
 	if (!opt) {
 		opt = &(IPCB(skb)->opt);
-		iph = skb->nh.raw;
+		iph = skb_network_header(skb);
 		opt->optlen = ((struct iphdr *)iph)->ihl*4 - sizeof(struct iphdr);
 		optptr = iph + sizeof(struct iphdr);
 		opt->is_data = 0;
@@ -563,7 +563,7 @@ void ip_forward_options(struct sk_buff *skb)
 	struct   ip_options * opt	= &(IPCB(skb)->opt);
 	unsigned char * optptr;
 	struct rtable *rt = (struct rtable*)skb->dst;
-	unsigned char *raw = skb->nh.raw;
+	unsigned char *raw = skb_network_header(skb);
 
 	if (opt->rr_needaddr) {
 		optptr = (unsigned char *)raw + opt->rr;
@@ -609,7 +609,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 	int srrspace, srrptr;
 	__be32 nexthop;
 	struct iphdr *iph = skb->nh.iph;
-	unsigned char * optptr = skb->nh.raw + opt->srr;
+	unsigned char *optptr = skb_network_header(skb) + opt->srr;
 	struct rtable *rt = (struct rtable*)skb->dst;
 	struct rtable *rt2;
 	int err;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 669f5d97c6e..eae22846962 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -503,7 +503,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 				frag->h.raw = frag->data;
 				__skb_push(frag, hlen);
 				skb_reset_network_header(frag);
-				memcpy(frag->nh.raw, iph, hlen);
+				memcpy(skb_network_header(frag), iph, hlen);
 				iph = frag->nh.iph;
 				iph->tot_len = htons(frag->len);
 				ip_copy_metadata(frag, skb);
@@ -607,7 +607,7 @@ slow_path:
 		 *	Copy the packet header into the new buffer.
 		 */
 
-		memcpy(skb2->nh.raw, skb->data, hlen);
+		memcpy(skb_network_header(skb2), skb->data, hlen);
 
 		/*
 		 *	Copy a block of the IP datagram.
@@ -1198,7 +1198,7 @@ int ip_push_pending_frames(struct sock *sk)
 	tail_skb = &(skb_shinfo(skb)->frag_list);
 
 	/* move skb->data to ip header from ext header */
-	if (skb->data < skb->nh.raw)
+	if (skb->data < skb_network_header(skb))
 		__skb_pull(skb, skb_network_offset(skb));
 	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
 		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index f8ab654b6a3..70888e1ef6b 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -273,7 +273,8 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8*)&(((struct iphdr*)(skb->h.icmph+1))->daddr) - skb->nh.raw;
+	serr->addr_offset = (u8 *)&(((struct iphdr *)(skb->h.icmph + 1))->daddr) -
+				   skb_network_header(skb);
 	serr->port = port;
 
 	skb->h.raw = payload;
@@ -309,7 +310,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
+	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
 	serr->port = port;
 
 	skb->h.raw = skb->tail;
@@ -355,7 +356,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
 	sin = (struct sockaddr_in *)msg->msg_name;
 	if (sin) {
 		sin->sin_family = AF_INET;
-		sin->sin_addr.s_addr = *(__be32*)(skb->nh.raw + serr->addr_offset);
+		sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
+						   serr->addr_offset);
 		sin->sin_port = serr->port;
 		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
 	}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 51528d3cc33..4a8d99bca44 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -563,7 +563,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 		 */
 		msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
 		skb->nh.raw = skb->h.raw = (u8*)msg;
-		memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
+		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
 		msg->im_mbz = 0;
 		msg->im_vif = reg_vif_num;
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 22e104c6a49..f29d3a27eec 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -338,7 +338,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
 	if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
 		return 0;
 
-	th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
+	th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
 
 	/*
 	 *	Remember seq number in case this pkt gets resized
@@ -413,7 +413,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
 	if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
 		return 0;
 
-	th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
+	th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
 
 	/*
 	 *	Remember seq number in case this pkt gets resized
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 858686d616a..5d54dd2ce12 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -559,7 +559,8 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
 {
 	struct iphdr *iph	 = skb->nh.iph;
 	unsigned int icmp_offset = iph->ihl*4;
-	struct icmphdr *icmph	 = (struct icmphdr *)(skb->nh.raw + icmp_offset);
+	struct icmphdr *icmph	 = (struct icmphdr *)(skb_network_header(skb) +
+						      icmp_offset);
 	struct iphdr *ciph	 = (struct iphdr *)(icmph + 1);
 
 	if (inout) {
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 709db4d3f48..af1c8593eb1 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -31,7 +31,7 @@ target(struct sk_buff **pskb,
 	}
 
 	arp = (*pskb)->nh.arph;
-	arpptr = (*pskb)->nh.raw + sizeof(*arp);
+	arpptr = skb_network_header(*pskb) + sizeof(*arp);
 	pln = arp->ar_pln;
 	hln = arp->ar_hln;
 	/* We assume that pln and hln were checked in the match */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8f3e92d20df..7cebbff0b0c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -105,7 +105,7 @@ ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
 		return -NF_DROP;
 	}
 
-	*dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4;
+	*dataoff = skb_network_offset(*pskb) + (*pskb)->nh.iph->ihl * 4;
 	*protonum = (*pskb)->nh.iph->protocol;
 
 	return NF_ACCEPT;
@@ -151,10 +151,9 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
 	if (!help || !help->helper)
 		return NF_ACCEPT;
 
-	return help->helper->help(pskb,
-			       (*pskb)->nh.raw - (*pskb)->data
-					       + (*pskb)->nh.iph->ihl*4,
-			       ct, ctinfo);
+	return help->helper->help(pskb, (skb_network_offset(*pskb) +
+					 (*pskb)->nh.iph->ihl * 4),
+				  ct, ctinfo);
 }
 
 static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 67e5e3c035c..a3d02fdfc06 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -256,7 +256,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
 	}
 	nf_reset(skb);
 
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 
 	raw_rcv_skb(sk, skb);
 	return 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f5e019cefc1..00190835cea 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3634,7 +3634,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 			return;
 
 		skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
-		nskb->nh.raw = nskb->data + (skb->nh.raw - skb->head);
+		nskb->nh.raw = nskb->data + (skb_network_header(skb) - skb->head);
 		nskb->h.raw = nskb->data + (skb->h.raw - skb->head);
 
 		skb_reserve(nskb, header);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 78e80deb7e8..d89969c502d 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -146,7 +146,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 		return 0;
 	} else {
 #ifdef CONFIG_NETFILTER
-		__skb_push(skb, skb->data - skb->nh.raw);
+		__skb_push(skb, skb->data - skb_network_header(skb));
 		skb->nh.iph->tot_len = htons(skb->len);
 		ip_send_check(skb->nh.iph);
 
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index eaaf3565f3b..505fca034a1 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -98,7 +98,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 	}
 
 	skb->nh.raw = skb->data + (phlen - sizeof(*iph));
-	memmove(skb->nh.raw, iph, sizeof(*iph));
+	memmove(skb_network_header(skb), iph, sizeof(*iph));
 	skb->h.raw = skb->data + (phlen + optlen);
 	skb->data = skb->h.raw;
 
@@ -112,7 +112,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 	else
 		iph->protocol = protocol;
 	iph->check = 0;
-	iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+	iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
 	err = 0;
 out:
 	return err;
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index a820dde2c86..b198087c073 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -34,7 +34,7 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb_push(skb, x->props.header_len);
 	skb_reset_network_header(skb);
-	memmove(skb->nh.raw, iph, ihl);
+	memmove(skb_network_header(skb), iph, ihl);
 	return 0;
 }
 
@@ -51,7 +51,7 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 	int ihl = skb->data - skb->h.raw;
 
 	if (skb->h.raw != skb->nh.raw) {
-		memmove(skb->h.raw, skb->nh.raw, ihl);
+		memmove(skb->h.raw, skb_network_header(skb), ihl);
 		skb->nh.raw = skb->h.raw;
 	}
 	skb->nh.iph->tot_len = htons(skb->len + ihl);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 98a833ce111..fbb1d3decf0 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -210,7 +210,7 @@ static void
 _decode_session4(struct sk_buff *skb, struct flowi *fl)
 {
 	struct iphdr *iph = skb->nh.iph;
-	u8 *xprth = skb->nh.raw + iph->ihl*4;
+	u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
 
 	memset(fl, 0, sizeof(struct flowi));
 	if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 2ff07041795..7b917f856e1 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -693,7 +693,8 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
 	if (np->rxopt.all) {
 		if ((opt->hop && (np->rxopt.bits.hopopts ||
 				  np->rxopt.bits.ohopopts)) ||
-		    ((IPV6_FLOWINFO_MASK & *(__be32*)skb->nh.raw) &&
+		    ((IPV6_FLOWINFO_MASK &
+		      *(__be32 *)skb_network_header(skb)) &&
 		     np->rxopt.bits.rxflow) ||
 		    (opt->srcrt && (np->rxopt.bits.srcrt ||
 		     np->rxopt.bits.osrcrt)) ||
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index dc68b7269c3..1c914386982 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -238,8 +238,8 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph = (struct ipv6hdr *)skb->data;
 	top_iph->payload_len = htons(skb->len - sizeof(*top_iph));
 
-	nexthdr = *skb->nh.raw;
-	*skb->nh.raw = IPPROTO_AH;
+	nexthdr = *skb_network_header(skb);
+	*skb_network_header(skb) = IPPROTO_AH;
 
 	/* When there are no extension headers, we only need to save the first
 	 * 8 bytes of the base IP header.
@@ -341,7 +341,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 		goto out;
 
-	hdr_len = skb->data - skb->nh.raw;
+	hdr_len = skb->data - skb_network_header(skb);
 	ah = (struct ipv6_auth_hdr*)skb->data;
 	ahp = x->data;
 	nexthdr = ah->nexthdr;
@@ -354,7 +354,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (!pskb_may_pull(skb, ah_hlen))
 		goto out;
 
-	tmp_hdr = kmemdup(skb->nh.raw, hdr_len, GFP_ATOMIC);
+	tmp_hdr = kmemdup(skb_network_header(skb), hdr_len, GFP_ATOMIC);
 	if (!tmp_hdr)
 		goto out;
 	if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN))
@@ -382,7 +382,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 		}
 	}
 
-	skb->h.raw = memcpy(skb->nh.raw += ah_hlen, tmp_hdr, hdr_len);
+	skb->nh.raw += ah_hlen;
+	memcpy(skb_network_header(skb), tmp_hdr, hdr_len);
+	skb->h.raw = skb->nh.raw;
 	__skb_pull(skb, ah_hlen + hdr_len);
 
 	kfree(tmp_hdr);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 31a20f17c85..7a86db6163e 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -227,7 +227,8 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8*)&(((struct ipv6hdr*)(icmph+1))->daddr) - skb->nh.raw;
+	serr->addr_offset = (u8 *)&(((struct ipv6hdr *)(icmph + 1))->daddr) -
+				  skb_network_header(skb);
 	serr->port = port;
 
 	skb->h.raw = payload;
@@ -264,7 +265,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
+	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
 	serr->port = fl->fl_ip_dport;
 
 	skb->h.raw = skb->tail;
@@ -310,21 +311,24 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 
 	sin = (struct sockaddr_in6 *)msg->msg_name;
 	if (sin) {
+		const unsigned char *nh = skb_network_header(skb);
 		sin->sin6_family = AF_INET6;
 		sin->sin6_flowinfo = 0;
 		sin->sin6_port = serr->port;
 		sin->sin6_scope_id = 0;
 		if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
 			ipv6_addr_copy(&sin->sin6_addr,
-			  (struct in6_addr *)(skb->nh.raw + serr->addr_offset));
+				  (struct in6_addr *)(nh + serr->addr_offset));
 			if (np->sndflow)
-				sin->sin6_flowinfo = *(__be32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK;
+				sin->sin6_flowinfo =
+					(*(__be32 *)(nh + serr->addr_offset - 24) &
+					 IPV6_FLOWINFO_MASK);
 			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 				sin->sin6_scope_id = IP6CB(skb)->iif;
 		} else {
 			ipv6_addr_set(&sin->sin6_addr, 0, 0,
 				      htonl(0xffff),
-				      *(__be32*)(skb->nh.raw + serr->addr_offset));
+				      *(__be32 *)(nh + serr->addr_offset));
 		}
 	}
 
@@ -382,6 +386,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct inet6_skb_parm *opt = IP6CB(skb);
+	unsigned char *nh = skb_network_header(skb);
 
 	if (np->rxopt.bits.rxinfo) {
 		struct in6_pktinfo src_info;
@@ -401,14 +406,14 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
 	}
 
-	if (np->rxopt.bits.rxflow && (*(__be32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) {
-		__be32 flowinfo = *(__be32*)skb->nh.raw & IPV6_FLOWINFO_MASK;
+	if (np->rxopt.bits.rxflow && (*(__be32 *)nh & IPV6_FLOWINFO_MASK)) {
+		__be32 flowinfo = *(__be32 *)nh & IPV6_FLOWINFO_MASK;
 		put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
 	}
 
 	/* HbH is allowed only once */
 	if (np->rxopt.bits.hopopts && opt->hop) {
-		u8 *ptr = skb->nh.raw + opt->hop;
+		u8 *ptr = nh + opt->hop;
 		put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
 	}
 
@@ -428,7 +433,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 
 		while (off <= opt->lastopt) {
 			unsigned len;
-			u8 *ptr = skb->nh.raw + off;
+			u8 *ptr = nh + off;
 
 			switch(nexthdr) {
 			case IPPROTO_DSTOPTS:
@@ -470,19 +475,19 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
 	}
 	if (np->rxopt.bits.ohopopts && opt->hop) {
-		u8 *ptr = skb->nh.raw + opt->hop;
+		u8 *ptr = nh + opt->hop;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr);
 	}
 	if (np->rxopt.bits.odstopts && opt->dst0) {
-		u8 *ptr = skb->nh.raw + opt->dst0;
+		u8 *ptr = nh + opt->dst0;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
 	}
 	if (np->rxopt.bits.osrcrt && opt->srcrt) {
-		struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt);
+		struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(nh + opt->srcrt);
 		put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr);
 	}
 	if (np->rxopt.bits.odstopts && opt->dst1) {
-		u8 *ptr = skb->nh.raw + opt->dst1;
+		u8 *ptr = nh + opt->dst1;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
 	}
 	return 0;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 363e63ffecc..6e6b57ac801 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -92,8 +92,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len);
 	esph = (struct ipv6_esp_hdr *)skb->h.raw;
 	top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph));
-	*(u8*)(trailer->tail - 1) = *skb->nh.raw;
-	*skb->nh.raw = IPPROTO_ESP;
+	*(u8 *)(trailer->tail - 1) = *skb_network_header(skb);
+	*skb_network_header(skb) = IPPROTO_ESP;
 
 	esph->spi = x->id.spi;
 	esph->seq_no = htonl(++x->replay.oseq);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index fce5abde554..9ebf120ba6d 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -50,13 +50,14 @@
 
 int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
 {
-	int packet_len = skb->tail - skb->nh.raw;
+	const unsigned char *nh = skb_network_header(skb);
+	int packet_len = skb->tail - nh;
 	struct ipv6_opt_hdr *hdr;
 	int len;
 
 	if (offset + 2 > packet_len)
 		goto bad;
-	hdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	hdr = (struct ipv6_opt_hdr *)(nh + offset);
 	len = ((hdr->hdrlen + 1) << 3);
 
 	if (offset + len > packet_len)
@@ -66,7 +67,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
 	len -= 2;
 
 	while (len > 0) {
-		int opttype = skb->nh.raw[offset];
+		int opttype = nh[offset];
 		int optlen;
 
 		if (opttype == type)
@@ -77,7 +78,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
 			optlen = 1;
 			break;
 		default:
-			optlen = skb->nh.raw[offset + 1] + 2;
+			optlen = nh[offset + 1] + 2;
 			if (optlen > len)
 				goto bad;
 			break;
@@ -113,7 +114,7 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
 {
 	struct sk_buff *skb = *skbp;
 
-	switch ((skb->nh.raw[optoff] & 0xC0) >> 6) {
+	switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
 	case 0: /* ignore */
 		return 1;
 
@@ -141,6 +142,7 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
 	struct tlvtype_proc *curr;
+	const unsigned char *nh = skb_network_header(skb);
 	int off = skb->h.raw - skb->nh.raw;
 	int len = ((skb->h.raw[1]+1)<<3);
 
@@ -151,9 +153,9 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 	len -= 2;
 
 	while (len > 0) {
-		int optlen = skb->nh.raw[off+1]+2;
+		int optlen = nh[off + 1] + 2;
 
-		switch (skb->nh.raw[off]) {
+		switch (nh[off]) {
 		case IPV6_TLV_PAD0:
 			optlen = 1;
 			break;
@@ -165,7 +167,7 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 			if (optlen > len)
 				goto bad;
 			for (curr=procs; curr->type >= 0; curr++) {
-				if (curr->type == skb->nh.raw[off]) {
+				if (curr->type == nh[off]) {
 					/* type specific length/alignment
 					   checks will be performed in the
 					   func(). */
@@ -211,7 +213,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 	opt->dsthao = opt->dst1;
 	opt->dst1 = 0;
 
-	hao = (struct ipv6_destopt_hao *)(skb->nh.raw + optoff);
+	hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff);
 
 	if (hao->length != 16) {
 		LIMIT_NETDEBUG(
@@ -244,8 +246,9 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 
 		/* update all variable using below by copied skbuff */
 		*skbp = skb = skb2;
-		hao = (struct ipv6_destopt_hao *)(skb2->nh.raw + optoff);
-		ipv6h = (struct ipv6hdr *)skb2->nh.raw;
+		hao = (struct ipv6_destopt_hao *)(skb_network_header(skb2) +
+						  optoff);
+		ipv6h = skb2->nh.ipv6h;
 	}
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -406,7 +409,8 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 	default:
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  (&hdr->type) - skb_network_header(skb));
 		return -1;
 	}
 
@@ -443,7 +447,7 @@ looped_back:
 		skb->h.raw += (hdr->hdrlen + 1) << 3;
 		opt->dst0 = opt->dst1;
 		opt->dst1 = 0;
-		opt->nhoff = (&hdr->nexthdr) - skb->nh.raw;
+		opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
 		return 1;
 	}
 
@@ -452,7 +456,9 @@ looped_back:
 		if (hdr->hdrlen & 0x01) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INHDRERRORS);
-			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
+			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+					  ((&hdr->hdrlen) -
+					   skb_network_header(skb)));
 			return -1;
 		}
 		break;
@@ -479,7 +485,9 @@ looped_back:
 	if (hdr->segments_left > n) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  ((&hdr->segments_left) -
+				   skb_network_header(skb)));
 		return -1;
 	}
 
@@ -547,7 +555,7 @@ looped_back:
 	dst_release(xchg(&skb->dst, NULL));
 	ip6_route_input(skb);
 	if (skb->dst->error) {
-		skb_push(skb, skb->data - skb->nh.raw);
+		skb_push(skb, skb->data - skb_network_header(skb));
 		dst_input(skb);
 		return -1;
 	}
@@ -565,7 +573,7 @@ looped_back:
 		goto looped_back;
 	}
 
-	skb_push(skb, skb->data - skb->nh.raw);
+	skb_push(skb, skb->data - skb_network_header(skb));
 	dst_input(skb);
 	return -1;
 }
@@ -656,13 +664,14 @@ EXPORT_SYMBOL_GPL(ipv6_invert_rthdr);
 static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
 {
 	struct sk_buff *skb = *skbp;
+	const unsigned char *nh = skb_network_header(skb);
 
-	if (skb->nh.raw[optoff+1] == 2) {
+	if (nh[optoff + 1] == 2) {
 		IP6CB(skb)->ra = optoff;
 		return 1;
 	}
 	LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n",
-		       skb->nh.raw[optoff+1]);
+		       nh[optoff + 1]);
 	kfree_skb(skb);
 	return 0;
 }
@@ -672,17 +681,18 @@ static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
 static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff)
 {
 	struct sk_buff *skb = *skbp;
+	const unsigned char *nh = skb_network_header(skb);
 	u32 pkt_len;
 
-	if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) {
+	if (nh[optoff + 1] != 4 || (optoff & 3) != 2) {
 		LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n",
-			       skb->nh.raw[optoff+1]);
+			       nh[optoff+1]);
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		goto drop;
 	}
 
-	pkt_len = ntohl(*(__be32*)(skb->nh.raw+optoff+2));
+	pkt_len = ntohl(*(__be32 *)(nh + optoff + 2));
 	if (pkt_len <= IPV6_MAXPLEN) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
@@ -727,7 +737,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
 	struct inet6_skb_parm *opt = IP6CB(skb);
 
 	/*
-	 * skb->nh.raw is equal to skb->data, and
+	 * skb_network_header(skb) is equal to skb->data, and
 	 * skb->h.raw - skb->nh.raw is always equal to
 	 * sizeof(struct ipv6hdr) by definition of
 	 * hop-by-hop options.
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index aa4a0a59ffa..e5293b34229 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -284,7 +284,8 @@ static void mip6_addr_swap(struct sk_buff *skb)
 	if (opt->dsthao) {
 		off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
 		if (likely(off >= 0)) {
-			hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off);
+			hao = (struct ipv6_destopt_hao *)
+					(skb_network_header(skb) + off);
 			ipv6_addr_copy(&tmp, &iph->saddr);
 			ipv6_addr_copy(&iph->saddr, &hao->addr);
 			ipv6_addr_copy(&hao->addr, &tmp);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 61e7a6c8141..aecc74da072 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -163,7 +163,7 @@ resubmit:
 	if (!pskb_pull(skb, skb->h.raw - skb->data))
 		goto discard;
 	nhoff = IP6CB(skb)->nhoff;
-	nexthdr = skb->nh.raw[nhoff];
+	nexthdr = skb_network_header(skb)[nhoff];
 
 	raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
 	if (raw_sk && !ipv6_raw_deliver(skb, nexthdr))
@@ -181,7 +181,7 @@ resubmit:
 			   indefinitely. */
 			nf_reset(skb);
 
-			skb_postpull_rcsum(skb, skb->nh.raw,
+			skb_postpull_rcsum(skb, skb_network_header(skb),
 					   skb->h.raw - skb->nh.raw);
 			hdr = skb->nh.ipv6h;
 			if (ipv6_addr_is_multicast(&hdr->daddr) &&
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 47d00210cba..f1dfcc31971 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -323,10 +323,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
 	if (nexthdr == IPPROTO_ICMPV6) {
 		struct icmp6hdr *icmp6;
 
-		if (!pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data))
+		if (!pskb_may_pull(skb, (skb_network_header(skb) +
+					 offset + 1 - skb->data)))
 			return 0;
 
-		icmp6 = (struct icmp6hdr *)(skb->nh.raw + offset);
+		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 
 		switch (icmp6->icmp6_type) {
 		case NDISC_ROUTER_SOLICITATION:
@@ -392,7 +393,7 @@ int ip6_forward(struct sk_buff *skb)
 	 *	that different fragments will go along one path. --ANK
 	 */
 	if (opt->ra) {
-		u8 *ptr = skb->nh.raw + opt->ra;
+		u8 *ptr = skb_network_header(skb) + opt->ra;
 		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
 			return 0;
 	}
@@ -527,7 +528,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 {
 	u16 offset = sizeof(struct ipv6hdr);
 	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
-	unsigned int packet_len = skb->tail - skb->nh.raw;
+	unsigned int packet_len = skb->tail - skb_network_header(skb);
 	int found_rhdr = 0;
 	*nexthdr = &skb->nh.ipv6h->nexthdr;
 
@@ -554,7 +555,8 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 
 		offset += ipv6_optlen(exthdr);
 		*nexthdr = &exthdr->nexthdr;
-		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
+						 offset);
 	}
 
 	return offset;
@@ -620,7 +622,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		/* BUILD HEADER */
 
 		*prevhdr = NEXTHDR_FRAGMENT;
-		tmp_hdr = kmemdup(skb->nh.raw, hlen, GFP_ATOMIC);
+		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 		if (!tmp_hdr) {
 			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 			return -ENOMEM;
@@ -630,7 +632,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
 		__skb_push(skb, hlen);
 		skb_reset_network_header(skb);
-		memcpy(skb->nh.raw, tmp_hdr, hlen);
+		memcpy(skb_network_header(skb), tmp_hdr, hlen);
 
 		ipv6_select_ident(skb, fh);
 		fh->nexthdr = nexthdr;
@@ -654,7 +656,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
 				__skb_push(frag, hlen);
 				skb_reset_network_header(frag);
-				memcpy(frag->nh.raw, tmp_hdr, hlen);
+				memcpy(skb_network_header(frag), tmp_hdr,
+				       hlen);
 				offset += skb->len - hlen - sizeof(struct frag_hdr);
 				fh->nexthdr = nexthdr;
 				fh->reserved = 0;
@@ -753,7 +756,7 @@ slow_path:
 		/*
 		 *	Copy the packet header into the new buffer.
 		 */
-		memcpy(frag->nh.raw, skb->data, hlen);
+		memcpy(skb_network_header(frag), skb->data, hlen);
 
 		/*
 		 *	Build fragment header.
@@ -1329,7 +1332,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	tail_skb = &(skb_shinfo(skb)->frag_list);
 
 	/* move skb->data to ip header from ext header */
-	if (skb->data < skb->nh.raw)
+	if (skb->data < skb_network_header(skb))
 		__skb_pull(skb, skb_network_offset(skb));
 	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
 		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a1e4f39c679..aafbdfa8d78 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -995,9 +995,10 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 	    !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
 		return -1;
 
-	if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
+	offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb));
+	if (offset > 0) {
 		struct ipv6_tlv_tnl_enc_lim *tel;
-		tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->nh.raw[offset];
+		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
 		if (tel->encap_limit == 0) {
 			icmpv6_send(skb, ICMPV6_PARAMPROB,
 				    ICMPV6_HDR_FIELD, offset + 2, skb->dev);
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 5724ba9f75d..3e71d1691b7 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -166,10 +166,10 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
 
 	ipch = (struct ipv6_comp_hdr *)start;
-	ipch->nexthdr = *skb->nh.raw;
+	ipch->nexthdr = *skb_network_header(skb);
 	ipch->flags = 0;
 	ipch->cpi = htons((u16 )ntohl(x->id.spi));
-	*skb->nh.raw = IPPROTO_COMP;
+	*skb_network_header(skb) = IPPROTO_COMP;
 
 out_ok:
 	return 0;
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 0afcabdd8ed..bb4033553f3 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -99,14 +99,16 @@ int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
 	if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
 		LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
 			       mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type));
-		mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw);
+		mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) -
+					 skb_network_header(skb)));
 		return -1;
 	}
 
 	if (mh->ip6mh_proto != IPPROTO_NONE) {
 		LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n",
 			       mh->ip6mh_proto);
-		mip6_param_prob(skb, 0, (&mh->ip6mh_proto) - skb->nh.raw);
+		mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) -
+					 skb_network_header(skb)));
 		return -1;
 	}
 
@@ -152,8 +154,8 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
 	iph = (struct ipv6hdr *)skb->data;
 	iph->payload_len = htons(skb->len - sizeof(*iph));
 
-	nexthdr = *skb->nh.raw;
-	*skb->nh.raw = IPPROTO_DSTOPTS;
+	nexthdr = *skb_network_header(skb);
+	*skb_network_header(skb) = IPPROTO_DSTOPTS;
 
 	dstopt = (struct ipv6_destopt_hdr *)skb->h.raw;
 	dstopt->nexthdr = nexthdr;
@@ -215,7 +217,8 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct
 	if (likely(opt->dsthao)) {
 		offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
 		if (likely(offset >= 0))
-			hao = (struct ipv6_destopt_hao *)(skb->nh.raw + offset);
+			hao = (struct ipv6_destopt_hao *)
+					(skb_network_header(skb) + offset);
 	}
 
 	skb_get_timestamp(skb, &stamp);
@@ -254,7 +257,8 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
 {
 	u16 offset = sizeof(struct ipv6hdr);
 	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
-	unsigned int packet_len = skb->tail - skb->nh.raw;
+	const unsigned char *nh = skb_network_header(skb);
+	unsigned int packet_len = skb->tail - nh;
 	int found_rhdr = 0;
 
 	*nexthdr = &skb->nh.ipv6h->nexthdr;
@@ -288,7 +292,7 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
 
 		offset += ipv6_optlen(exthdr);
 		*nexthdr = &exthdr->nexthdr;
-		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 	}
 
 	return offset;
@@ -361,8 +365,8 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
 	iph = (struct ipv6hdr *)skb->data;
 	iph->payload_len = htons(skb->len - sizeof(*iph));
 
-	nexthdr = *skb->nh.raw;
-	*skb->nh.raw = IPPROTO_ROUTING;
+	nexthdr = *skb_network_header(skb);
+	*skb_network_header(skb) = IPPROTO_ROUTING;
 
 	rt2 = (struct rt2_hdr *)skb->h.raw;
 	rt2->rt_hdr.nexthdr = nexthdr;
@@ -384,7 +388,8 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
 {
 	u16 offset = sizeof(struct ipv6hdr);
 	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
-	unsigned int packet_len = skb->tail - skb->nh.raw;
+	const unsigned char *nh = skb_network_header(skb);
+	unsigned int packet_len = skb->tail - nh;
 	int found_rhdr = 0;
 
 	*nexthdr = &skb->nh.ipv6h->nexthdr;
@@ -397,7 +402,7 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
 		case NEXTHDR_ROUTING:
 			if (offset + 3 <= packet_len) {
 				struct ipv6_rt_hdr *rt;
-				rt = (struct ipv6_rt_hdr *)(skb->nh.raw + offset);
+				rt = (struct ipv6_rt_hdr *)(nh + offset);
 				if (rt->type != 0)
 					return offset;
 			}
@@ -417,7 +422,7 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
 
 		offset += ipv6_optlen(exthdr);
 		*nexthdr = &exthdr->nexthdr;
-		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 	}
 
 	return offset;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c311b9a12ca..bc1d0958400 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -408,11 +408,12 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 		return -1;
 	}
 
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		const unsigned char *nh = skb_network_header(skb);
 		skb->csum = csum_sub(skb->csum,
-				     csum_partial(skb->nh.raw,
-						  (u8*)(fhdr + 1) - skb->nh.raw,
+				     csum_partial(nh, (u8 *)(fhdr + 1) - nh,
 						  0));
+	}
 
 	/* Is this the final fragment? */
 	if (!(fhdr->frag_off & htons(IP6_MF))) {
@@ -583,7 +584,9 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 	BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0);
 
 	/* Unfragmented part is taken from the first segment. */
-	payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
+	payload_len = ((head->data - skb_network_header(head)) -
+		       sizeof(struct ipv6hdr) + fq->len -
+		       sizeof(struct frag_hdr));
 	if (payload_len > IPV6_MAXPLEN) {
 		DEBUGP("payload len is too large.\n");
 		goto out_oversize;
@@ -624,7 +627,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 
 	/* We have to remove fragment header from datagram and to relocate
 	 * header in order to calculate ICV correctly. */
-	head->nh.raw[fq->nhoffset] = head->h.raw[0];
+	skb_network_header(head)[fq->nhoffset] = head->h.raw[0];
 	memmove(head->head + sizeof(struct frag_hdr), head->head,
 		(head->data - head->head) - sizeof(struct frag_hdr));
 	head->mac.raw += sizeof(struct frag_hdr);
@@ -632,7 +635,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 
 	skb_shinfo(head)->frag_list = head->next;
 	head->h.raw = head->data;
-	skb_push(head, head->data - head->nh.raw);
+	skb_push(head, head->data - skb_network_header(head));
 	atomic_sub(head->truesize, &nf_ct_frag6_mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
@@ -653,7 +656,9 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 
 	/* Yes, and fold redundant checksum back. 8) */
 	if (head->ip_summed == CHECKSUM_COMPLETE)
-		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
+		head->csum = csum_partial(skb_network_header(head),
+					  head->h.raw - head->nh.raw,
+					  head->csum);
 
 	fq->fragments = NULL;
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 5f26645195d..9b2bcde73f1 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -361,7 +361,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		skb_postpull_rcsum(skb, skb->nh.raw,
+		skb_postpull_rcsum(skb, skb_network_header(skb),
 				   skb->h.raw - skb->nh.raw);
 		if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr,
 				     &skb->nh.ipv6h->daddr,
@@ -488,7 +488,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 		goto out;
 
 	offset = rp->offset;
-	total_len = inet_sk(sk)->cork.length - (skb->nh.raw - skb->data);
+	total_len = inet_sk(sk)->cork.length - (skb_network_header(skb) -
+						skb->data);
 	if (offset >= total_len - 1) {
 		err = -EINVAL;
 		ip6_flush_pending_frames(sk);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 1dde449379f..f85e49acb91 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -436,13 +436,18 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	if ((unsigned int)end > IPV6_MAXPLEN) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off - skb->nh.raw);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  ((u8 *)&fhdr->frag_off -
+				   skb_network_header(skb)));
 		return;
 	}
 
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		const unsigned char *nh = skb_network_header(skb);
 		skb->csum = csum_sub(skb->csum,
-				     csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0));
+				     csum_partial(nh, (u8 *)(fhdr + 1) - nh,
+						  0));
+	}
 
 	/* Is this the final fragment? */
 	if (!(fhdr->frag_off & htons(IP6_MF))) {
@@ -605,7 +610,9 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 	BUG_TRAP(FRAG6_CB(head)->offset == 0);
 
 	/* Unfragmented part is taken from the first segment. */
-	payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
+	payload_len = ((head->data - skb_network_header(head)) -
+		       sizeof(struct ipv6hdr) + fq->len -
+		       sizeof(struct frag_hdr));
 	if (payload_len > IPV6_MAXPLEN)
 		goto out_oversize;
 
@@ -639,7 +646,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 	/* We have to remove fragment header from datagram and to relocate
 	 * header in order to calculate ICV correctly. */
 	nhoff = fq->nhoffset;
-	head->nh.raw[nhoff] = head->h.raw[0];
+	skb_network_header(head)[nhoff] = head->h.raw[0];
 	memmove(head->head + sizeof(struct frag_hdr), head->head,
 		(head->data - head->head) - sizeof(struct frag_hdr));
 	head->mac.raw += sizeof(struct frag_hdr);
@@ -647,7 +654,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 
 	skb_shinfo(head)->frag_list = head->next;
 	head->h.raw = head->data;
-	skb_push(head, head->data - head->nh.raw);
+	skb_push(head, head->data - skb_network_header(head));
 	atomic_sub(head->truesize, &ip6_frag_mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
@@ -671,7 +678,9 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 
 	/* Yes, and fold redundant checksum back. 8) */
 	if (head->ip_summed == CHECKSUM_COMPLETE)
-		head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum);
+		head->csum = csum_partial(skb_network_header(head),
+					  head->h.raw - head->nh.raw,
+					  head->csum);
 
 	rcu_read_lock();
 	IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
@@ -725,7 +734,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
 		skb->h.raw += sizeof(struct frag_hdr);
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS);
 
-		IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw;
+		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
 		return 1;
 	}
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 92f99927d12..80a52ab1e38 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -486,7 +486,9 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 			struct sk_buff *pktopts = treq->pktopts;
 			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
 			if (rxopt->srcrt)
-				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
+				opt = ipv6_invert_rthdr(sk,
+			  (struct ipv6_rt_hdr *)(skb_network_header(pktopts) +
+						 rxopt->srcrt));
 		}
 
 		if (opt && opt->srcrt) {
@@ -1389,7 +1391,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	    opt == NULL && treq->pktopts) {
 		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
 		if (rxopt->srcrt)
-			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
+			opt = ipv6_invert_rthdr(sk,
+		   (struct ipv6_rt_hdr *)(skb_network_header(treq->pktopts) +
+					  rxopt->srcrt));
 	}
 
 	if (dst == NULL) {
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 33a1b920043..5c929f88612 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -28,7 +28,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 	unsigned int nhoff;
 
 	nhoff = IP6CB(skb)->nhoff;
-	nexthdr = skb->nh.raw[nhoff];
+	nexthdr = skb_network_header(skb)[nhoff];
 
 	seq = 0;
 	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
@@ -58,7 +58,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 		if (nexthdr <= 0)
 			goto drop_unlock;
 
-		skb->nh.raw[nhoff] = nexthdr;
+		skb_network_header(skb)[nhoff] = nexthdr;
 
 		if (x->props.replay_window)
 			xfrm_replay_advance(x, seq);
@@ -113,7 +113,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 	} else {
 #ifdef CONFIG_NETFILTER
 		skb->nh.ipv6h->payload_len = htons(skb->len);
-		__skb_push(skb, skb->data - skb->nh.raw);
+		__skb_push(skb, skb->data - skb_network_header(skb));
 
 		NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
 			ip6_rcv_finish);
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index c015bfde2b1..247e2d5d2ac 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -67,7 +67,7 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 		goto out;
 
 	skb_push(skb, size);
-	memmove(skb->data, skb->nh.raw, size);
+	memmove(skb->data, skb_network_header(skb), size);
 	skb_reset_network_header(skb);
 
 	old_mac = skb_mac_header(skb);
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 3a4b39b12ba..ace0bbf4f25 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -53,8 +53,10 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int ihl = skb->data - skb->h.raw;
 
-	if (skb->h.raw != skb->nh.raw)
-		skb->nh.raw = memmove(skb->h.raw, skb->nh.raw, ihl);
+	if (skb->h.raw != skb->nh.raw) {
+		memmove(skb->h.raw, skb_network_header(skb), ihl);
+		skb->nh.raw = skb->h.raw;
+	}
 	skb->nh.ipv6h->payload_len = htons(skb->len + ihl -
 					   sizeof(struct ipv6hdr));
 	skb->h.raw = skb->data;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 8ce5ef2d0b1..498f17b5c42 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -87,9 +87,10 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err = -EINVAL;
 	const unsigned char *old_mac;
+	const unsigned char *nh = skb_network_header(skb);
 
-	if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6
-	    && skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPIP)
+	if (nh[IP6CB(skb)->nhoff] != IPPROTO_IPV6 &&
+	    nh[IP6CB(skb)->nhoff] != IPPROTO_IPIP)
 		goto out;
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto out;
@@ -98,7 +99,8 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 		goto out;
 
-	if (skb->nh.raw[IP6CB(skb)->nhoff] == IPPROTO_IPV6) {
+	nh = skb_network_header(skb);
+	if (nh[IP6CB(skb)->nhoff] == IPPROTO_IPV6) {
 		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
 			ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h);
 		if (!(x->props.flags & XFRM_STATE_NOECN))
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d8a585bd2cb..cb5a723d4cb 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -273,14 +273,16 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 	u16 offset = skb->h.raw - skb->nh.raw;
 	struct ipv6hdr *hdr = skb->nh.ipv6h;
 	struct ipv6_opt_hdr *exthdr;
-	u8 nexthdr = skb->nh.raw[IP6CB(skb)->nhoff];
+	const unsigned char *nh = skb_network_header(skb);
+	u8 nexthdr = nh[IP6CB(skb)->nhoff];
 
 	memset(fl, 0, sizeof(struct flowi));
 	ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr);
 	ipv6_addr_copy(&fl->fl6_src, &hdr->saddr);
 
-	while (pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) {
-		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	while (pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
+		nh = skb_network_header(skb);
+		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 
 		switch (nexthdr) {
 		case NEXTHDR_ROUTING:
@@ -288,7 +290,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 		case NEXTHDR_DEST:
 			offset += ipv6_optlen(exthdr);
 			nexthdr = exthdr->nexthdr;
-			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+			exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 			break;
 
 		case IPPROTO_UDP:
@@ -296,7 +298,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 		case IPPROTO_TCP:
 		case IPPROTO_SCTP:
 		case IPPROTO_DCCP:
-			if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) {
+			if (pskb_may_pull(skb, nh + offset + 4 - skb->data)) {
 				__be16 *ports = (__be16 *)exthdr;
 
 				fl->fl_ip_sport = ports[0];
@@ -306,7 +308,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 			return;
 
 		case IPPROTO_ICMPV6:
-			if (pskb_may_pull(skb, skb->nh.raw + offset + 2 - skb->data)) {
+			if (pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
 				u8 *icmp = (u8 *)exthdr;
 
 				fl->fl_icmp_type = icmp[0];
@@ -317,7 +319,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 
 #ifdef CONFIG_IPV6_MIP6
 		case IPPROTO_MH:
-			if (pskb_may_pull(skb, skb->nh.raw + offset + 3 - skb->data)) {
+			if (pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
 				struct ip6_mh *mh;
 				mh = (struct ip6_mh *)exthdr;
 
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index db7e38c08de..afc0c60e19d 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -54,7 +54,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
 		return -1;
 
 	tcplen = (*pskb)->len - tcphoff;
-	tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff);
+	tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
 
 	/* Since it passed flags test in tcp match, we know it is is
 	   not a fragment, and has data >= tcp header length.  SYN
@@ -113,7 +113,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
 			return -1;
 		kfree_skb(*pskb);
 		*pskb = newskb;
-		tcph = (struct tcphdr *)((*pskb)->nh.raw + tcphoff);
+		tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
 	}
 
 	skb_put((*pskb), TCPOLEN_MSS);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 3d6a2fcc9ce..20813eee8af 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -136,7 +136,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
 		}
 	}
 
-	pptr = skb->nh.raw;
+	pptr = skb_network_header(skb);
 
 	spin_lock(&p->tcf_lock);
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 0bcb16928d2..695b34051b9 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -119,7 +119,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
 	} stack[TC_U32_MAXDEPTH];
 
 	struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
-	u8 *ptr = skb->nh.raw;
+	u8 *ptr = skb_network_header(skb);
 	struct tc_u_knode *n;
 	int sdepth = 0;
 	int off2 = 0;
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index cd0600c6796..0a2a7fe08de 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -22,7 +22,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
 			struct tcf_pkt_info *info)
 {
 	struct tc_u32_key *key = (struct tc_u32_key *) em->data;
-	unsigned char *ptr = skb->nh.raw;
+	const unsigned char *ptr = skb_network_header(skb);
 
 	if (info) {
 		if (info->ptr)
-- 
cgit v1.2.3


From 878c814500b123dd61a5e211879a32e5fd932713 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 11 Mar 2007 22:38:29 -0300
Subject: [SK_BUFF] ipmr: Another skb_push related conversion to
 skb_reset_network_header

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 4a8d99bca44..465459d59b2 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -561,8 +561,10 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 		   And all this only to mangle msg->im_msgtype and
 		   to set msg->im_mbz to "mbz" :-)
 		 */
-		msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
-		skb->nh.raw = skb->h.raw = (u8*)msg;
+		skb_push(skb, sizeof(struct iphdr));
+		skb_reset_network_header(skb);
+		skb->h.raw = skb->data;
+		msg = (struct igmpmsg *)skb->nh.raw;
 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
 		msg->im_mbz = 0;
-- 
cgit v1.2.3


From c14d2450cb7fe1786e2ec325172baf66922bf597 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 11 Mar 2007 22:39:41 -0300
Subject: [SK_BUFF]: Introduce skb_set_network_header

For the cases where the network header is being set to a offset from skb->data.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 +++++
 net/ax25/ax25_out.c    | 6 ++++--
 net/ipv4/ip_output.c   | 4 ++--
 net/ipv4/tcp_input.c   | 3 ++-
 net/ipv6/ip6_output.c  | 4 ++--
 5 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 76d30f34b98..870438fba93 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -970,6 +970,11 @@ static inline void skb_reset_network_header(struct sk_buff *skb)
 	skb->nh.raw = skb->data;
 }
 
+static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
+{
+	skb->nh.raw = skb->data + offset;
+}
+
 static inline int skb_network_offset(const struct sk_buff *skb)
 {
 	return skb->nh.raw - skb->data;
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 02dea851a11..e66953ce53e 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -148,7 +148,8 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
 
 			if (ka9qfrag == 1) {
 				skb_reserve(skbn, frontlen + 2);
-				skbn->nh.raw = skbn->data + skb_network_offset(skb);
+				skb_set_network_header(skbn,
+						      skb_network_offset(skb));
 				memcpy(skb_put(skbn, len), skb->data, len);
 				p = skb_push(skbn, 2);
 
@@ -161,7 +162,8 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
 				}
 			} else {
 				skb_reserve(skbn, frontlen + 1);
-				skbn->nh.raw = skbn->data + skb_network_offset(skb);
+				skb_set_network_header(skbn,
+						      skb_network_offset(skb));
 				memcpy(skb_put(skbn, len), skb->data, len);
 				p = skb_push(skbn, 1);
 				*p = AX25_P_TEXT;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index eae22846962..15de9d43950 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -928,9 +928,9 @@ alloc_new_skb:
 			 *	Find where to start putting bytes.
 			 */
 			data = skb_put(skb, fraglen);
-			skb->nh.raw = data + exthdrlen;
+			skb_set_network_header(skb, exthdrlen);
+			skb->h.raw = skb->nh.raw + fragheaderlen;
 			data += fragheaderlen;
-			skb->h.raw = data + exthdrlen;
 
 			if (fraggap) {
 				skb->csum = skb_copy_and_csum_bits(
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 00190835cea..5da823a3225 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3634,7 +3634,8 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 			return;
 
 		skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
-		nskb->nh.raw = nskb->data + (skb_network_header(skb) - skb->head);
+		skb_set_network_header(nskb,
+				       skb_network_header(skb) - skb->head);
 		nskb->h.raw = nskb->data + (skb->h.raw - skb->head);
 
 		skb_reserve(nskb, header);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f1dfcc31971..bd25825c0cc 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1206,9 +1206,9 @@ alloc_new_skb:
 			 *	Find where to start putting bytes
 			 */
 			data = skb_put(skb, fraglen);
-			skb->nh.raw = data + exthdrlen;
+			skb_set_network_header(skb, exthdrlen);
 			data += fragheaderlen;
-			skb->h.raw = data + exthdrlen;
+			skb->h.raw = skb->nh.raw + fragheaderlen;
 
 			if (fraggap) {
 				skb->csum = skb_copy_and_csum_bits(
-- 
cgit v1.2.3


From 6b2bedc3a659ba228a93afc8e3f008e152abf18a Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 12 Mar 2007 14:33:50 -0700
Subject: [NET]: network dev read_mostly

For Eric, mark packet type and network device watermarks
as read mostly.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 54ffe9db9b0..f9d2b0f0bd5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -146,8 +146,8 @@
  */
 
 static DEFINE_SPINLOCK(ptype_lock);
-static struct list_head ptype_base[16];	/* 16 way hashed list */
-static struct list_head ptype_all;		/* Taps */
+static struct list_head ptype_base[16] __read_mostly;	/* 16 way hashed list */
+static struct list_head ptype_all __read_mostly;	/* Taps */
 
 #ifdef CONFIG_NET_DMA
 static struct dma_client *net_dma_client;
@@ -1533,9 +1533,9 @@ out:
 			Receiver routines
   =======================================================================*/
 
-int netdev_max_backlog = 1000;
-int netdev_budget = 300;
-int weight_p = 64;            /* old backlog weight */
+int netdev_max_backlog __read_mostly = 1000;
+int netdev_budget __read_mostly = 300;
+int weight_p __read_mostly = 64;            /* old backlog weight */
 
 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
-- 
cgit v1.2.3


From f690808e17925fc45217eb22e8670902ecee5c1b Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 12 Mar 2007 14:34:29 -0700
Subject: [NET]: make seq_operations const

The seq_file operations stuff can be marked constant to
get it out of dirty cache.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c       | 4 ++--
 net/core/dev_mcast.c | 2 +-
 net/core/neighbour.c | 2 +-
 net/core/sock.c      | 2 +-
 net/core/wireless.c  | 2 +-
 net/ipv4/arp.c       | 2 +-
 net/ipv4/fib_hash.c  | 2 +-
 net/ipv4/fib_trie.c  | 4 ++--
 net/ipv4/igmp.c      | 4 ++--
 net/ipv4/ipmr.c      | 4 ++--
 net/ipv4/raw.c       | 2 +-
 net/ipv4/route.c     | 4 ++--
 12 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index f9d2b0f0bd5..8ddc2ab2314 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2176,7 +2176,7 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations dev_seq_ops = {
+static const struct seq_operations dev_seq_ops = {
 	.start = dev_seq_start,
 	.next  = dev_seq_next,
 	.stop  = dev_seq_stop,
@@ -2196,7 +2196,7 @@ static const struct file_operations dev_seq_fops = {
 	.release = seq_release,
 };
 
-static struct seq_operations softnet_seq_ops = {
+static const struct seq_operations softnet_seq_ops = {
 	.start = softnet_seq_start,
 	.next  = softnet_seq_next,
 	.stop  = softnet_seq_stop,
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 56b310c0c86..7d57bf77f3a 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -264,7 +264,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations dev_mc_seq_ops = {
+static const struct seq_operations dev_mc_seq_ops = {
 	.start = dev_mc_seq_start,
 	.next  = dev_mc_seq_next,
 	.stop  = dev_mc_seq_stop,
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index c5653c512b4..61a4713a5df 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2393,7 +2393,7 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations neigh_stat_seq_ops = {
+static const struct seq_operations neigh_stat_seq_ops = {
 	.start	= neigh_stat_seq_start,
 	.next	= neigh_stat_seq_next,
 	.stop	= neigh_stat_seq_stop,
diff --git a/net/core/sock.c b/net/core/sock.c
index f9e6991d372..73a8018029a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1925,7 +1925,7 @@ static int proto_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations proto_seq_ops = {
+static const struct seq_operations proto_seq_ops = {
 	.start  = proto_seq_start,
 	.next   = proto_seq_next,
 	.stop   = proto_seq_stop,
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 21c091dd39e..7c6a5db544f 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -678,7 +678,7 @@ static int wireless_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations wireless_seq_ops = {
+static const struct seq_operations wireless_seq_ops = {
 	.start = dev_seq_start,
 	.next  = dev_seq_next,
 	.stop  = dev_seq_stop,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 8c533ceb970..fd36eebbd90 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1360,7 +1360,7 @@ static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
 
 /* ------------------------------------------------------------------------ */
 
-static struct seq_operations arp_seq_ops = {
+static const struct seq_operations arp_seq_ops = {
 	.start  = arp_seq_start,
 	.next   = neigh_seq_next,
 	.stop   = neigh_seq_stop,
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index a4949f957ab..9cfecf1215c 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -1027,7 +1027,7 @@ out:
 	return 0;
 }
 
-static struct seq_operations fib_seq_ops = {
+static const struct seq_operations fib_seq_ops = {
 	.start  = fib_seq_start,
 	.next   = fib_seq_next,
 	.stop   = fib_seq_stop,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index c331c433acf..e2b39fdd6a0 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2339,7 +2339,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations fib_trie_seq_ops = {
+static const struct seq_operations fib_trie_seq_ops = {
 	.start  = fib_trie_seq_start,
 	.next   = fib_trie_seq_next,
 	.stop   = fib_trie_seq_stop,
@@ -2460,7 +2460,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations fib_route_seq_ops = {
+static const struct seq_operations fib_route_seq_ops = {
 	.start  = fib_trie_seq_start,
 	.next   = fib_trie_seq_next,
 	.stop   = fib_trie_seq_stop,
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d17ad09a83e..0687a7235a6 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2401,7 +2401,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations igmp_mc_seq_ops = {
+static const struct seq_operations igmp_mc_seq_ops = {
 	.start	=	igmp_mc_seq_start,
 	.next	=	igmp_mc_seq_next,
 	.stop	=	igmp_mc_seq_stop,
@@ -2575,7 +2575,7 @@ static int igmp_mcf_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations igmp_mcf_seq_ops = {
+static const struct seq_operations igmp_mcf_seq_ops = {
 	.start	=	igmp_mcf_seq_start,
 	.next	=	igmp_mcf_seq_next,
 	.stop	=	igmp_mcf_seq_stop,
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 465459d59b2..8c4de92c1a9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1683,7 +1683,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations ipmr_vif_seq_ops = {
+static const struct seq_operations ipmr_vif_seq_ops = {
 	.start = ipmr_vif_seq_start,
 	.next  = ipmr_vif_seq_next,
 	.stop  = ipmr_vif_seq_stop,
@@ -1846,7 +1846,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations ipmr_mfc_seq_ops = {
+static const struct seq_operations ipmr_mfc_seq_ops = {
 	.start = ipmr_mfc_seq_start,
 	.next  = ipmr_mfc_seq_next,
 	.stop  = ipmr_mfc_seq_stop,
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index a3d02fdfc06..c3757bb270c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -889,7 +889,7 @@ static int raw_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations raw_seq_ops = {
+static const struct seq_operations raw_seq_ops = {
 	.start = raw_seq_start,
 	.next  = raw_seq_next,
 	.stop  = raw_seq_stop,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d2986184490..e50ad7dbbde 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -364,7 +364,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations rt_cache_seq_ops = {
+static const struct seq_operations rt_cache_seq_ops = {
 	.start  = rt_cache_seq_start,
 	.next   = rt_cache_seq_next,
 	.stop   = rt_cache_seq_stop,
@@ -470,7 +470,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct seq_operations rt_cpu_seq_ops = {
+static const struct seq_operations rt_cpu_seq_ops = {
 	.start  = rt_cpu_seq_start,
 	.next   = rt_cpu_seq_next,
 	.stop   = rt_cpu_seq_stop,
-- 
cgit v1.2.3


From 0e1256ffd1ec654b35e023c66f6b262d4cba91e9 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 12 Mar 2007 14:35:37 -0700
Subject: [NET]: show bound packet types

Show what protocols are bound to what packet types in /proc/net/ptype
Uses kallsyms to decode function pointers if possible.
Example:
	Type Device      Function
	ALL  eth1     packet_rcv_spkt+0x0
	0800          ip_rcv+0x0
	0806          arp_rcv+0x0
	86dd          :ipv6:ipv6_rcv+0x0

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 134 insertions(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index 8ddc2ab2314..3af0bdc8649 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2216,6 +2216,135 @@ static const struct file_operations softnet_seq_fops = {
 	.release = seq_release,
 };
 
+static void *ptype_get_idx(loff_t pos)
+{
+	struct packet_type *pt = NULL;
+	loff_t i = 0;
+	int t;
+
+	list_for_each_entry_rcu(pt, &ptype_all, list) {
+		if (i == pos)
+			return pt;
+		++i;
+	}
+
+	for (t = 0; t < 16; t++) {
+		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
+			if (i == pos)
+				return pt;
+			++i;
+		}
+	}
+	return NULL;
+}
+
+static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	rcu_read_lock();
+	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
+}
+
+static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct packet_type *pt;
+	struct list_head *nxt;
+	int hash;
+
+	++*pos;
+	if (v == SEQ_START_TOKEN)
+		return ptype_get_idx(0);
+
+	pt = v;
+	nxt = pt->list.next;
+	if (pt->type == htons(ETH_P_ALL)) {
+		if (nxt != &ptype_all)
+			goto found;
+		hash = 0;
+		nxt = ptype_base[0].next;
+	} else
+		hash = ntohs(pt->type) & 15;
+
+	while (nxt == &ptype_base[hash]) {
+		if (++hash >= 16)
+			return NULL;
+		nxt = ptype_base[hash].next;
+	}
+found:
+	return list_entry(nxt, struct packet_type, list);
+}
+
+static void ptype_seq_stop(struct seq_file *seq, void *v)
+{
+	rcu_read_unlock();
+}
+
+static void ptype_seq_decode(struct seq_file *seq, void *sym)
+{
+#ifdef CONFIG_KALLSYMS
+	unsigned long offset = 0, symsize;
+	const char *symname;
+	char *modname;
+	char namebuf[128];
+
+	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
+				  &modname, namebuf);
+
+	if (symname) {
+		char *delim = ":";
+
+		if (!modname)
+			modname = delim = "";
+		seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
+			   symname, offset);
+		return;
+	}
+#endif
+
+	seq_printf(seq, "[%p]", sym);
+}
+
+static int ptype_seq_show(struct seq_file *seq, void *v)
+{
+	struct packet_type *pt = v;
+
+	if (v == SEQ_START_TOKEN)
+		seq_puts(seq, "Type Device      Function\n");
+	else {
+		if (pt->type == htons(ETH_P_ALL))
+			seq_puts(seq, "ALL ");
+		else
+			seq_printf(seq, "%04x", ntohs(pt->type));
+
+		seq_printf(seq, " %-8s ",
+			   pt->dev ? pt->dev->name : "");
+		ptype_seq_decode(seq,  pt->func);
+		seq_putc(seq, '\n');
+	}
+
+	return 0;
+}
+
+static const struct seq_operations ptype_seq_ops = {
+	.start = ptype_seq_start,
+	.next  = ptype_seq_next,
+	.stop  = ptype_seq_stop,
+	.show  = ptype_seq_show,
+};
+
+static int ptype_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &ptype_seq_ops);
+}
+
+static const struct file_operations ptype_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = ptype_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+
 #ifdef CONFIG_WIRELESS_EXT
 extern int wireless_proc_init(void);
 #else
@@ -2230,6 +2359,9 @@ static int __init dev_proc_init(void)
 		goto out;
 	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
 		goto out_dev;
+	if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
+		goto out_dev2;
+
 	if (wireless_proc_init())
 		goto out_softnet;
 	rc = 0;
@@ -2237,6 +2369,8 @@ out:
 	return rc;
 out_softnet:
 	proc_net_remove("softnet_stat");
+out_dev2:
+	proc_net_remove("ptype");
 out_dev:
 	proc_net_remove("dev");
 	goto out;
-- 
cgit v1.2.3


From 0272ffc46f81a4bbbf302ba093c737e969c5bb55 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 12 Mar 2007 20:05:39 -0300
Subject: [SK_BUFF] ipmr: Missed one conversion to skb_network_header()

We can't access skb->nh.raw directly anymore, it will become an offset.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8c4de92c1a9..aba3ff0bec9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -564,7 +564,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 		skb_push(skb, sizeof(struct iphdr));
 		skb_reset_network_header(skb);
 		skb->h.raw = skb->data;
-		msg = (struct igmpmsg *)skb->nh.raw;
+		msg = (struct igmpmsg *)skb_network_header(skb);
 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
 		msg->im_mbz = 0;
-- 
cgit v1.2.3


From c9bdd4b5257406b0608385d19c40b5511decf4f6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 12 Mar 2007 20:09:15 -0300
Subject: [IP]: Introduce ip_hdrlen()

For the common sequence "skb->nh.iph->ihl * 4", removing a good number of open
coded skb->nh.iph uses, now to go after the rest...

Just out of curiosity, here are the idioms found to get the same result:

skb->nh.iph->ihl << 2
skb->nh.iph->ihl<<2
skb->nh.iph->ihl * 4
skb->nh.iph->ihl*4
(skb->nh.iph)->ihl * sizeof(u32)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bnx2.c                             |  2 +-
 drivers/net/ehea/ehea_main.c                   |  4 ++--
 drivers/net/netxen/netxen_nic_hw.c             |  8 +++++---
 drivers/net/netxen/netxen_nic_main.c           |  6 +++---
 drivers/net/sky2.c                             |  3 ++-
 drivers/net/tg3.c                              |  6 +++---
 drivers/s390/net/qeth_eddp.c                   | 13 ++++++++-----
 include/net/ip.h                               |  7 ++++++-
 net/ipv4/ip_fragment.c                         |  4 ++--
 net/ipv4/ip_input.c                            |  4 +---
 net/ipv4/ipmr.c                                |  2 +-
 net/ipv4/ipvs/ip_vs_app.c                      |  4 ++--
 net/ipv4/ipvs/ip_vs_core.c                     |  3 +--
 net/ipv4/ipvs/ip_vs_proto_tcp.c                | 12 +++++-------
 net/ipv4/ipvs/ip_vs_proto_udp.c                | 12 ++++++------
 net/ipv4/netfilter/ip_conntrack_amanda.c       |  2 +-
 net/ipv4/netfilter/ip_conntrack_core.c         |  3 +--
 net/ipv4/netfilter/ip_conntrack_ftp.c          |  4 ++--
 net/ipv4/netfilter/ip_conntrack_helper_h323.c  |  9 ++++-----
 net/ipv4/netfilter/ip_conntrack_helper_pptp.c  |  4 ++--
 net/ipv4/netfilter/ip_conntrack_irc.c          |  4 ++--
 net/ipv4/netfilter/ip_conntrack_proto_icmp.c   |  8 ++++----
 net/ipv4/netfilter/ip_conntrack_proto_sctp.c   |  2 +-
 net/ipv4/netfilter/ip_conntrack_proto_tcp.c    |  2 +-
 net/ipv4/netfilter/ip_conntrack_sip.c          |  2 +-
 net/ipv4/netfilter/ip_conntrack_standalone.c   |  2 +-
 net/ipv4/netfilter/ip_conntrack_tftp.c         |  2 +-
 net/ipv4/netfilter/ip_nat_core.c               | 11 +++++------
 net/ipv4/netfilter/ip_nat_helper.c             |  8 ++++----
 net/ipv4/netfilter/ip_nat_helper_h323.c        |  8 ++++----
 net/ipv4/netfilter/ip_nat_sip.c                | 10 +++++-----
 net/ipv4/netfilter/ip_nat_standalone.c         |  7 +++----
 net/ipv4/netfilter/ip_tables.c                 |  2 +-
 net/ipv4/netfilter/ipt_ECN.c                   |  7 ++++---
 net/ipv4/netfilter/ipt_REJECT.c                | 19 +++++++++----------
 net/ipv4/netfilter/ipt_ecn.c                   |  4 ++--
 net/ipv4/netfilter/iptable_filter.c            |  3 ++-
 net/ipv4/netfilter/iptable_mangle.c            |  3 ++-
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  8 ++++----
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   |  6 +++---
 net/ipv4/netfilter/nf_nat_core.c               | 14 +++++++-------
 net/ipv4/netfilter/nf_nat_h323.c               |  8 ++++----
 net/ipv4/netfilter/nf_nat_helper.c             | 12 ++++++------
 net/ipv4/netfilter/nf_nat_sip.c                | 11 ++++++-----
 net/ipv4/netfilter/nf_nat_standalone.c         |  7 +++----
 net/ipv6/netfilter/ip6table_filter.c           |  2 +-
 net/ipv6/netfilter/ip6table_mangle.c           |  2 +-
 47 files changed, 145 insertions(+), 141 deletions(-)

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index e85f5ec48f9..b8091c55d44 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -4527,7 +4527,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (skb->h.th->doff > 5) {
 			tcp_opt_len = (skb->h.th->doff - 5) << 2;
 		}
-		ip_tcp_len = (skb->nh.iph->ihl << 2) + sizeof(struct tcphdr);
+		ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
 		skb->nh.iph->check = 0;
 		skb->nh.iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len);
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 0e4042bc0a4..b1c90a4fe31 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -1263,7 +1263,7 @@ static inline void write_ip_start_end(struct ehea_swqe *swqe,
 				      const struct sk_buff *skb)
 {
 	swqe->ip_start = (u8)(((u64)skb->nh.iph) - ((u64)skb->data));
-	swqe->ip_end = (u8)(swqe->ip_start + skb->nh.iph->ihl * 4 - 1);
+	swqe->ip_end = (u8)(swqe->ip_start + ip_hdrlen(skb) - 1);
 }
 
 static inline void write_tcp_offset_end(struct ehea_swqe *swqe,
@@ -1300,7 +1300,7 @@ static void write_swqe2_TSO(struct sk_buff *skb,
 	/* copy only eth/ip/tcp headers to immediate data and
 	 * the rest of skb->data to sg1entry
 	 */
-	headersize = ETH_HLEN + (skb->nh.iph->ihl * 4) + (skb->h.th->doff * 4);
+	headersize = ETH_HLEN + ip_hdrlen(skb) + (skb->h.th->doff * 4);
 
 	skb_data_size = skb->len - skb->data_len;
 
diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c
index 625e11ed6aa..b2f5032937e 100644
--- a/drivers/net/netxen/netxen_nic_hw.c
+++ b/drivers/net/netxen/netxen_nic_hw.c
@@ -35,6 +35,8 @@
 #include "netxen_nic_hw.h"
 #include "netxen_nic_phan_reg.h"
 
+#include <net/ip.h>
+
 /*  PCI Windowing for DDR regions.  */
 
 #define ADDR_IN_RANGE(addr, low, high)	\
@@ -371,9 +373,9 @@ void netxen_tso_check(struct netxen_adapter *adapter,
 		      struct cmd_desc_type0 *desc, struct sk_buff *skb)
 {
 	if (desc->mss) {
-		desc->total_hdr_length = sizeof(struct ethhdr) +
-		    ((skb->nh.iph)->ihl * sizeof(u32)) +
-		    ((skb->h.th)->doff * sizeof(u32));
+		desc->total_hdr_length = (sizeof(struct ethhdr) +
+					  ip_hdrlen(skb) +
+					  skb->h.th->doff * 4);
 		netxen_set_cmd_desc_opcode(desc, TX_TCP_LSO);
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		if (skb->nh.iph->protocol == IPPROTO_TCP) {
diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index 7d2525e76ab..b548a30e5c8 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -41,6 +41,7 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/vmalloc.h>
+#include <net/ip.h>
 
 MODULE_DESCRIPTION("NetXen Multi port (1/10) Gigabit Network Driver");
 MODULE_LICENSE("GPL");
@@ -778,9 +779,8 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 		if (skb_shinfo(skb)->gso_size > 0) {
 
 			no_of_desc++;
-			if (((skb->nh.iph)->ihl * sizeof(u32)) +
-			    ((skb->h.th)->doff * sizeof(u32)) +
-			    sizeof(struct ethhdr) >
+			if ((ip_hdrlen(skb) + skb->h.th->doff * 4 +
+			     sizeof(struct ethhdr)) >
 			    (sizeof(struct cmd_desc_type0) - 2)) {
 				no_of_desc++;
 			}
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index ac36152c68b..51e994f26a8 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -32,6 +32,7 @@
 #include <linux/ethtool.h>
 #include <linux/pci.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 #include <linux/tcp.h>
 #include <linux/in.h>
 #include <linux/delay.h>
@@ -1392,7 +1393,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 	mss = skb_shinfo(skb)->gso_size;
 	if (mss != 0) {
 		mss += ((skb->h.th->doff - 5) * 4);	/* TCP options */
-		mss += (skb->nh.iph->ihl * 4) + sizeof(struct tcphdr);
+		mss += ip_hdrlen(skb) + sizeof(struct tcphdr);
 		mss += ETH_HLEN;
 
 		if (mss != sky2->tx_last_mss) {
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 256969e1300..62a3bba0097 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -40,6 +40,7 @@
 #include <linux/dma-mapping.h>
 
 #include <net/checksum.h>
+#include <net/ip.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
@@ -3909,8 +3910,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			mss |= (skb_headlen(skb) - ETH_HLEN) << 9;
 		else {
 			tcp_opt_len = ((skb->h.th->doff - 5) * 4);
-			ip_tcp_len = (skb->nh.iph->ihl * 4) +
-				     sizeof(struct tcphdr);
+			ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
 			skb->nh.iph->check = 0;
 			skb->nh.iph->tot_len = htons(mss + ip_tcp_len +
@@ -4064,7 +4064,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 		}
 
 		tcp_opt_len = ((skb->h.th->doff - 5) * 4);
-		ip_tcp_len = (skb->nh.iph->ihl * 4) + sizeof(struct tcphdr);
+		ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
 		hdr_len = ip_tcp_len + tcp_opt_len;
 		if (unlikely((ETH_HLEN + hdr_len) > 80) &&
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index 893125403c6..1574247abaa 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -473,9 +473,11 @@ qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
 	QETH_DBF_TEXT(trace, 5, "eddpficx");
 	/* create our segmentation headers and copy original headers */
 	if (skb->protocol == htons(ETH_P_IP))
-		eddp = qeth_eddp_create_eddp_data(qhdr, (u8 *)skb->nh.iph,
-				skb->nh.iph->ihl*4,
-				(u8 *)skb->h.th, skb->h.th->doff*4);
+		eddp = qeth_eddp_create_eddp_data(qhdr,
+						  skb_network_header(skb),
+						  ip_hdrlen(skb),
+						  skb->h.raw,
+						  skb->h.th->doff * 4);
 	else
 		eddp = qeth_eddp_create_eddp_data(qhdr, (u8 *)skb->nh.ipv6h,
 				sizeof(struct ipv6hdr),
@@ -590,8 +592,9 @@ qeth_eddp_create_context_tcp(struct qeth_card *card, struct sk_buff *skb,
 	QETH_DBF_TEXT(trace, 5, "creddpct");
 	if (skb->protocol == htons(ETH_P_IP))
 		ctx = qeth_eddp_create_context_generic(card, skb,
-			sizeof(struct qeth_hdr) + skb->nh.iph->ihl*4 +
-			skb->h.th->doff*4);
+						       (sizeof(struct qeth_hdr) +
+						        ip_hdrlen(skb) +
+							skb->h.th->doff * 4));
 	else if (skb->protocol == htons(ETH_P_IPV6))
 		ctx = qeth_eddp_create_context_generic(card, skb,
 			sizeof(struct qeth_hdr) + sizeof(struct ipv6hdr) +
diff --git a/include/net/ip.h b/include/net/ip.h
index e79c3e3aa4f..6f7ba32b199 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -25,6 +25,7 @@
 #include <linux/types.h>
 #include <linux/ip.h>
 #include <linux/in.h>
+#include <linux/skbuff.h>
 
 #include <net/inet_sock.h>
 #include <net/snmp.h>
@@ -43,6 +44,11 @@ struct inet_skb_parm
 #define IPSKB_REROUTED		16
 };
 
+static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
+{
+	return skb->nh.iph->ihl * 4;
+}
+
 struct ipcm_cookie
 {
 	__be32			addr;
@@ -74,7 +80,6 @@ struct msghdr;
 struct net_device;
 struct packet_type;
 struct rtable;
-struct sk_buff;
 struct sockaddr;
 
 extern void		ip_mc_dropsocket(struct sock *);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 268a6c7347f..af120b2d533 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -483,7 +483,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	flags = offset & ~IP_OFFSET;
 	offset &= IP_OFFSET;
 	offset <<= 3;		/* offset is in 8-byte chunks */
-	ihl = skb->nh.iph->ihl * 4;
+	ihl = ip_hdrlen(skb);
 
 	/* Determine the position of this fragment. */
 	end = offset + skb->len - ihl;
@@ -624,7 +624,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
 	BUG_TRAP(FRAG_CB(head)->offset == 0);
 
 	/* Allocate a new buffer for the datagram. */
-	ihlen = head->nh.iph->ihl*4;
+	ihlen = ip_hdrlen(head);
 	len = ihlen + qp->len;
 
 	if (len > 65535)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index f38e97647ac..2ee132b330f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -198,9 +198,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
 
 static inline int ip_local_deliver_finish(struct sk_buff *skb)
 {
-	int ihl = skb->nh.iph->ihl*4;
-
-	__skb_pull(skb, ihl);
+	__skb_pull(skb, ip_hdrlen(skb));
 
 	/* Point into the IP datagram, just past the header. */
 	skb->h.raw = skb->data;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index aba3ff0bec9..54b7543190f 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -539,7 +539,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 {
 	struct sk_buff *skb;
-	int ihl = pkt->nh.iph->ihl<<2;
+	const int ihl = ip_hdrlen(pkt);
 	struct igmphdr *igmp;
 	struct igmpmsg *msg;
 	int ret;
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index f29d3a27eec..e5beab28cd0 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -331,7 +331,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
 				  struct ip_vs_app *app)
 {
 	int diff;
-	unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
+	const unsigned int tcp_offset = ip_hdrlen(*pskb);
 	struct tcphdr *th;
 	__u32 seq;
 
@@ -406,7 +406,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
 				 struct ip_vs_app *app)
 {
 	int diff;
-	unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
+	const unsigned int tcp_offset = ip_hdrlen(*pskb);
 	struct tcphdr *th;
 	__u32 seq;
 
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 5d54dd2ce12..7893c00a91f 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -713,8 +713,7 @@ static inline int is_tcp_reset(const struct sk_buff *skb)
 {
 	struct tcphdr _tcph, *th;
 
-	th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
-				sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return 0;
 	return th->rst;
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index 16a9ebee2fe..e65382da713 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -76,8 +76,7 @@ tcp_conn_schedule(struct sk_buff *skb,
 	struct ip_vs_service *svc;
 	struct tcphdr _tcph, *th;
 
-	th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
-				sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
 	if (th == NULL) {
 		*verdict = NF_DROP;
 		return 0;
@@ -127,7 +126,7 @@ tcp_snat_handler(struct sk_buff **pskb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct tcphdr *tcph;
-	unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
+	const unsigned int tcphoff = ip_hdrlen(*pskb);
 
 	/* csum_check requires unshared skb */
 	if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
@@ -175,7 +174,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct tcphdr *tcph;
-	unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
+	const unsigned int tcphoff = ip_hdrlen(*pskb);
 
 	/* csum_check requires unshared skb */
 	if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
@@ -224,7 +223,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
 static int
 tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 {
-	unsigned int tcphoff = skb->nh.iph->ihl*4;
+	const unsigned int tcphoff = ip_hdrlen(skb);
 
 	switch (skb->ip_summed) {
 	case CHECKSUM_NONE:
@@ -467,8 +466,7 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
 {
 	struct tcphdr _tcph, *th;
 
-	th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
-				sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return 0;
 
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 03f0a414cfa..2cd95063892 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -22,7 +22,7 @@
 #include <linux/udp.h>
 
 #include <net/ip_vs.h>
-
+#include <net/ip.h>
 
 static struct ip_vs_conn *
 udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
@@ -56,7 +56,7 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
 	struct ip_vs_conn *cp;
 	__be16 _ports[2], *pptr;
 
-	pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+	pptr = skb_header_pointer(skb, ip_hdrlen(skb),
 				  sizeof(_ports), _ports);
 	if (pptr == NULL)
 		return NULL;
@@ -82,7 +82,7 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
 	struct ip_vs_service *svc;
 	struct udphdr _udph, *uh;
 
-	uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+	uh = skb_header_pointer(skb, ip_hdrlen(skb),
 				sizeof(_udph), &_udph);
 	if (uh == NULL) {
 		*verdict = NF_DROP;
@@ -133,7 +133,7 @@ udp_snat_handler(struct sk_buff **pskb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct udphdr *udph;
-	unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
+	const unsigned int udphoff = ip_hdrlen(*pskb);
 
 	/* csum_check requires unshared skb */
 	if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
@@ -187,7 +187,7 @@ udp_dnat_handler(struct sk_buff **pskb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct udphdr *udph;
-	unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
+	unsigned int udphoff = ip_hdrlen(*pskb);
 
 	/* csum_check requires unshared skb */
 	if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
@@ -239,7 +239,7 @@ static int
 udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 {
 	struct udphdr _udph, *uh;
-	unsigned int udphoff = skb->nh.iph->ihl*4;
+	const unsigned int udphoff = ip_hdrlen(skb);
 
 	uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
 	if (uh == NULL)
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index 4f561f52c83..c40762c67d0 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -103,7 +103,7 @@ static int help(struct sk_buff **pskb,
 	ip_ct_refresh(ct, *pskb, master_timeout * HZ);
 
 	/* No data? */
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
 	if (dataoff >= (*pskb)->len) {
 		if (net_ratelimit())
 			printk("amanda_help: skblen = %u\n", (*pskb)->len);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 23b99ae2cc3..8c013d9f690 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -750,8 +750,7 @@ resolve_normal_ct(struct sk_buff *skb,
 
 	IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
 
-	if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
-				&tuple,proto))
+	if (!ip_ct_get_tuple(skb->nh.iph, skb, ip_hdrlen(skb), &tuple,proto))
 		return NULL;
 
 	/* look for tuple match */
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 1faa68ab943..92389987e78 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -319,12 +319,12 @@ static int help(struct sk_buff **pskb,
 		return NF_ACCEPT;
 	}
 
-	th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
+	th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
 				sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return NF_ACCEPT;
 
-	dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
+	dataoff = ip_hdrlen(*pskb) + th->doff * 4;
 	/* No data? */
 	if (dataoff >= (*pskb)->len) {
 		DEBUGP("ftp: pskblen = %u\n", (*pskb)->len);
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
index 53eb365ccc7..5d638149b0e 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
@@ -115,13 +115,13 @@ static int get_tpkt_data(struct sk_buff **pskb, struct ip_conntrack *ct,
 	int tpktoff;
 
 	/* Get TCP header */
-	th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
+	th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
 				sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return 0;
 
 	/* Get TCP data offset */
-	tcpdataoff = (*pskb)->nh.iph->ihl * 4 + th->doff * 4;
+	tcpdataoff = ip_hdrlen(*pskb) + th->doff * 4;
 
 	/* Get TCP data length */
 	tcpdatalen = (*pskb)->len - tcpdataoff;
@@ -1185,11 +1185,10 @@ static unsigned char *get_udp_data(struct sk_buff **pskb, int *datalen)
 	struct udphdr _uh, *uh;
 	int dataoff;
 
-	uh = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4, sizeof(_uh),
-				&_uh);
+	uh = skb_header_pointer(*pskb, ip_hdrlen(*pskb), sizeof(_uh), &_uh);
 	if (uh == NULL)
 		return NULL;
-	dataoff = (*pskb)->nh.iph->ihl * 4 + sizeof(_uh);
+	dataoff = ip_hdrlen(*pskb) + sizeof(_uh);
 	if (dataoff >= (*pskb)->len)
 		return NULL;
 	*datalen = (*pskb)->len - dataoff;
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
index 2b760c5cf70..f5ab8e4b97c 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -543,7 +543,7 @@ conntrack_pptp_help(struct sk_buff **pskb,
 	struct pptp_pkt_hdr _pptph, *pptph;
 	struct PptpControlHeader _ctlh, *ctlh;
 	union pptp_ctrl_union _pptpReq, *pptpReq;
-	unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
+	unsigned int tcplen = (*pskb)->len - ip_hdrlen(*pskb);
 	unsigned int datalen, reqlen, nexthdr_off;
 	int oldsstate, oldcstate;
 	int ret;
@@ -556,7 +556,7 @@ conntrack_pptp_help(struct sk_buff **pskb,
 		return NF_ACCEPT;
 	}
 
-	nexthdr_off = (*pskb)->nh.iph->ihl*4;
+	nexthdr_off = ip_hdrlen(*pskb);
 	tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
 	BUG_ON(!tcph);
 	nexthdr_off += tcph->doff * 4;
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index 053e591f407..ee99abe482e 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -130,13 +130,13 @@ static int help(struct sk_buff **pskb,
 	}
 
 	/* Not a full tcp header? */
-	th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
+	th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
 				sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return NF_ACCEPT;
 
 	/* No data? */
-	dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
+	dataoff = ip_hdrlen(*pskb) + th->doff * 4;
 	if (dataoff >= (*pskb)->len)
 		return NF_ACCEPT;
 
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index ad70c81a21e..e253f3ee52d 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -149,7 +149,7 @@ icmp_error_message(struct sk_buff *skb,
 	IP_NF_ASSERT(skb->nfct == NULL);
 
 	/* Not enough header? */
-	inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
+	inside = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_in), &_in);
 	if (inside == NULL)
 		return -NF_ACCEPT;
 
@@ -161,7 +161,7 @@ icmp_error_message(struct sk_buff *skb,
 	}
 
 	innerproto = ip_conntrack_proto_find_get(inside->ip.protocol);
-	dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4;
+	dataoff = ip_hdrlen(skb) + sizeof(inside->icmp) + inside->ip.ihl * 4;
 	/* Are they talking about one of our connections? */
 	if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) {
 		DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol);
@@ -214,7 +214,7 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
 	struct icmphdr _ih, *icmph;
 
 	/* Not enough header? */
-	icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
+	icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
 	if (icmph == NULL) {
 		if (LOG_INVALID(IPPROTO_ICMP))
 			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
@@ -224,7 +224,7 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
 
 	/* See ip_conntrack_proto_tcp.c */
 	if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
-	    nf_ip_checksum(skb, hooknum, skb->nh.iph->ihl * 4, 0)) {
+	    nf_ip_checksum(skb, hooknum, ip_hdrlen(skb), 0)) {
 		if (LOG_INVALID(IPPROTO_ICMP))
 			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				      "ip_ct_icmp: bad ICMP checksum ");
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index e6942992b2f..e29c436144b 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -206,7 +206,7 @@ static int sctp_print_conntrack(struct seq_file *s,
 }
 
 #define for_each_sctp_chunk(skb, sch, _sch, offset, count)		\
-for (offset = skb->nh.iph->ihl * 4 + sizeof(sctp_sctphdr_t), count = 0;	\
+for (offset = ip_hdrlen(skb) + sizeof(sctp_sctphdr_t), count = 0;	\
 	offset < skb->len &&						\
 	(sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch));	\
 	offset += (ntohs(sch->length) + 3) & ~3, count++)
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 7ff11977eb4..fce3a3c6981 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -771,7 +771,7 @@ void ip_conntrack_tcp_update(struct sk_buff *skb,
 			     enum ip_conntrack_dir dir)
 {
 	struct iphdr *iph = skb->nh.iph;
-	struct tcphdr *tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4;
+	struct tcphdr *tcph = (void *)skb->nh.iph + ip_hdrlen(skb);
 	__u32 end;
 #ifdef DEBUGP_VARS
 	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c
index c59a962c1f6..7363e2a5cea 100644
--- a/net/ipv4/netfilter/ip_conntrack_sip.c
+++ b/net/ipv4/netfilter/ip_conntrack_sip.c
@@ -402,7 +402,7 @@ static int sip_help(struct sk_buff **pskb,
 	typeof(ip_nat_sip_hook) ip_nat_sip;
 
 	/* No Data ? */
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
 	if (dataoff >= (*pskb)->len) {
 		DEBUGP("skb->len = %u\n", (*pskb)->len);
 		return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 56b2f7546d1..92609a4dcd7 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -458,7 +458,7 @@ static unsigned int ip_conntrack_local(unsigned int hooknum,
 {
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ipt_hook: happy cracking.\n");
 		return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
index 76e175e7a97..afc6809a388 100644
--- a/net/ipv4/netfilter/ip_conntrack_tftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_tftp.c
@@ -53,7 +53,7 @@ static int tftp_help(struct sk_buff **pskb,
 	typeof(ip_nat_tftp_hook) ip_nat_tftp;
 
 	tfh = skb_header_pointer(*pskb,
-				 (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr),
+				 ip_hdrlen(*pskb) + sizeof(struct udphdr),
 				 sizeof(_tftph), &_tftph);
 	if (tfh == NULL)
 		return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 40737fdbe9a..cf46930606f 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -422,7 +422,7 @@ int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
 	} *inside;
 	struct ip_conntrack_protocol *proto;
 	struct ip_conntrack_tuple inner, target;
-	int hdrlen = (*pskb)->nh.iph->ihl * 4;
+	int hdrlen = ip_hdrlen(*pskb);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned long statusbit;
 	enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);
@@ -430,7 +430,7 @@ int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
 	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
 		return 0;
 
-	inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+	inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
 
 	/* We're actually going to mangle it beyond trivial checksum
 	   adjustment, so make sure the current checksum is correct. */
@@ -458,7 +458,7 @@ int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
 
 	/* rcu_read_lock()ed by nf_hook_slow */
 	proto = __ip_conntrack_proto_find(inside->ip.protocol);
-	if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
+	if (!ip_ct_get_tuple(&inside->ip, *pskb, ip_hdrlen(*pskb) +
 			     sizeof(struct icmphdr) + inside->ip.ihl*4,
 			     &inner, proto))
 		return 0;
@@ -469,15 +469,14 @@ int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
 	   packet: PREROUTING (DST manip), routing produces ICMP, goes
 	   through POSTROUTING (which must correct the DST manip). */
 	if (!manip_pkt(inside->ip.protocol, pskb,
-		       (*pskb)->nh.iph->ihl*4
-		       + sizeof(inside->icmp),
+		       ip_hdrlen(*pskb) + sizeof(inside->icmp),
 		       &ct->tuplehash[!dir].tuple,
 		       !manip))
 		return 0;
 
 	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
 		/* Reloading "inside" here since manip_pkt inner. */
-		inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+		inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
 		inside->icmp.checksum = 0;
 		inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
 							       (*pskb)->len - hdrlen,
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index dc778cfef58..25624e55856 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -322,8 +322,8 @@ ip_nat_sack_adjust(struct sk_buff **pskb,
 {
 	unsigned int dir, optoff, optend;
 
-	optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
-	optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
+	optoff = ip_hdrlen(*pskb) + sizeof(struct tcphdr);
+	optend = ip_hdrlen(*pskb) + tcph->doff * 4;
 
 	if (!skb_make_writable(pskb, optend))
 		return 0;
@@ -374,10 +374,10 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
 	this_way = &ct->nat.info.seq[dir];
 	other_way = &ct->nat.info.seq[!dir];
 
-	if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+	if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
 		return 0;
 
-	tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+	tcph = (void *)(*pskb)->data + ip_hdrlen(*pskb);
 	if (after(ntohl(tcph->seq), this_way->correction_pos))
 		newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
 	else
diff --git a/net/ipv4/netfilter/ip_nat_helper_h323.c b/net/ipv4/netfilter/ip_nat_helper_h323.c
index bdc99ef6159..8b1e3388bd0 100644
--- a/net/ipv4/netfilter/ip_nat_helper_h323.c
+++ b/net/ipv4/netfilter/ip_nat_helper_h323.c
@@ -57,11 +57,11 @@ static int set_addr(struct sk_buff **pskb,
 		}
 
 		/* Relocate data pointer */
-		th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
+		th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
 					sizeof(_tcph), &_tcph);
 		if (th == NULL)
 			return -1;
-		*data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
+		*data = (*pskb)->data + ip_hdrlen(*pskb) +
 		    th->doff * 4 + dataoff;
 	} else {
 		if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
@@ -75,8 +75,8 @@ static int set_addr(struct sk_buff **pskb,
 		/* ip_nat_mangle_udp_packet uses skb_make_writable() to copy
 		 * or pull everything in a linear buffer, so we can safely
 		 * use the skb pointers now */
-		*data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
-		    sizeof(struct udphdr);
+		*data = ((*pskb)->data + ip_hdrlen(*pskb) +
+			 sizeof(struct udphdr));
 	}
 
 	return 0;
diff --git a/net/ipv4/netfilter/ip_nat_sip.c b/net/ipv4/netfilter/ip_nat_sip.c
index 325c5a9dc2e..84953601762 100644
--- a/net/ipv4/netfilter/ip_nat_sip.c
+++ b/net/ipv4/netfilter/ip_nat_sip.c
@@ -90,7 +90,7 @@ static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
 	if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
 				      matchoff, matchlen, addr, addrlen))
 		return 0;
-	*dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	*dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
 	return 1;
 
 }
@@ -104,7 +104,7 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb,
 	struct addr_map map;
 	int dataoff, datalen;
 
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
 	datalen = (*pskb)->len - dataoff;
 	if (datalen < sizeof("SIP/2.0") - 1)
 		return NF_DROP;
@@ -153,7 +153,7 @@ static unsigned int mangle_sip_packet(struct sk_buff **pskb,
 		return 0;
 
 	/* We need to reload this. Thanks Patrick. */
-	*dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	*dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
 	return 1;
 }
 
@@ -166,7 +166,7 @@ static int mangle_content_len(struct sk_buff **pskb,
 	char buffer[sizeof("65536")];
 	int bufflen;
 
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
 
 	/* Get actual SDP lenght */
 	if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff,
@@ -199,7 +199,7 @@ static unsigned int mangle_sdp(struct sk_buff **pskb,
 	char buffer[sizeof("nnn.nnn.nnn.nnn")];
 	unsigned int dataoff, bufflen;
 
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
 
 	/* Mangle owner and contact info. */
 	bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 6bcfdf6dfcc..dbaaf78ff9a 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -112,8 +112,7 @@ ip_nat_fn(unsigned int hooknum,
 		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
 			struct icmphdr _hdr, *hp;
 
-			hp = skb_header_pointer(*pskb,
-						(*pskb)->nh.iph->ihl*4,
+			hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
 						sizeof(_hdr), &_hdr);
 			if (hp != NULL &&
 			    hp->type == ICMP_REDIRECT)
@@ -211,7 +210,7 @@ ip_nat_out(unsigned int hooknum,
 
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+	    || ip_hdrlen(*pskb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
 	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
@@ -244,7 +243,7 @@ ip_nat_local_fn(unsigned int hooknum,
 
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+	    || ip_hdrlen(*pskb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
 	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 50cc4b92e28..f6696665021 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -198,7 +198,7 @@ int do_match(struct ipt_entry_match *m,
 {
 	/* Stop iteration if it doesn't match */
 	if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data,
-				      offset, skb->nh.iph->ihl*4, hotdrop))
+				      offset, ip_hdrlen(skb), hotdrop))
 		return 1;
 	else
 		return 0;
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 4f565633631..44daf9e1da3 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 #include <linux/tcp.h>
 #include <net/checksum.h>
 
@@ -52,7 +53,7 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 	__be16 oldval;
 
 	/* Not enought header? */
-	tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
+	tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
 				  sizeof(_tcph), &_tcph);
 	if (!tcph)
 		return 0;
@@ -63,9 +64,9 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 	     tcph->cwr == einfo->proto.tcp.cwr)))
 		return 1;
 
-	if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+	if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
 		return 0;
-	tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
+	tcph = (void *)(*pskb)->nh.iph + ip_hdrlen(*pskb);
 
 	oldval = ((__be16 *)tcph)[6];
 	if (einfo->operation & IPT_ECN_OP_SET_ECE)
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 80f739e2182..01c04f0e5c9 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -43,7 +43,6 @@ MODULE_DESCRIPTION("iptables REJECT target module");
 static void send_reset(struct sk_buff *oldskb, int hook)
 {
 	struct sk_buff *nskb;
-	struct iphdr *iph = oldskb->nh.iph;
 	struct tcphdr _otcph, *oth, *tcph;
 	__be16 tmp_port;
 	__be32 tmp_addr;
@@ -54,7 +53,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
 		return;
 
-	oth = skb_header_pointer(oldskb, oldskb->nh.iph->ihl * 4,
+	oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
 				 sizeof(_otcph), &_otcph);
 	if (oth == NULL)
 		return;
@@ -64,7 +63,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 		return;
 
 	/* Check checksum */
-	if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP))
+	if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
 		return;
 
 	/* We need a linear, writeable skb.  We also need to expand
@@ -84,7 +83,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	skb_shinfo(nskb)->gso_segs = 0;
 	skb_shinfo(nskb)->gso_type = 0;
 
-	tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl);
+	tcph = (struct tcphdr *)(skb_network_header(nskb) + ip_hdrlen(nskb));
 
 	/* Swap source and dest */
 	tmp_addr = nskb->nh.iph->saddr;
@@ -96,7 +95,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 
 	/* Truncate to length (no data) */
 	tcph->doff = sizeof(struct tcphdr)/4;
-	skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr));
+	skb_trim(nskb, ip_hdrlen(nskb) + sizeof(struct tcphdr));
 	nskb->nh.iph->tot_len = htons(nskb->len);
 
 	if (tcph->ack) {
@@ -105,9 +104,9 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 		tcph->ack_seq = 0;
 	} else {
 		needs_ack = 1;
-		tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin
-				      + oldskb->len - oldskb->nh.iph->ihl*4
-				      - (oth->doff<<2));
+		tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
+				      oldskb->len - ip_hdrlen(oldskb) -
+				      (oth->doff << 2));
 		tcph->seq = 0;
 	}
 
@@ -149,7 +148,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 
 	/* Adjust IP checksum */
 	nskb->nh.iph->check = 0;
-	nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph,
+	nskb->nh.iph->check = ip_fast_csum(skb_network_header(nskb),
 					   nskb->nh.iph->ihl);
 
 	/* "Never happens" */
@@ -182,7 +181,7 @@ static unsigned int reject(struct sk_buff **pskb,
 
 	/* Our naive response construction doesn't deal with IP
 	   options, and probably shouldn't try. */
-	if ((*pskb)->nh.iph->ihl<<2 != sizeof(struct iphdr))
+	if (ip_hdrlen(*pskb) != sizeof(struct iphdr))
 		return NF_DROP;
 
 	/* WARNING: This code causes reentry within iptables.
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 37508b2cfea..b8ade3cc775 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -11,6 +11,7 @@
 
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/tcp.h>
@@ -38,8 +39,7 @@ static inline int match_tcp(const struct sk_buff *skb,
 	/* In practice, TCP match does this, so can't fail.  But let's
 	 * be good citizens.
 	 */
-	th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
-				sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
 	if (th == NULL) {
 		*hotdrop = 0;
 		return 0;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index d1d61e97b97..42728909eba 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/ip.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -102,7 +103,7 @@ ipt_local_out_hook(unsigned int hook,
 {
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ipt_hook: happy cracking.\n");
 		return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 98b66ef0c71..6cc3245f676 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -17,6 +17,7 @@
 #include <net/sock.h>
 #include <net/route.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -136,7 +137,7 @@ ipt_local_hook(unsigned int hook,
 
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ipt_hook: happy cracking.\n");
 		return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 7cebbff0b0c..fa14eb77f9b 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -105,7 +105,7 @@ ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
 		return -NF_DROP;
 	}
 
-	*dataoff = skb_network_offset(*pskb) + (*pskb)->nh.iph->ihl * 4;
+	*dataoff = skb_network_offset(*pskb) + ip_hdrlen(*pskb);
 	*protonum = (*pskb)->nh.iph->protocol;
 
 	return NF_ACCEPT;
@@ -151,8 +151,8 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
 	if (!help || !help->helper)
 		return NF_ACCEPT;
 
-	return help->helper->help(pskb, (skb_network_offset(*pskb) +
-					 (*pskb)->nh.iph->ihl * 4),
+	return help->helper->help(pskb,
+				  skb_network_offset(*pskb) + ip_hdrlen(*pskb),
 				  ct, ctinfo);
 }
 
@@ -198,7 +198,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
 {
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ipt_hook: happy cracking.\n");
 		return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 5fd1e5363c1..e090e929e6e 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -158,7 +158,7 @@ icmp_error_message(struct sk_buff *skb,
 	NF_CT_ASSERT(skb->nfct == NULL);
 
 	/* Not enough header? */
-	inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
+	inside = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_in), &_in);
 	if (inside == NULL)
 		return -NF_ACCEPT;
 
@@ -172,7 +172,7 @@ icmp_error_message(struct sk_buff *skb,
 	/* rcu_read_lock()ed by nf_hook_slow */
 	innerproto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
 
-	dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
+	dataoff = ip_hdrlen(skb) + sizeof(inside->icmp);
 	/* Are they talking about one of our connections? */
 	if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
 			     inside->ip.protocol, &origtuple,
@@ -227,7 +227,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff,
 	struct icmphdr _ih, *icmph;
 
 	/* Not enough header? */
-	icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
+	icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
 	if (icmph == NULL) {
 		if (LOG_INVALID(IPPROTO_ICMP))
 			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 452e9d32668..ea02f00d2da 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -431,7 +431,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	} *inside;
 	struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple inner, target;
-	int hdrlen = (*pskb)->nh.iph->ihl * 4;
+	int hdrlen = ip_hdrlen(*pskb);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned long statusbit;
 	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
@@ -439,7 +439,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
 		return 0;
 
-	inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+	inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
 
 	/* We're actually going to mangle it beyond trivial checksum
 	   adjustment, so make sure the current checksum is correct. */
@@ -469,9 +469,9 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
 
 	if (!nf_ct_get_tuple(*pskb,
-			     (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr),
-			     (*pskb)->nh.iph->ihl*4 +
-			     sizeof(struct icmphdr) + inside->ip.ihl*4,
+			     ip_hdrlen(*pskb) + sizeof(struct icmphdr),
+			     (ip_hdrlen(*pskb) +
+			      sizeof(struct icmphdr) + inside->ip.ihl * 4),
 			     (u_int16_t)AF_INET,
 			     inside->ip.protocol,
 			     &inner, l3proto, l4proto))
@@ -483,14 +483,14 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	   packet: PREROUTING (DST manip), routing produces ICMP, goes
 	   through POSTROUTING (which must correct the DST manip). */
 	if (!manip_pkt(inside->ip.protocol, pskb,
-		       (*pskb)->nh.iph->ihl*4 + sizeof(inside->icmp),
+		       ip_hdrlen(*pskb) + sizeof(inside->icmp),
 		       &ct->tuplehash[!dir].tuple,
 		       !manip))
 		return 0;
 
 	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
 		/* Reloading "inside" here since manip_pkt inner. */
-		inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+		inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
 		inside->icmp.checksum = 0;
 		inside->icmp.checksum =
 			csum_fold(skb_checksum(*pskb, hdrlen,
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 9cbf3f9be13..2eb3832db3a 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -55,11 +55,11 @@ static int set_addr(struct sk_buff **pskb,
 		}
 
 		/* Relocate data pointer */
-		th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl * 4,
+		th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
 					sizeof(_tcph), &_tcph);
 		if (th == NULL)
 			return -1;
-		*data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
+		*data = (*pskb)->data + ip_hdrlen(*pskb) +
 		    th->doff * 4 + dataoff;
 	} else {
 		if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
@@ -73,8 +73,8 @@ static int set_addr(struct sk_buff **pskb,
 		/* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
 		 * or pull everything in a linear buffer, so we can safely
 		 * use the skb pointers now */
-		*data = (*pskb)->data + (*pskb)->nh.iph->ihl * 4 +
-		    sizeof(struct udphdr);
+		*data = ((*pskb)->data + ip_hdrlen(*pskb) +
+			 sizeof(struct udphdr));
 	}
 
 	return 0;
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 49a90c39ffc..723302afd84 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -190,7 +190,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 				    (int)rep_len - (int)match_len,
 				    ct, ctinfo);
 		/* Tell TCP window tracking about seq change */
-		nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4,
+		nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb),
 					ct, CTINFO2DIR(ctinfo));
 	}
 	return 1;
@@ -318,8 +318,8 @@ nf_nat_sack_adjust(struct sk_buff **pskb,
 	unsigned int dir, optoff, optend;
 	struct nf_conn_nat *nat = nfct_nat(ct);
 
-	optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
-	optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
+	optoff = ip_hdrlen(*pskb) + sizeof(struct tcphdr);
+	optend = ip_hdrlen(*pskb) + tcph->doff * 4;
 
 	if (!skb_make_writable(pskb, optend))
 		return 0;
@@ -371,10 +371,10 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
 	this_way = &nat->info.seq[dir];
 	other_way = &nat->info.seq[!dir];
 
-	if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
+	if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
 		return 0;
 
-	tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
+	tcph = (void *)(*pskb)->data + ip_hdrlen(*pskb);
 	if (after(ntohl(tcph->seq), this_way->correction_pos))
 		newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
 	else
@@ -399,7 +399,7 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
 	if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo))
 		return 0;
 
-	nf_conntrack_tcp_update(*pskb, (*pskb)->nh.iph->ihl*4, ct, dir);
+	nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), ct, dir);
 
 	return 1;
 }
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index b12cd7c314c..bfd88e4e068 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
+#include <net/ip.h>
 #include <linux/udp.h>
 
 #include <net/netfilter/nf_nat.h>
@@ -92,7 +93,7 @@ static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
 	if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
 				      matchoff, matchlen, addr, addrlen))
 		return 0;
-	*dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	*dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
 	return 1;
 
 }
@@ -106,7 +107,7 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb,
 	struct addr_map map;
 	int dataoff, datalen;
 
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
 	datalen = (*pskb)->len - dataoff;
 	if (datalen < sizeof("SIP/2.0") - 1)
 		return NF_DROP;
@@ -155,7 +156,7 @@ static unsigned int mangle_sip_packet(struct sk_buff **pskb,
 		return 0;
 
 	/* We need to reload this. Thanks Patrick. */
-	*dptr = (*pskb)->data + (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	*dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
 	return 1;
 }
 
@@ -168,7 +169,7 @@ static int mangle_content_len(struct sk_buff **pskb,
 	char buffer[sizeof("65536")];
 	int bufflen;
 
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
 
 	/* Get actual SDP lenght */
 	if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff,
@@ -200,7 +201,7 @@ static unsigned int mangle_sdp(struct sk_buff **pskb,
 	char buffer[sizeof("nnn.nnn.nnn.nnn")];
 	unsigned int dataoff, bufflen;
 
-	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
 
 	/* Mangle owner and contact info. */
 	bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 15aa3db8cb3..61ca272165a 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -101,8 +101,7 @@ nf_nat_fn(unsigned int hooknum,
 		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
 			struct icmphdr _hdr, *hp;
 
-			hp = skb_header_pointer(*pskb,
-						(*pskb)->nh.iph->ihl*4,
+			hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
 						sizeof(_hdr), &_hdr);
 			if (hp != NULL &&
 			    hp->type == ICMP_REDIRECT)
@@ -203,7 +202,7 @@ nf_nat_out(unsigned int hooknum,
 
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr) ||
-	    (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+	    ip_hdrlen(*pskb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
 	ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
@@ -236,7 +235,7 @@ nf_nat_local_fn(unsigned int hooknum,
 
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr) ||
-	    (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
+	    ip_hdrlen(*pskb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
 	ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 112a21d0c6d..76f0cf66f95 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -102,7 +102,7 @@ ip6t_local_out_hook(unsigned int hook,
 #if 0
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ip6t_hook: happy cracking.\n");
 		return NF_ACCEPT;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 0c468d35a93..da2c1994539 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -138,7 +138,7 @@ ip6t_local_hook(unsigned int hook,
 #if 0
 	/* root is playing with raw sockets. */
 	if ((*pskb)->len < sizeof(struct iphdr)
-	    || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
+	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ip6t_hook: happy cracking.\n");
 		return NF_ACCEPT;
-- 
cgit v1.2.3


From e023dd643798c4f06c16466af90b4d250e4b8bd7 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 12 Mar 2007 20:09:36 -0300
Subject: [IPMR]: Fix bug introduced when converting to
 skb_network_reset_header

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 54b7543190f..b24dffe3bd4 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1096,6 +1096,7 @@ static struct notifier_block ip_mr_notifier={
 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 {
 	struct iphdr *iph;
+	struct iphdr *old_iph = skb->nh.iph;
 
 	skb_push(skb, sizeof(struct iphdr));
 	skb->h.ipiph = skb->nh.iph;
@@ -1103,8 +1104,8 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 	iph = skb->nh.iph;
 
 	iph->version	= 	4;
-	iph->tos	=	skb->nh.iph->tos;
-	iph->ttl	=	skb->nh.iph->ttl;
+	iph->tos	=	old_iph->tos;
+	iph->ttl	=	old_iph->ttl;
 	iph->frag_off	=	0;
 	iph->daddr	=	daddr;
 	iph->saddr	=	saddr;
-- 
cgit v1.2.3


From eddc9ec53be2ecdbf4efe0efd4a83052594f0ac0 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 20 Apr 2007 22:47:35 -0700
Subject: [SK_BUFF]: Introduce ip_hdr(), remove skb->nh.iph

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ieee1394/eth1394.c                     |  2 +-
 drivers/net/8139cp.c                           |  4 +-
 drivers/net/atl1/atl1_main.c                   | 15 +++---
 drivers/net/bnx2.c                             | 18 ++++----
 drivers/net/bonding/bond_alb.c                 | 17 ++++---
 drivers/net/bonding/bond_main.c                |  2 +-
 drivers/net/chelsio/sge.c                      |  4 +-
 drivers/net/cxgb3/sge.c                        |  2 +-
 drivers/net/e1000/e1000_main.c                 | 16 +++----
 drivers/net/ehea/ehea_main.c                   | 20 ++++----
 drivers/net/gianfar.c                          |  2 +-
 drivers/net/ioc3-eth.c                         |  4 +-
 drivers/net/ixgb/ixgb_main.c                   | 12 +++--
 drivers/net/loopback.c                         |  6 +--
 drivers/net/mv643xx_eth.c                      |  4 +-
 drivers/net/netxen/netxen_nic_hw.c             |  4 +-
 drivers/net/ns83820.c                          |  4 +-
 drivers/net/pasemi_mac.c                       |  2 +-
 drivers/net/r8169.c                            |  2 +-
 drivers/net/sky2.c                             |  2 +-
 drivers/net/spider_net.c                       |  2 +-
 drivers/net/tg3.c                              | 30 ++++++------
 drivers/net/via-velocity.c                     |  2 +-
 drivers/s390/net/qeth_main.c                   | 15 +++---
 drivers/s390/net/qeth_tso.h                    | 12 ++---
 include/linux/ip.h                             |  9 ++++
 include/linux/skbuff.h                         |  1 -
 include/net/inet_ecn.h                         |  2 +-
 include/net/ip.h                               |  2 +-
 include/net/pkt_cls.h                          |  4 +-
 net/atm/mpc.c                                  |  2 +-
 net/bridge/br_netfilter.c                      | 10 ++--
 net/core/netpoll.c                             |  2 +-
 net/core/pktgen.c                              |  2 +-
 net/dccp/ipv4.c                                | 39 ++++++++--------
 net/econet/af_econet.c                         |  4 +-
 net/ieee80211/ieee80211_tx.c                   |  2 +-
 net/ipv4/af_inet.c                             | 10 ++--
 net/ipv4/ah4.c                                 |  4 +-
 net/ipv4/arp.c                                 |  6 +--
 net/ipv4/cipso_ipv4.c                          |  2 +-
 net/ipv4/esp4.c                                |  4 +-
 net/ipv4/icmp.c                                | 10 ++--
 net/ipv4/igmp.c                                | 14 +++---
 net/ipv4/ip_forward.c                          |  4 +-
 net/ipv4/ip_fragment.c                         |  7 ++-
 net/ipv4/ip_gre.c                              | 10 ++--
 net/ipv4/ip_input.c                            | 18 ++++----
 net/ipv4/ip_options.c                          | 14 +++---
 net/ipv4/ip_output.c                           | 20 ++++----
 net/ipv4/ip_sockglue.c                         | 13 +++---
 net/ipv4/ipcomp.c                              | 50 ++++++++------------
 net/ipv4/ipconfig.c                            |  6 +--
 net/ipv4/ipip.c                                | 15 +++---
 net/ipv4/ipmr.c                                | 55 +++++++++++-----------
 net/ipv4/ipvs/ip_vs_app.c                      |  4 +-
 net/ipv4/ipvs/ip_vs_core.c                     | 38 +++++++--------
 net/ipv4/ipvs/ip_vs_dh.c                       |  2 +-
 net/ipv4/ipvs/ip_vs_ftp.c                      |  4 +-
 net/ipv4/ipvs/ip_vs_lblc.c                     |  2 +-
 net/ipv4/ipvs/ip_vs_lblcr.c                    |  2 +-
 net/ipv4/ipvs/ip_vs_proto_tcp.c                | 12 ++---
 net/ipv4/ipvs/ip_vs_proto_udp.c                | 14 +++---
 net/ipv4/ipvs/ip_vs_sh.c                       |  2 +-
 net/ipv4/ipvs/ip_vs_xmit.c                     | 24 +++++-----
 net/ipv4/netfilter.c                           |  8 ++--
 net/ipv4/netfilter/ip_conntrack_core.c         | 20 ++++----
 net/ipv4/netfilter/ip_conntrack_helper_h323.c  | 12 ++---
 net/ipv4/netfilter/ip_conntrack_netbios_ns.c   |  2 +-
 net/ipv4/netfilter/ip_conntrack_proto_sctp.c   |  4 +-
 net/ipv4/netfilter/ip_conntrack_proto_tcp.c    | 16 +++----
 net/ipv4/netfilter/ip_conntrack_proto_udp.c    |  8 ++--
 net/ipv4/netfilter/ip_conntrack_standalone.c   |  2 +-
 net/ipv4/netfilter/ip_nat_helper.c             | 12 ++---
 net/ipv4/netfilter/ip_nat_helper_h323.c        |  2 +-
 net/ipv4/netfilter/ip_nat_rule.c               |  2 +-
 net/ipv4/netfilter/ip_nat_snmp_basic.c         |  4 +-
 net/ipv4/netfilter/ip_nat_standalone.c         | 10 ++--
 net/ipv4/netfilter/ip_tables.c                 |  4 +-
 net/ipv4/netfilter/ipt_CLUSTERIP.c             |  4 +-
 net/ipv4/netfilter/ipt_ECN.c                   |  8 ++--
 net/ipv4/netfilter/ipt_NETMAP.c                |  4 +-
 net/ipv4/netfilter/ipt_REJECT.c                | 26 +++++------
 net/ipv4/netfilter/ipt_TOS.c                   |  4 +-
 net/ipv4/netfilter/ipt_TTL.c                   |  2 +-
 net/ipv4/netfilter/ipt_addrtype.c              |  2 +-
 net/ipv4/netfilter/ipt_ecn.c                   |  4 +-
 net/ipv4/netfilter/ipt_iprange.c               |  2 +-
 net/ipv4/netfilter/ipt_recent.c                |  6 +--
 net/ipv4/netfilter/ipt_tos.c                   |  2 +-
 net/ipv4/netfilter/ipt_ttl.c                   |  9 ++--
 net/ipv4/netfilter/iptable_mangle.c            | 25 ++++++----
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 10 ++--
 net/ipv4/netfilter/nf_nat_h323.c               |  2 +-
 net/ipv4/netfilter/nf_nat_helper.c             | 12 ++---
 net/ipv4/netfilter/nf_nat_rule.c               |  2 +-
 net/ipv4/netfilter/nf_nat_snmp_basic.c         |  4 +-
 net/ipv4/netfilter/nf_nat_standalone.c         | 11 ++---
 net/ipv4/raw.c                                 |  4 +-
 net/ipv4/route.c                               |  6 +--
 net/ipv4/syncookies.c                          |  8 ++--
 net/ipv4/tcp_ipv4.c                            | 64 +++++++++++++-------------
 net/ipv4/udp.c                                 | 17 +++----
 net/ipv4/xfrm4_input.c                         | 21 +++++----
 net/ipv4/xfrm4_mode_beet.c                     | 10 ++--
 net/ipv4/xfrm4_mode_transport.c                | 11 ++---
 net/ipv4/xfrm4_mode_tunnel.c                   | 10 ++--
 net/ipv4/xfrm4_output.c                        |  3 +-
 net/ipv4/xfrm4_policy.c                        |  2 +-
 net/ipv4/xfrm4_tunnel.c                        |  3 +-
 net/ipv6/datagram.c                            |  3 +-
 net/ipv6/ip6_tunnel.c                          |  8 ++--
 net/ipv6/sit.c                                 |  4 +-
 net/ipv6/udp.c                                 |  2 +-
 net/netfilter/nf_conntrack_netbios_ns.c        |  2 +-
 net/netfilter/xt_DSCP.c                        |  4 +-
 net/netfilter/xt_TCPMSS.c                      |  4 +-
 net/netfilter/xt_dscp.c                        |  2 +-
 net/netfilter/xt_hashlimit.c                   |  6 +--
 net/netfilter/xt_length.c                      |  2 +-
 net/netfilter/xt_pkttype.c                     |  2 +-
 net/rxrpc/connection.c                         |  2 +-
 net/rxrpc/transport.c                          |  4 +-
 net/sched/cls_rsvp.h                           |  2 +-
 net/sched/sch_atm.c                            |  4 +-
 net/sched/sch_dsmark.c                         |  4 +-
 net/sched/sch_sfq.c                            |  2 +-
 net/sctp/input.c                               |  2 +-
 net/sctp/ipv6.c                                |  4 +-
 net/sctp/protocol.c                            |  8 ++--
 net/sctp/sm_make_chunk.c                       |  4 +-
 net/sctp/sm_statefuns.c                        |  2 +-
 132 files changed, 565 insertions(+), 564 deletions(-)

diff --git a/drivers/ieee1394/eth1394.c b/drivers/ieee1394/eth1394.c
index db2346f4d20..a364003ba47 100644
--- a/drivers/ieee1394/eth1394.c
+++ b/drivers/ieee1394/eth1394.c
@@ -1668,7 +1668,7 @@ static int ether1394_tx (struct sk_buff *skb, struct net_device *dev)
 	if (memcmp(eth->h_dest, dev->broadcast, ETH1394_ALEN) == 0 ||
 	    proto == htons(ETH_P_ARP) ||
 	    (proto == htons(ETH_P_IP) &&
-	     IN_MULTICAST(ntohl(skb->nh.iph->daddr)))) {
+	     IN_MULTICAST(ntohl(ip_hdr(skb)->daddr)))) {
 		tx_type = ETH1394_GASP;
 		dest_node = LOCAL_BUS | ALL_NODES;
 		max_payload = priv->bc_maxpayload - ETHER1394_GASP_OVERHEAD;
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index 2f704cb06e7..e8c9f27817b 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -806,7 +806,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev)
 		if (mss)
 			flags |= LargeSend | ((mss & MSSMask) << MSSShift);
 		else if (skb->ip_summed == CHECKSUM_PARTIAL) {
-			const struct iphdr *ip = skb->nh.iph;
+			const struct iphdr *ip = ip_hdr(skb);
 			if (ip->protocol == IPPROTO_TCP)
 				flags |= IPCS | TCPCS;
 			else if (ip->protocol == IPPROTO_UDP)
@@ -825,7 +825,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev)
 		u32 first_len, first_eor;
 		dma_addr_t first_mapping;
 		int frag, first_entry = entry;
-		const struct iphdr *ip = skb->nh.iph;
+		const struct iphdr *ip = ip_hdr(skb);
 
 		/* We must give this initial chunk to the device last.
 		 * Otherwise we could race with the device.
diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index 793a61b2140..d2be79a30f8 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -1294,17 +1294,18 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb,
 		}
 
 		if (skb->protocol == ntohs(ETH_P_IP)) {
-			skb->nh.iph->tot_len = 0;
-			skb->nh.iph->check = 0;
-			skb->h.th->check =
-			    ~csum_tcpudp_magic(skb->nh.iph->saddr,
-					       skb->nh.iph->daddr, 0,
-					       IPPROTO_TCP, 0);
+			struct iphdr *iph = ip_hdr(skb);
+
+			iph->tot_len = 0;
+			iph->check = 0;
+			skb->h.th->check = ~csum_tcpudp_magic(iph->saddr,
+							      iph->daddr, 0,
+							      IPPROTO_TCP, 0);
 			ipofst = skb_network_offset(skb);
 			if (ipofst != ENET_HEADER_SIZE) /* 802.3 frame */
 				tso->tsopl |= 1 << TSO_PARAM_ETHTYPE_SHIFT;
 
-			tso->tsopl |= (skb->nh.iph->ihl &
+			tso->tsopl |= (iph->ihl &
 				CSUM_PARAM_IPHL_MASK) << CSUM_PARAM_IPHL_SHIFT;
 			tso->tsopl |= ((skb->h.th->doff << 2) &
 				TSO_PARAM_TCPHDRLEN_MASK) << TSO_PARAM_TCPHDRLEN_SHIFT;
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index b8091c55d44..eb0c4f1d448 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -4513,6 +4513,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if ((mss = skb_shinfo(skb)->gso_size) &&
 		(skb->len > (bp->dev->mtu + ETH_HLEN))) {
 		u32 tcp_opt_len, ip_tcp_len;
+		struct iphdr *iph;
 
 		if (skb_header_cloned(skb) &&
 		    pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
@@ -4529,16 +4530,15 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 		ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
-		skb->nh.iph->check = 0;
-		skb->nh.iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len);
-		skb->h.th->check =
-			~csum_tcpudp_magic(skb->nh.iph->saddr,
-					    skb->nh.iph->daddr,
-					    0, IPPROTO_TCP, 0);
+		iph = ip_hdr(skb);
+		iph->check = 0;
+		iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len);
+		skb->h.th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+						      0, IPPROTO_TCP, 0);
 
-		if (tcp_opt_len || (skb->nh.iph->ihl > 5)) {
-			vlan_tag_flags |= ((skb->nh.iph->ihl - 5) +
-				(tcp_opt_len >> 2)) << 8;
+		if (tcp_opt_len || (iph->ihl > 5)) {
+			vlan_tag_flags |= ((iph->ihl - 5) +
+					   (tcp_opt_len >> 2)) << 8;
 		}
 	}
 	else
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 86cfcb3f813..8555afa574a 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -112,7 +112,7 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
 /* Forward declaration */
 static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
 
-static inline u8 _simple_hash(u8 *hash_start, int hash_size)
+static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
 {
 	int i;
 	u8 hash = 0;
@@ -1268,7 +1268,7 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 	int hash_size = 0;
 	int do_tx_balance = 1;
 	u32 hash_index = 0;
-	u8 *hash_start = NULL;
+	const u8 *hash_start = NULL;
 	int res = 1;
 
 	skb_reset_mac_header(skb);
@@ -1285,15 +1285,18 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 	}
 
 	switch (ntohs(skb->protocol)) {
-	case ETH_P_IP:
+	case ETH_P_IP: {
+		const struct iphdr *iph = ip_hdr(skb);
+
 		if ((memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) ||
-		    (skb->nh.iph->daddr == ip_bcast) ||
-		    (skb->nh.iph->protocol == IPPROTO_IGMP)) {
+		    (iph->daddr == ip_bcast) ||
+		    (iph->protocol == IPPROTO_IGMP)) {
 			do_tx_balance = 0;
 			break;
 		}
-		hash_start = (char*)&(skb->nh.iph->daddr);
-		hash_size = sizeof(skb->nh.iph->daddr);
+		hash_start = (char *)&(iph->daddr);
+		hash_size = sizeof(iph->daddr);
+	}
 		break;
 	case ETH_P_IPV6:
 		if (memcmp(eth_data->h_dest, mac_bcast, ETH_ALEN) == 0) {
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index e4724d874e7..7f11388893f 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3476,7 +3476,7 @@ static int bond_xmit_hash_policy_l34(struct sk_buff *skb,
 				    struct net_device *bond_dev, int count)
 {
 	struct ethhdr *data = (struct ethhdr *)skb->data;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	u16 *layer4hdr = (u16 *)((u32 *)iph + iph->ihl);
 	int layer4_xor = 0;
 
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 8cdee67d582..c357f45a16c 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1871,7 +1871,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		hdr = (struct cpl_tx_pkt_lso *)skb_push(skb, sizeof(*hdr));
 		hdr->opcode = CPL_TX_PKT_LSO;
 		hdr->ip_csum_dis = hdr->l4_csum_dis = 0;
-		hdr->ip_hdr_words = skb->nh.iph->ihl;
+		hdr->ip_hdr_words = ip_hdr(skb)->ihl;
 		hdr->tcp_hdr_words = skb->h.th->doff;
 		hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type,
 							  skb_shinfo(skb)->gso_size));
@@ -1912,7 +1912,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		if (!(adapter->flags & UDP_CSUM_CAPABLE) &&
 		    skb->ip_summed == CHECKSUM_PARTIAL &&
-		    skb->nh.iph->protocol == IPPROTO_UDP) {
+		    ip_hdr(skb)->protocol == IPPROTO_UDP) {
 			if (unlikely(skb_checksum_help(skb))) {
 				pr_debug("%s: unable to do udp checksum\n", dev->name);
 				dev_kfree_skb_any(skb);
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 7e9e9db4fb9..892e5dcafa0 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -900,7 +900,7 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
 		eth_type = skb_network_offset(skb) == ETH_HLEN ?
 		    CPL_ETH_II : CPL_ETH_II_VLAN;
 		tso_info |= V_LSO_ETH_TYPE(eth_type) |
-		    V_LSO_IPHDR_WORDS(skb->nh.iph->ihl) |
+		    V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
 		    V_LSO_TCPHDR_WORDS(skb->h.th->doff);
 		hdr->lso_info = htonl(tso_info);
 		flits = 3;
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 86161011b53..c324866c978 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2890,14 +2890,12 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 		hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
 		mss = skb_shinfo(skb)->gso_size;
 		if (skb->protocol == htons(ETH_P_IP)) {
-			skb->nh.iph->tot_len = 0;
-			skb->nh.iph->check = 0;
-			skb->h.th->check =
-				~csum_tcpudp_magic(skb->nh.iph->saddr,
-						   skb->nh.iph->daddr,
-						   0,
-						   IPPROTO_TCP,
-						   0);
+			struct iphdr *iph = ip_hdr(skb);
+			iph->tot_len = 0;
+			iph->check = 0;
+			skb->h.th->check = ~csum_tcpudp_magic(iph->saddr,
+							      iph->daddr, 0,
+							      IPPROTO_TCP, 0);
 			cmd_length = E1000_TXD_CMD_IP;
 			ipcse = skb->h.raw - skb->data - 1;
 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
@@ -2911,7 +2909,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 			ipcse = 0;
 		}
 		ipcss = skb_network_offset(skb);
-		ipcso = (void *)&(skb->nh.iph->check) - (void *)skb->data;
+		ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data;
 		tucss = skb->h.raw - skb->data;
 		tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
 		tucse = 0;
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index b1c90a4fe31..0dc701e611e 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -1262,7 +1262,7 @@ static int ehea_clean_portres(struct ehea_port *port, struct ehea_port_res *pr)
 static inline void write_ip_start_end(struct ehea_swqe *swqe,
 				      const struct sk_buff *skb)
 {
-	swqe->ip_start = (u8)(((u64)skb->nh.iph) - ((u64)skb->data));
+	swqe->ip_start = skb_network_offset(skb);
 	swqe->ip_end = (u8)(swqe->ip_start + ip_hdrlen(skb) - 1);
 }
 
@@ -1688,6 +1688,7 @@ static void ehea_xmit2(struct sk_buff *skb, struct net_device *dev,
 		       struct ehea_swqe *swqe, u32 lkey)
 {
 	if (skb->protocol == htons(ETH_P_IP)) {
+		const struct iphdr *iph = ip_hdr(skb);
 		/* IPv4 */
 		swqe->tx_control |= EHEA_SWQE_CRC
 				 | EHEA_SWQE_IP_CHECKSUM
@@ -1697,15 +1698,15 @@ static void ehea_xmit2(struct sk_buff *skb, struct net_device *dev,
 
 		write_ip_start_end(swqe, skb);
 
-		if (skb->nh.iph->protocol == IPPROTO_UDP) {
-			if ((skb->nh.iph->frag_off & IP_MF) ||
-			    (skb->nh.iph->frag_off & IP_OFFSET))
+		if (iph->protocol == IPPROTO_UDP) {
+			if ((iph->frag_off & IP_MF) ||
+			    (iph->frag_off & IP_OFFSET))
 				/* IP fragment, so don't change cs */
 				swqe->tx_control &= ~EHEA_SWQE_TCP_CHECKSUM;
 			else
 				write_udp_offset_end(swqe, skb);
 
-		} else if (skb->nh.iph->protocol == IPPROTO_TCP) {
+		} else if (iph->protocol == IPPROTO_TCP) {
 			write_tcp_offset_end(swqe, skb);
 		}
 
@@ -1731,10 +1732,11 @@ static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev,
 	int i;
 
 	if (skb->protocol == htons(ETH_P_IP)) {
+		const struct iphdr *iph = ip_hdr(skb);
 		/* IPv4 */
 		write_ip_start_end(swqe, skb);
 
-		if (skb->nh.iph->protocol == IPPROTO_TCP) {
+		if (iph->protocol == IPPROTO_TCP) {
 			swqe->tx_control |= EHEA_SWQE_CRC
 					 | EHEA_SWQE_IP_CHECKSUM
 					 | EHEA_SWQE_TCP_CHECKSUM
@@ -1742,9 +1744,9 @@ static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev,
 
 			write_tcp_offset_end(swqe, skb);
 
-		} else if (skb->nh.iph->protocol == IPPROTO_UDP) {
-			if ((skb->nh.iph->frag_off & IP_MF) ||
-			    (skb->nh.iph->frag_off & IP_OFFSET))
+		} else if (iph->protocol == IPPROTO_UDP) {
+			if ((iph->frag_off & IP_MF) ||
+			    (iph->frag_off & IP_OFFSET))
 				/* IP fragment, so don't change cs */
 				swqe->tx_control |= EHEA_SWQE_CRC
 						 | EHEA_SWQE_IMM_DATA_PRESENT;
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index c7a70933c75..c9abc96a091 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -942,7 +942,7 @@ static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb)
 
 	/* Tell the controller what the protocol is */
 	/* And provide the already calculated phcs */
-	if (skb->nh.iph->protocol == IPPROTO_UDP) {
+	if (ip_hdr(skb)->protocol == IPPROTO_UDP) {
 		flags |= TXFCB_UDP;
 		fcb->phcs = skb->h.uh->check;
 	} else
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index ea07aa5ba51..d375e786b4b 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -1393,9 +1393,9 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * manually.
 	 */
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		int proto = ntohs(skb->nh.iph->protocol);
+		const struct iphdr *ih = ip_hdr(skb);
+		const int proto = ntohs(ih->protocol);
 		unsigned int csoff;
-		struct iphdr *ih = skb->nh.iph;
 		uint32_t csum, ehsum;
 		uint16_t *eh;
 
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index cfb791bb45e..bba4dcaf92e 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1182,6 +1182,8 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
 
 	if (likely(skb_is_gso(skb))) {
 		struct ixgb_buffer *buffer_info;
+		struct iphdr *iph;
+
 		if (skb_header_cloned(skb)) {
 			err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
 			if (err)
@@ -1190,13 +1192,13 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
 
 		hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
 		mss = skb_shinfo(skb)->gso_size;
-		skb->nh.iph->tot_len = 0;
-		skb->nh.iph->check = 0;
-		skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr,
-						      skb->nh.iph->daddr,
+		iph = ip_hdr(skb);
+		iph->tot_len = 0;
+		iph->check = 0;
+		skb->h.th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
 						      0, IPPROTO_TCP, 0);
 		ipcss = skb_network_offset(skb);
-		ipcso = (void *)&(skb->nh.iph->check) - (void *)skb->data;
+		ipcso = (void *)&(iph->check) - (void *)skb->data;
 		ipcse = skb->h.raw - skb->data - 1;
 		tucss = skb->h.raw - skb->data;
 		tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 9265c27b13b..20b5cb10136 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -75,7 +75,7 @@ static DEFINE_PER_CPU(struct pcpu_lstats, pcpu_lstats);
 #ifdef LOOPBACK_TSO
 static void emulate_large_send_offload(struct sk_buff *skb)
 {
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	struct tcphdr *th = (struct tcphdr *)(skb_network_header(skb) +
 					      (iph->ihl * 4));
 	unsigned int doffset = (iph->ihl + th->doff) * 4;
@@ -93,7 +93,7 @@ static void emulate_large_send_offload(struct sk_buff *skb)
 		skb_reserve(nskb, 32);
 		skb_set_mac_header(nskb, -ETH_HLEN);
 		skb_reset_network_header(nskb);
-		iph = nskb->nh.iph;
+		iph = ip_hdr(nskb);
 		memcpy(nskb->data, skb_network_header(skb), doffset);
 		if (skb_copy_bits(skb,
 				  doffset + offset,
@@ -145,7 +145,7 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
 #ifdef LOOPBACK_TSO
 	if (skb_is_gso(skb)) {
 		BUG_ON(skb->protocol != htons(ETH_P_IP));
-		BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP);
+		BUG_ON(ip_hdr(skb)->protocol != IPPROTO_TCP);
 
 		emulate_large_send_offload(skb);
 		return 0;
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index cd9369a285e..6b39a268ec2 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -1161,9 +1161,9 @@ static void eth_tx_submit_descs_for_skb(struct mv643xx_private *mp,
 
 		cmd_sts |= ETH_GEN_TCP_UDP_CHECKSUM |
 			   ETH_GEN_IP_V_4_CHECKSUM  |
-			   skb->nh.iph->ihl << ETH_TX_IHL_SHIFT;
+			   ip_hdr(skb)->ihl << ETH_TX_IHL_SHIFT;
 
-		switch (skb->nh.iph->protocol) {
+		switch (ip_hdr(skb)->protocol) {
 		case IPPROTO_UDP:
 			cmd_sts |= ETH_UDP_FRAME;
 			desc->l4i_chk = skb->h.uh->check;
diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c
index b2f5032937e..28d68c3550e 100644
--- a/drivers/net/netxen/netxen_nic_hw.c
+++ b/drivers/net/netxen/netxen_nic_hw.c
@@ -378,9 +378,9 @@ void netxen_tso_check(struct netxen_adapter *adapter,
 					  skb->h.th->doff * 4);
 		netxen_set_cmd_desc_opcode(desc, TX_TCP_LSO);
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		if (skb->nh.iph->protocol == IPPROTO_TCP) {
+		if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
 			netxen_set_cmd_desc_opcode(desc, TX_TCP_PKT);
-		} else if (skb->nh.iph->protocol == IPPROTO_UDP) {
+		} else if (ip_hdr(skb)->protocol == IPPROTO_UDP) {
 			netxen_set_cmd_desc_opcode(desc, TX_UDP_PKT);
 		} else {
 			return;
diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c
index 747988b12ec..6a32338623f 100644
--- a/drivers/net/ns83820.c
+++ b/drivers/net/ns83820.c
@@ -1156,9 +1156,9 @@ again:
 	extsts = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		extsts |= EXTSTS_IPPKT;
-		if (IPPROTO_TCP == skb->nh.iph->protocol)
+		if (IPPROTO_TCP == ip_hdr(skb)->protocol)
 			extsts |= EXTSTS_TCPPKT;
-		else if (IPPROTO_UDP == skb->nh.iph->protocol)
+		else if (IPPROTO_UDP == ip_hdr(skb)->protocol)
 			extsts |= EXTSTS_UDPPKT;
 	}
 
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index 82218720bc3..1d8129986cc 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -731,7 +731,7 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		const unsigned char *nh = skb_network_header(skb);
 
-		switch (skb->nh.iph->protocol) {
+		switch (ip_hdr(skb)->protocol) {
 		case IPPROTO_TCP:
 			dflags |= XCT_MACTX_CSUM_TCP;
 			dflags |= XCT_MACTX_IPH((skb->h.raw - skb->nh.raw) >> 2);
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 34280f94e9f..45876a854f0 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -2284,7 +2284,7 @@ static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev)
 			return LargeSend | ((mss & MSSMask) << MSSShift);
 	}
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		const struct iphdr *ip = skb->nh.iph;
+		const struct iphdr *ip = ip_hdr(skb);
 
 		if (ip->protocol == IPPROTO_TCP)
 			return IPCS | TCPCS;
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 51e994f26a8..a37bb205f3d 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1428,7 +1428,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 		tcpsum |= offset + skb->csum_offset;	/* sum write */
 
 		ctrl = CALSUM | WR_SUM | INIT_SUM | LOCK_SUM;
-		if (skb->nh.iph->protocol == IPPROTO_UDP)
+		if (ip_hdr(skb)->protocol == IPPROTO_UDP)
 			ctrl |= UDPTCP;
 
 		if (tcpsum != sky2->tx_tcpsum) {
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index f7e0ac7f789..230da14b1b6 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -720,7 +720,7 @@ spider_net_prepare_tx_descr(struct spider_net_card *card,
 	spin_unlock_irqrestore(&chain->lock, flags);
 
 	if (skb->protocol == htons(ETH_P_IP) && skb->ip_summed == CHECKSUM_PARTIAL)
-		switch (skb->nh.iph->protocol) {
+		switch (ip_hdr(skb)->protocol) {
 		case IPPROTO_TCP:
 			hwdescr->dmac_cmd_status |= SPIDER_NET_DMAC_TCP;
 			break;
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 62a3bba0097..76a31afe20d 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3909,12 +3909,13 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
 			mss |= (skb_headlen(skb) - ETH_HLEN) << 9;
 		else {
+			struct iphdr *iph = ip_hdr(skb);
+
 			tcp_opt_len = ((skb->h.th->doff - 5) * 4);
 			ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
-			skb->nh.iph->check = 0;
-			skb->nh.iph->tot_len = htons(mss + ip_tcp_len +
-						     tcp_opt_len);
+			iph->check = 0;
+			iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len);
 			mss |= (ip_tcp_len + tcp_opt_len) << 9;
 		}
 
@@ -4055,6 +4056,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 	mss = 0;
 	if (skb->len > (tp->dev->mtu + ETH_HLEN) &&
 	    (mss = skb_shinfo(skb)->gso_size) != 0) {
+		struct iphdr *iph;
 		int tcp_opt_len, ip_tcp_len, hdr_len;
 
 		if (skb_header_cloned(skb) &&
@@ -4074,34 +4076,32 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 		base_flags |= (TXD_FLAG_CPU_PRE_DMA |
 			       TXD_FLAG_CPU_POST_DMA);
 
-		skb->nh.iph->check = 0;
-		skb->nh.iph->tot_len = htons(mss + hdr_len);
+		iph = ip_hdr(skb);
+		iph->check = 0;
+		iph->tot_len = htons(mss + hdr_len);
 		if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) {
 			skb->h.th->check = 0;
 			base_flags &= ~TXD_FLAG_TCPUDP_CSUM;
 		}
 		else {
-			skb->h.th->check =
-				~csum_tcpudp_magic(skb->nh.iph->saddr,
-						   skb->nh.iph->daddr,
-						   0, IPPROTO_TCP, 0);
+			skb->h.th->check = ~csum_tcpudp_magic(iph->saddr,
+							      iph->daddr, 0,
+							      IPPROTO_TCP, 0);
 		}
 
 		if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO) ||
 		    (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705)) {
-			if (tcp_opt_len || skb->nh.iph->ihl > 5) {
+			if (tcp_opt_len || iph->ihl > 5) {
 				int tsflags;
 
-				tsflags = ((skb->nh.iph->ihl - 5) +
-					   (tcp_opt_len >> 2));
+				tsflags = (iph->ihl - 5) + (tcp_opt_len >> 2);
 				mss |= (tsflags << 11);
 			}
 		} else {
-			if (tcp_opt_len || skb->nh.iph->ihl > 5) {
+			if (tcp_opt_len || iph->ihl > 5) {
 				int tsflags;
 
-				tsflags = ((skb->nh.iph->ihl - 5) +
-					   (tcp_opt_len >> 2));
+				tsflags = (iph->ihl - 5) + (tcp_opt_len >> 2);
 				base_flags |= tsflags << 12;
 			}
 		}
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 9f6cc1569b3..422eaf8ea12 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -2006,7 +2006,7 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev)
 	 */
 	if ((vptr->flags & VELOCITY_FLAGS_TX_CSUM)
 				 && (skb->ip_summed == CHECKSUM_PARTIAL)) {
-		struct iphdr *ip = skb->nh.iph;
+		const struct iphdr *ip = ip_hdr(skb);
 		if (ip->protocol == IPPROTO_TCP)
 			td_ptr->tdesc1.TCR |= TCR0_TCPCK;
 		else if (ip->protocol == IPPROTO_UDP)
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index 0ff29e0628b..8a07d548a05 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -3820,18 +3820,20 @@ qeth_get_priority_queue(struct qeth_card *card, struct sk_buff *skb,
 			return card->info.is_multicast_different &
 				(card->qdio.no_out_queues - 1);
 		if (card->qdio.do_prio_queueing && (ipv == 4)) {
+			const u8 tos = ip_hdr(skb)->tos;
+
 			if (card->qdio.do_prio_queueing==QETH_PRIO_Q_ING_TOS){
-				if (skb->nh.iph->tos & IP_TOS_NOTIMPORTANT)
+				if (tos & IP_TOS_NOTIMPORTANT)
 					return 3;
-				if (skb->nh.iph->tos & IP_TOS_HIGHRELIABILITY)
+				if (tos & IP_TOS_HIGHRELIABILITY)
 					return 2;
-				if (skb->nh.iph->tos & IP_TOS_HIGHTHROUGHPUT)
+				if (tos & IP_TOS_HIGHTHROUGHPUT)
 					return 1;
-				if (skb->nh.iph->tos & IP_TOS_LOWDELAY)
+				if (tos & IP_TOS_LOWDELAY)
 					return 0;
 			}
 			if (card->qdio.do_prio_queueing==QETH_PRIO_Q_ING_PREC)
-				return 3 - (skb->nh.iph->tos >> 6);
+				return 3 - (tos >> 6);
 		} else if (card->qdio.do_prio_queueing && (ipv == 6)) {
 			/* TODO: IPv6!!! */
 		}
@@ -4041,7 +4043,8 @@ qeth_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
 			    *((u32 *) skb->dst->neighbour->primary_key);
 		} else {
 			/* fill in destination address used in ip header */
-			*((u32 *) (&hdr->hdr.l3.dest_addr[12])) = skb->nh.iph->daddr;
+			*((u32 *)(&hdr->hdr.l3.dest_addr[12])) =
+							   ip_hdr(skb)->daddr;
 		}
 	} else if (ipv == 6) { /* IPv6 or passthru */
 		hdr->hdr.l3.flags = qeth_get_qeth_hdr_flags6(cast_type);
diff --git a/drivers/s390/net/qeth_tso.h b/drivers/s390/net/qeth_tso.h
index 14504afb044..255cb2e9c79 100644
--- a/drivers/s390/net/qeth_tso.h
+++ b/drivers/s390/net/qeth_tso.h
@@ -40,7 +40,7 @@ qeth_tso_fill_header(struct qeth_card *card, struct sk_buff *skb)
 	QETH_DBF_TEXT(trace, 5, "tsofhdr");
 
 	hdr  = (struct qeth_hdr_tso *) skb->data;
-	iph  = skb->nh.iph;
+	iph  = ip_hdr(skb);
 	tcph = skb->h.th;
 	/*fix header to TSO values ...*/
 	hdr->hdr.hdr.l3.id = QETH_HEADER_TYPE_TSO;
@@ -63,13 +63,9 @@ qeth_tso_fill_header(struct qeth_card *card, struct sk_buff *skb)
 static inline void
 qeth_tso_set_tcpip_header(struct qeth_card *card, struct sk_buff *skb)
 {
-	struct iphdr *iph;
-	struct ipv6hdr *ip6h;
-	struct tcphdr *tcph;
-
-	iph  = skb->nh.iph;
-	ip6h = skb->nh.ipv6h;
-	tcph = skb->h.th;
+	struct iphdr *iph    = ip_hdr(skb);
+	struct ipv6hdr *ip6h = skb->nh.ipv6h;
+	struct tcphdr *tcph  = skb->h.th;
 
 	tcph->check = 0;
 	if (skb->protocol == ETH_P_IPV6) {
diff --git a/include/linux/ip.h b/include/linux/ip.h
index 1d36b971a8b..f2f26db16f5 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -104,6 +104,15 @@ struct iphdr {
 	/*The options start here. */
 };
 
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
+{
+	return (struct iphdr *)skb_network_header(skb);
+}
+#endif
+
 struct ip_auth_hdr {
 	__u8  nexthdr;
 	__u8  hdrlen;		/* This one is measured in 32 bit units! */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 870438fba93..62f841b5b70 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -247,7 +247,6 @@ struct sk_buff {
 	} h;
 
 	union {
-		struct iphdr	*iph;
 		struct ipv6hdr	*ipv6h;
 		struct arphdr	*arph;
 		unsigned char	*raw;
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index b9ed3898e36..6fd4452c15d 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -116,7 +116,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb)
 	case __constant_htons(ETH_P_IP):
 		if (skb_network_header(skb) + sizeof(struct iphdr) <=
 		    skb->tail)
-			return IP_ECN_set_ce(skb->nh.iph);
+			return IP_ECN_set_ce(ip_hdr(skb));
 		break;
 
 	case __constant_htons(ETH_P_IPV6):
diff --git a/include/net/ip.h b/include/net/ip.h
index 6f7ba32b199..75f226d26e0 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -46,7 +46,7 @@ struct inet_skb_parm
 
 static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
 {
-	return skb->nh.iph->ihl * 4;
+	return ip_hdr(skb)->ihl * 4;
 }
 
 struct ipcm_cookie
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 8a6b0e7bded..880eb7b5416 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -334,8 +334,8 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
 	return NULL;
 }
 
-static inline int tcf_valid_offset(struct sk_buff *skb, unsigned char *ptr,
-				   int len)
+static inline int tcf_valid_offset(const struct sk_buff *skb,
+				   const unsigned char *ptr, const int len)
 {
 	return unlikely((ptr + len) < skb->tail && ptr > skb->head);
 }
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index bc15728fd84..4d2592c1409 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -715,7 +715,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
 	new_skb->protocol = eth_type_trans(new_skb, dev);
 	skb_reset_network_header(new_skb);
 
-	eg->latest_ip_addr = new_skb->nh.iph->saddr;
+	eg->latest_ip_addr = ip_hdr(new_skb)->saddr;
 	eg->packets_rcvd++;
 	mpc->eg_ops->put(eg);
 
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 8a56d896302..ebe740f6b90 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -48,8 +48,8 @@
 
 #define skb_origaddr(skb)	 (((struct bridge_skb_cb *) \
 				 (skb->nf_bridge->data))->daddr.ipv4)
-#define store_orig_dstaddr(skb)	 (skb_origaddr(skb) = (skb)->nh.iph->daddr)
-#define dnat_took_place(skb)	 (skb_origaddr(skb) != (skb)->nh.iph->daddr)
+#define store_orig_dstaddr(skb)	 (skb_origaddr(skb) = ip_hdr(skb)->daddr)
+#define dnat_took_place(skb)	 (skb_origaddr(skb) != ip_hdr(skb)->daddr)
 
 #ifdef CONFIG_SYSCTL
 static struct ctl_table_header *brnf_sysctl_header;
@@ -265,7 +265,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
 static int br_nf_pre_routing_finish(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
 	int err;
 
@@ -520,14 +520,14 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 		goto inhdr_error;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	if (iph->ihl < 5 || iph->version != 4)
 		goto inhdr_error;
 
 	if (!pskb_may_pull(skb, 4 * iph->ihl))
 		goto inhdr_error;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	if (ip_fast_csum((__u8 *) iph, iph->ihl) != 0)
 		goto inhdr_error;
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 44e030eb6e7..c4cec17be33 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -310,7 +310,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 
 	skb_push(skb, sizeof(*iph));
 	skb_reset_network_header(skb);
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	/* iph->version = 4; iph->ihl = 5; */
 	put_unaligned(0x45, (unsigned char *)iph);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 10d33fc233b..e0faff8eb65 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2391,7 +2391,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 		VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev);
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
-	skb->nh.iph = iph;
+	skb->nh.raw = (unsigned char *)iph;
 	skb->h.uh = udph;
 
 	if (pkt_dev->nfrags <= 0)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 4a83978aa66..b85437dae0e 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -363,8 +363,8 @@ EXPORT_SYMBOL_GPL(dccp_v4_send_check);
 
 static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb)
 {
-	return secure_dccp_sequence_number(skb->nh.iph->daddr,
-					   skb->nh.iph->saddr,
+	return secure_dccp_sequence_number(ip_hdr(skb)->daddr,
+					   ip_hdr(skb)->saddr,
 					   dccp_hdr(skb)->dccph_dport,
 					   dccp_hdr(skb)->dccph_sport);
 }
@@ -405,7 +405,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newinet->opt	   = ireq->opt;
 	ireq->opt	   = NULL;
 	newinet->mc_index  = inet_iif(skb);
-	newinet->mc_ttl	   = skb->nh.iph->ttl;
+	newinet->mc_ttl	   = ip_hdr(skb)->ttl;
 	newinet->id	   = jiffies;
 
 	dccp_sync_mss(newsk, dst_mtu(dst));
@@ -428,7 +428,7 @@ EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
 static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 {
 	const struct dccp_hdr *dh = dccp_hdr(skb);
-	const struct iphdr *iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 	struct sock *nsk;
 	struct request_sock **prev;
 	/* Find possible connection requests. */
@@ -460,8 +460,8 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
 	struct rtable *rt;
 	struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif,
 			    .nl_u = { .ip4_u =
-				      { .daddr = skb->nh.iph->saddr,
-					.saddr = skb->nh.iph->daddr,
+				      { .daddr = ip_hdr(skb)->saddr,
+					.saddr = ip_hdr(skb)->daddr,
 					.tos = RT_CONN_FLAGS(sk) } },
 			    .proto = sk->sk_protocol,
 			    .uli_u = { .ports =
@@ -513,6 +513,7 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 {
 	int err;
 	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
+	const struct iphdr *rxiph;
 	const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
 				       sizeof(struct dccp_hdr_ext) +
 				       sizeof(struct dccp_hdr_reset);
@@ -559,13 +560,13 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
 
 	dccp_csum_outgoing(skb);
-	dh->dccph_checksum = dccp_v4_csum_finish(skb, rxskb->nh.iph->saddr,
-						      rxskb->nh.iph->daddr);
+	rxiph = ip_hdr(rxskb);
+	dh->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr,
+						 rxiph->daddr);
 
 	bh_lock_sock(dccp_v4_ctl_socket->sk);
 	err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk,
-				    rxskb->nh.iph->daddr,
-				    rxskb->nh.iph->saddr, NULL);
+				    rxiph->daddr, rxiph->saddr, NULL);
 	bh_unlock_sock(dccp_v4_ctl_socket->sk);
 
 	if (net_xmit_eval(err) == 0) {
@@ -640,8 +641,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop_and_free;
 
 	ireq = inet_rsk(req);
-	ireq->loc_addr = skb->nh.iph->daddr;
-	ireq->rmt_addr = skb->nh.iph->saddr;
+	ireq->loc_addr = ip_hdr(skb)->daddr;
+	ireq->rmt_addr = ip_hdr(skb)->saddr;
 	ireq->opt	= NULL;
 
 	/*
@@ -809,6 +810,7 @@ EXPORT_SYMBOL_GPL(dccp_invalid_packet);
 static int dccp_v4_rcv(struct sk_buff *skb)
 {
 	const struct dccp_hdr *dh;
+	const struct iphdr *iph;
 	struct sock *sk;
 	int min_cov;
 
@@ -817,8 +819,9 @@ static int dccp_v4_rcv(struct sk_buff *skb)
 	if (dccp_invalid_packet(skb))
 		goto discard_it;
 
+	iph = ip_hdr(skb);
 	/* Step 1: If header checksum is incorrect, drop packet and return */
-	if (dccp_v4_csum_finish(skb, skb->nh.iph->saddr, skb->nh.iph->daddr)) {
+	if (dccp_v4_csum_finish(skb, iph->saddr, iph->daddr)) {
 		DCCP_WARN("dropped packet with invalid checksum\n");
 		goto discard_it;
 	}
@@ -832,8 +835,8 @@ static int dccp_v4_rcv(struct sk_buff *skb)
 		      "src=%u.%u.%u.%u@%-5d "
 		      "dst=%u.%u.%u.%u@%-5d seq=%llu",
 		      dccp_packet_name(dh->dccph_type),
-		      NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport),
-		      NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport),
+		      NIPQUAD(iph->saddr), ntohs(dh->dccph_sport),
+		      NIPQUAD(iph->daddr), ntohs(dh->dccph_dport),
 		      (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
 
 	if (dccp_packet_without_ack(skb)) {
@@ -848,10 +851,8 @@ static int dccp_v4_rcv(struct sk_buff *skb)
 	/* Step 2:
 	 *	Look up flow ID in table and get corresponding socket */
 	sk = __inet_lookup(&dccp_hashinfo,
-			   skb->nh.iph->saddr, dh->dccph_sport,
-			   skb->nh.iph->daddr, dh->dccph_dport,
-			   inet_iif(skb));
-
+			   iph->saddr, dh->dccph_sport,
+			   iph->daddr, dh->dccph_dport, inet_iif(skb));
 	/*
 	 * Step 2:
 	 *	If no socket ...
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 099543f5401..dcc2e4b6b2f 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -848,7 +848,7 @@ static void aun_send_response(__u32 addr, unsigned long seq, int code, int cb)
 
 static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
 {
-	struct iphdr *ip = skb->nh.iph;
+	struct iphdr *ip = ip_hdr(skb);
 	unsigned char stn = ntohl(ip->saddr) & 0xff;
 	struct sock *sk;
 	struct sk_buff *newskb;
@@ -946,7 +946,7 @@ static void aun_data_available(struct sock *sk, int slen)
 	data = skb->h.raw + sizeof(struct udphdr);
 	ah = (struct aunhdr *)data;
 	len = skb->len - sizeof(struct udphdr);
-	ip = skb->nh.iph;
+	ip = ip_hdr(skb);
 
 	switch (ah->code)
 	{
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index 3fca4345ebe..62a8a2b7653 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -228,7 +228,7 @@ static int ieee80211_classify(struct sk_buff *skb)
 	if (eth->h_proto != htons(ETH_P_IP))
 		return 0;
 
-	ip = skb->nh.iph;
+	ip = ip_hdr(skb);
 	switch (ip->tos & 0xfc) {
 	case 0x20:
 		return 2;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ab552a6098f..e7720c72a6e 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1112,7 +1112,7 @@ static int inet_gso_send_check(struct sk_buff *skb)
 	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
 		goto out;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	ihl = iph->ihl * 4;
 	if (ihl < sizeof(*iph))
 		goto out;
@@ -1121,7 +1121,7 @@ static int inet_gso_send_check(struct sk_buff *skb)
 		goto out;
 
 	skb->h.raw = __skb_pull(skb, ihl);
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	proto = iph->protocol & (MAX_INET_PROTOS - 1);
 	err = -EPROTONOSUPPORT;
 
@@ -1155,7 +1155,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
 		goto out;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	ihl = iph->ihl * 4;
 	if (ihl < sizeof(*iph))
 		goto out;
@@ -1164,7 +1164,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 		goto out;
 
 	skb->h.raw = __skb_pull(skb, ihl);
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	id = ntohs(iph->id);
 	proto = iph->protocol & (MAX_INET_PROTOS - 1);
 	segs = ERR_PTR(-EPROTONOSUPPORT);
@@ -1180,7 +1180,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 
 	skb = segs;
 	do {
-		iph = skb->nh.iph;
+		iph = ip_hdr(skb);
 		iph->id = htons(id++);
 		iph->tot_len = htons(skb->len - skb->mac_len);
 		iph->check = 0;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 95ddbbd1552..00fd31da252 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -65,7 +65,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 		char 		buf[60];
 	} tmp_iph;
 
-	top_iph = skb->nh.iph;
+	top_iph = ip_hdr(skb);
 	iph = &tmp_iph.iph;
 
 	iph->tos = top_iph->tos;
@@ -152,7 +152,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb->ip_summed = CHECKSUM_NONE;
 
 	ah = (struct ip_auth_hdr*)skb->data;
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	ihl = skb->data - skb_network_header(skb);
 	memcpy(work_buf, iph, ihl);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index fd36eebbd90..01d0e8dd17d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -342,13 +342,13 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
 	default:
 	case 0:		/* By default announce any local IP */
-		if (skb && inet_addr_type(skb->nh.iph->saddr) == RTN_LOCAL)
-			saddr = skb->nh.iph->saddr;
+		if (skb && inet_addr_type(ip_hdr(skb)->saddr) == RTN_LOCAL)
+			saddr = ip_hdr(skb)->saddr;
 		break;
 	case 1:		/* Restrict announcements of saddr in same subnet */
 		if (!skb)
 			break;
-		saddr = skb->nh.iph->saddr;
+		saddr = ip_hdr(skb)->saddr;
 		if (inet_addr_type(saddr) == RTN_LOCAL) {
 			/* saddr should be known to target */
 			if (inet_addr_onlink(in_dev, target, saddr))
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index b0182aa2c81..11a3404d65a 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1676,7 +1676,7 @@ validate_return:
  */
 void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
 {
-	if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES)
+	if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
 		return;
 
 	if (gateway)
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 222d21e5bbe..ed3deed6644 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -58,7 +58,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	pskb_put(skb, trailer, clen - skb->len);
 
 	__skb_push(skb, skb->data - skb_network_header(skb));
-	top_iph = skb->nh.iph;
+	top_iph = ip_hdr(skb);
 	esph = (struct ip_esp_hdr *)(skb_network_header(skb) +
 				     top_iph->ihl * 4);
 	top_iph->tot_len = htons(skb->len + alen);
@@ -218,7 +218,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	/* ... check padding bits here. Silly. :-) */
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	ihl = iph->ihl * 4;
 
 	if (x->encap) {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ff124d40c58..4d70c21c50a 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -392,7 +392,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 	icmp_param->data.icmph.checksum = 0;
 	icmp_out_count(icmp_param->data.icmph.type);
 
-	inet->tos = skb->nh.iph->tos;
+	inet->tos = ip_hdr(skb)->tos;
 	daddr = ipc.addr = rt->rt_src;
 	ipc.opt = NULL;
 	if (icmp_param->replyopts.optlen) {
@@ -404,7 +404,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 		struct flowi fl = { .nl_u = { .ip4_u =
 					      { .daddr = daddr,
 						.saddr = rt->rt_spec_dst,
-						.tos = RT_TOS(skb->nh.iph->tos) } },
+						.tos = RT_TOS(ip_hdr(skb)->tos) } },
 				    .proto = IPPROTO_ICMP };
 		security_skb_classify_flow(skb, &fl);
 		if (ip_route_output_key(&rt, &fl))
@@ -448,7 +448,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	 *	Check this, icmp_send is called from the most obscure devices
 	 *	sometimes.
 	 */
-	iph = skb_in->nh.iph;
+	iph = ip_hdr(skb_in);
 
 	if ((u8 *)iph < skb_in->head || (u8 *)(iph + 1) > skb_in->tail)
 		goto out;
@@ -676,7 +676,7 @@ static void icmp_unreach(struct sk_buff *skb)
 			printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP "
 					    "type %u, code %u "
 					    "error to a broadcast: %u.%u.%u.%u on %s\n",
-			       NIPQUAD(skb->nh.iph->saddr),
+			       NIPQUAD(ip_hdr(skb)->saddr),
 			       icmph->type, icmph->code,
 			       NIPQUAD(iph->daddr),
 			       skb->dev->name);
@@ -751,7 +751,7 @@ static void icmp_redirect(struct sk_buff *skb)
 		 */
 	case ICMP_REDIR_HOST:
 	case ICMP_REDIR_HOSTTOS:
-		ip_rt_redirect(skb->nh.iph->saddr, iph->daddr,
+		ip_rt_redirect(ip_hdr(skb)->saddr, iph->daddr,
 			       skb->h.icmph->un.gateway,
 			       iph->saddr, skb->dev);
 		break;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 0687a7235a6..f511d03e243 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -315,7 +315,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
 	skb_reset_network_header(skb);
-	pip = skb->nh.iph;
+	pip = ip_hdr(skb);
 	skb_put(skb, sizeof(struct iphdr) + 4);
 
 	pip->version  = 4;
@@ -345,16 +345,14 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 
 static int igmpv3_sendpack(struct sk_buff *skb)
 {
-	struct iphdr *pip = skb->nh.iph;
+	struct iphdr *pip = ip_hdr(skb);
 	struct igmphdr *pig = skb->h.igmph;
-	int iplen, igmplen;
+	const int iplen = skb->tail - skb->nh.raw;
+	const int igmplen = skb->tail - skb->h.raw;
 
-	iplen = skb->tail - (unsigned char *)skb->nh.iph;
 	pip->tot_len = htons(iplen);
 	ip_send_check(pip);
-
-	igmplen = skb->tail - (unsigned char *)skb->h.igmph;
-	pig->csum = ip_compute_csum((void *)skb->h.igmph, igmplen);
+	pig->csum = ip_compute_csum(skb->h.igmph, igmplen);
 
 	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev,
 		       dst_output);
@@ -667,7 +665,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
 	skb_reset_network_header(skb);
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	skb_put(skb, sizeof(struct iphdr) + 4);
 
 	iph->version  = 4;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 369e721c4ba..467ebedb99b 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -74,7 +74,7 @@ int ip_forward(struct sk_buff *skb)
 	 *	that reaches zero, we must reply an ICMP control message telling
 	 *	that the packet's lifetime expired.
 	 */
-	if (skb->nh.iph->ttl <= 1)
+	if (ip_hdr(skb)->ttl <= 1)
 		goto too_many_hops;
 
 	if (!xfrm4_route_forward(skb))
@@ -88,7 +88,7 @@ int ip_forward(struct sk_buff *skb)
 	/* We are about to mangle packet. Copy it! */
 	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
 		goto drop;
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	/* Decrease ttl after skb cow done */
 	ip_decrease_ttl(iph);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index af120b2d533..0231bdcb2ab 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -479,7 +479,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		goto err;
 	}
 
-	offset = ntohs(skb->nh.iph->frag_off);
+	offset = ntohs(ip_hdr(skb)->frag_off);
 	flags = offset & ~IP_OFFSET;
 	offset &= IP_OFFSET;
 	offset <<= 3;		/* offset is in 8-byte chunks */
@@ -676,7 +676,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
 	head->dev = dev;
 	head->tstamp = qp->stamp;
 
-	iph = head->nh.iph;
+	iph = ip_hdr(head);
 	iph->frag_off = 0;
 	iph->tot_len = htons(len);
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
@@ -700,7 +700,6 @@ out_fail:
 /* Process an incoming IP datagram fragment. */
 struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
 {
-	struct iphdr *iph = skb->nh.iph;
 	struct ipq *qp;
 	struct net_device *dev;
 
@@ -713,7 +712,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
 	dev = skb->dev;
 
 	/* Lookup (or create) queue header */
-	if ((qp = ip_find(iph, user)) != NULL) {
+	if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
 		struct sk_buff *ret = NULL;
 
 		spin_lock(&qp->lock);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 7c6fda6fe84..851f46b910f 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -533,7 +533,7 @@ static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
 {
 	if (INET_ECN_is_ce(iph->tos)) {
 		if (skb->protocol == htons(ETH_P_IP)) {
-			IP_ECN_set_ce(skb->nh.iph);
+			IP_ECN_set_ce(ip_hdr(skb));
 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
 			IP6_ECN_set_ce(skb->nh.ipv6h);
 		}
@@ -565,7 +565,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, 16))
 		goto drop_nolock;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	h = skb->data;
 	flags = *(__be16*)h;
 
@@ -670,7 +670,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct net_device_stats *stats = &tunnel->stat;
-	struct iphdr  *old_iph = skb->nh.iph;
+	struct iphdr  *old_iph = ip_hdr(skb);
 	struct iphdr  *tiph;
 	u8     tos;
 	__be16 df;
@@ -825,7 +825,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			skb_set_owner_w(new_skb, skb->sk);
 		dev_kfree_skb(skb);
 		skb = new_skb;
-		old_iph = skb->nh.iph;
+		old_iph = ip_hdr(skb);
 	}
 
 	skb->h.raw = skb->nh.raw;
@@ -841,7 +841,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	 *	Push down and install the IPIP header.
 	 */
 
-	iph 			=	skb->nh.iph;
+	iph 			=	ip_hdr(skb);
 	iph->version		=	4;
 	iph->ihl		=	sizeof(struct iphdr) >> 2;
 	iph->frag_off		=	df;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 2ee132b330f..237880a8043 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -158,7 +158,7 @@ DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly;
 int ip_call_ra_chain(struct sk_buff *skb)
 {
 	struct ip_ra_chain *ra;
-	u8 protocol = skb->nh.iph->protocol;
+	u8 protocol = ip_hdr(skb)->protocol;
 	struct sock *last = NULL;
 
 	read_lock(&ip_ra_lock);
@@ -171,7 +171,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
 		if (sk && inet_sk(sk)->num == protocol &&
 		    (!sk->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
-			if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+			if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
 				skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN);
 				if (skb == NULL) {
 					read_unlock(&ip_ra_lock);
@@ -206,7 +206,7 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
 	rcu_read_lock();
 	{
 		/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
-		int protocol = skb->nh.iph->protocol;
+		int protocol = ip_hdr(skb)->protocol;
 		int hash;
 		struct sock *raw_sk;
 		struct net_protocol *ipprot;
@@ -218,7 +218,7 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
 		/* If there maybe a raw socket we must check - if not we
 		 * don't care less
 		 */
-		if (raw_sk && !raw_v4_input(skb, skb->nh.iph, hash))
+		if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash))
 			raw_sk = NULL;
 
 		if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
@@ -264,7 +264,7 @@ int ip_local_deliver(struct sk_buff *skb)
 	 *	Reassemble IP fragments.
 	 */
 
-	if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
 		skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER);
 		if (!skb)
 			return 0;
@@ -292,7 +292,7 @@ static inline int ip_rcv_options(struct sk_buff *skb)
 		goto drop;
 	}
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	if (ip_options_compile(NULL, skb)) {
 		IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
@@ -328,7 +328,7 @@ drop:
 
 static inline int ip_rcv_finish(struct sk_buff *skb)
 {
-	struct iphdr *iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 
 	/*
 	 *	Initialise the virtual path cache for the packet. It describes
@@ -389,7 +389,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 		goto inhdr_error;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	/*
 	 *	RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
@@ -408,7 +408,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	if (!pskb_may_pull(skb, iph->ihl*4))
 		goto inhdr_error;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
 		goto inhdr_error;
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index f7e9db61256..251346828cb 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -110,7 +110,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
 	if (skb->dst)
 		daddr = ((struct rtable*)skb->dst)->rt_spec_dst;
 	else
-		daddr = skb->nh.iph->daddr;
+		daddr = ip_hdr(skb)->daddr;
 
 	if (sopt->rr) {
 		optlen  = sptr[sopt->rr+1];
@@ -180,7 +180,8 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb)
 			/*
 			 * RFC1812 requires to fix illegal source routes.
 			 */
-			if (memcmp(&skb->nh.iph->saddr, &start[soffset+3], 4) == 0)
+			if (memcmp(&ip_hdr(skb)->saddr,
+				   &start[soffset + 3], 4) == 0)
 				doffset -= 4;
 		}
 		if (doffset > 3) {
@@ -269,7 +270,8 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb)
 		optptr = iph + sizeof(struct iphdr);
 		opt->is_data = 0;
 	} else {
-		optptr = opt->is_data ? opt->__data : (unsigned char*)&(skb->nh.iph[1]);
+		optptr = opt->is_data ? opt->__data :
+					(unsigned char *)&(ip_hdr(skb)[1]);
 		iph = optptr - sizeof(struct iphdr);
 	}
 
@@ -587,7 +589,7 @@ void ip_forward_options(struct sk_buff *skb)
 		if (srrptr + 3 <= srrspace) {
 			opt->is_changed = 1;
 			ip_rt_get_source(&optptr[srrptr-1], rt);
-			skb->nh.iph->daddr = rt->rt_dst;
+			ip_hdr(skb)->daddr = rt->rt_dst;
 			optptr[2] = srrptr+4;
 		} else if (net_ratelimit())
 			printk(KERN_CRIT "ip_forward(): Argh! Destination lost!\n");
@@ -599,7 +601,7 @@ void ip_forward_options(struct sk_buff *skb)
 	}
 	if (opt->is_changed) {
 		opt->is_changed = 0;
-		ip_send_check(skb->nh.iph);
+		ip_send_check(ip_hdr(skb));
 	}
 }
 
@@ -608,7 +610,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 	struct ip_options *opt = &(IPCB(skb)->opt);
 	int srrspace, srrptr;
 	__be32 nexthop;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	unsigned char *optptr = skb_network_header(skb) + opt->srr;
 	struct rtable *rt = (struct rtable*)skb->dst;
 	struct rtable *rt2;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 15de9d43950..1abc48899f2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -127,7 +127,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	/* Build the IP header. */
 	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
 	skb_reset_network_header(skb);
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	iph->version  = 4;
 	iph->ihl      = 5;
 	iph->tos      = inet->tos;
@@ -245,7 +245,7 @@ int ip_mc_output(struct sk_buff *skb)
 
 		/* Multicasts with ttl 0 must not go beyond the host */
 
-		if (skb->nh.iph->ttl == 0) {
+		if (ip_hdr(skb)->ttl == 0) {
 			kfree_skb(skb);
 			return 0;
 		}
@@ -332,7 +332,7 @@ packet_routed:
 	/* OK, we know where to send it, allocate and build IP header. */
 	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->optlen : 0));
 	skb_reset_network_header(skb);
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
 	iph->tot_len = htons(skb->len);
 	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
@@ -428,7 +428,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 	 *	Point into the IP datagram header.
 	 */
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
 		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
@@ -504,7 +504,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 				__skb_push(frag, hlen);
 				skb_reset_network_header(frag);
 				memcpy(skb_network_header(frag), iph, hlen);
-				iph = frag->nh.iph;
+				iph = ip_hdr(frag);
 				iph->tot_len = htons(frag->len);
 				ip_copy_metadata(frag, skb);
 				if (offset == 0)
@@ -619,7 +619,7 @@ slow_path:
 		/*
 		 *	Fill in the new header fields.
 		 */
-		iph = skb2->nh.iph;
+		iph = ip_hdr(skb2);
 		iph->frag_off = htons((offset >> 3));
 
 		/* ANK: dirty, but effective trick. Upgrade options only if
@@ -1125,7 +1125,7 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 			 */
 			data = skb_put(skb, fragheaderlen + fraggap);
 			skb_reset_network_header(skb);
-			iph = skb->nh.iph;
+			iph = ip_hdr(skb);
 			data += fragheaderlen;
 			skb->h.raw = data;
 
@@ -1352,7 +1352,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 		struct flowi fl = { .nl_u = { .ip4_u =
 					      { .daddr = daddr,
 						.saddr = rt->rt_spec_dst,
-						.tos = RT_TOS(skb->nh.iph->tos) } },
+						.tos = RT_TOS(ip_hdr(skb)->tos) } },
 				    /* Not quite clean, but right. */
 				    .uli_u = { .ports =
 					       { .sport = skb->h.th->dest,
@@ -1370,9 +1370,9 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 	   with locally disabled BH and that sk cannot be already spinlocked.
 	 */
 	bh_lock_sock(sk);
-	inet->tos = skb->nh.iph->tos;
+	inet->tos = ip_hdr(skb)->tos;
 	sk->sk_priority = skb->priority;
-	sk->sk_protocol = skb->nh.iph->protocol;
+	sk->sk_protocol = ip_hdr(skb)->protocol;
 	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
 		       &ipc, rt, MSG_DONTWAIT);
 	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 70888e1ef6b..fabc250e16d 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -59,7 +59,7 @@ static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
 	struct in_pktinfo info;
 	struct rtable *rt = (struct rtable *)skb->dst;
 
-	info.ipi_addr.s_addr = skb->nh.iph->daddr;
+	info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
 	if (rt) {
 		info.ipi_ifindex = rt->rt_iif;
 		info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
@@ -73,13 +73,13 @@ static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
 
 static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
 {
-	int ttl = skb->nh.iph->ttl;
+	int ttl = ip_hdr(skb)->ttl;
 	put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
 }
 
 static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
 {
-	put_cmsg(msg, SOL_IP, IP_TOS, 1, &skb->nh.iph->tos);
+	put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
 }
 
 static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
@@ -87,7 +87,8 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
 	if (IPCB(skb)->opt.optlen == 0)
 		return;
 
-	put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen, skb->nh.iph+1);
+	put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
+		 ip_hdr(skb) + 1);
 }
 
 
@@ -299,7 +300,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
 
 	skb_put(skb, sizeof(struct iphdr));
 	skb_reset_network_header(skb);
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	iph->daddr = daddr;
 
 	serr = SKB_EXT_ERR(skb);
@@ -369,7 +370,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
 		struct inet_sock *inet = inet_sk(sk);
 
 		sin->sin_family = AF_INET;
-		sin->sin_addr.s_addr = skb->nh.iph->saddr;
+		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 		sin->sin_port = 0;
 		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
 		if (inet->cmsg_flags)
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index aa704b88f01..8eb46064c52 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -43,21 +43,15 @@ static LIST_HEAD(ipcomp_tfms_list);
 
 static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 {
-	int err, plen, dlen;
 	struct ipcomp_data *ipcd = x->data;
-	u8 *start, *scratch;
-	struct crypto_comp *tfm;
-	int cpu;
-
-	plen = skb->len;
-	dlen = IPCOMP_SCRATCH_SIZE;
-	start = skb->data;
+	const int plen = skb->len;
+	int dlen = IPCOMP_SCRATCH_SIZE;
+	const u8 *start = skb->data;
+	const int cpu = get_cpu();
+	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+	int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
 
-	cpu = get_cpu();
-	scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
-	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-
-	err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
 	if (err)
 		goto out;
 
@@ -90,7 +84,7 @@ static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb->ip_summed = CHECKSUM_NONE;
 
 	/* Remove ipcomp header and decompress original payload */
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	ipch = (void *)skb->data;
 	iph->protocol = ipch->nexthdr;
 	skb->h.raw = skb->nh.raw + sizeof(*ipch);
@@ -103,23 +97,16 @@ out:
 
 static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
 {
-	int err, plen, dlen, ihlen;
-	struct iphdr *iph = skb->nh.iph;
 	struct ipcomp_data *ipcd = x->data;
-	u8 *start, *scratch;
-	struct crypto_comp *tfm;
-	int cpu;
+	const int ihlen = ip_hdrlen(skb);
+	const int plen = skb->len - ihlen;
+	int dlen = IPCOMP_SCRATCH_SIZE;
+	u8 *start = skb->data + ihlen;
+	const int cpu = get_cpu();
+	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
+	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+	int err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
 
-	ihlen = iph->ihl * 4;
-	plen = skb->len - ihlen;
-	dlen = IPCOMP_SCRATCH_SIZE;
-	start = skb->data + ihlen;
-
-	cpu = get_cpu();
-	scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
-	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-
-	err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
 	if (err)
 		goto out;
 
@@ -142,12 +129,11 @@ out:
 static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
-	struct iphdr *iph;
 	struct ip_comp_hdr *ipch;
 	struct ipcomp_data *ipcd = x->data;
 	int hdr_len = 0;
+	struct iphdr *iph = ip_hdr(skb);
 
-	iph = skb->nh.iph;
 	iph->tot_len = htons(skb->len);
 	hdr_len = iph->ihl * 4;
 	if ((skb->len - hdr_len) < ipcd->threshold) {
@@ -159,7 +145,7 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
 		goto out_ok;
 
 	err = ipcomp_compress(x, skb);
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	if (err) {
 		goto out_ok;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 6e8998409cb..6b91c9f5d57 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -703,7 +703,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
 
 	/* Construct IP header */
 	skb_reset_network_header(skb);
-	h = skb->nh.iph;
+	h = ip_hdr(skb);
 	h->version = 4;
 	h->ihl = 5;
 	h->tot_len = htons(sizeof(struct bootp_pkt));
@@ -846,7 +846,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 			   sizeof(struct udphdr)))
 		goto drop;
 
-	b = (struct bootp_pkt *) skb->nh.iph;
+	b = (struct bootp_pkt *)skb_network_header(skb);
 	h = &b->iph;
 
 	if (h->ihl != 5 || h->version != 4 || h->protocol != IPPROTO_UDP)
@@ -884,7 +884,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 	if (!pskb_may_pull(skb, skb->len))
 		goto drop;
 
-	b = (struct bootp_pkt *) skb->nh.iph;
+	b = (struct bootp_pkt *)skb_network_header(skb);
 	h = &b->iph;
 
 	/* One reply at a time, please. */
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 843cc09f961..b7f6ff4705b 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -461,9 +461,10 @@ out:
 #endif
 }
 
-static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff *skb)
+static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
+					struct sk_buff *skb)
 {
-	struct iphdr *inner_iph = skb->nh.iph;
+	struct iphdr *inner_iph = ip_hdr(skb);
 
 	if (INET_ECN_is_ce(outer_iph->tos))
 		IP_ECN_set_ce(inner_iph);
@@ -471,10 +472,8 @@ static inline void ipip_ecn_decapsulate(struct iphdr *outer_iph, struct sk_buff
 
 static int ipip_rcv(struct sk_buff *skb)
 {
-	struct iphdr *iph;
 	struct ip_tunnel *tunnel;
-
-	iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 
 	read_lock(&ipip_lock);
 	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
@@ -521,7 +520,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	__be16 df = tiph->frag_off;
 	struct rtable *rt;     			/* Route to the other host */
 	struct net_device *tdev;			/* Device to other host */
-	struct iphdr  *old_iph = skb->nh.iph;
+	struct iphdr  *old_iph = ip_hdr(skb);
 	struct iphdr  *iph;			/* Our new IP header */
 	int    max_headroom;			/* The extra header space needed */
 	__be32 dst = tiph->daddr;
@@ -615,7 +614,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			skb_set_owner_w(new_skb, skb->sk);
 		dev_kfree_skb(skb);
 		skb = new_skb;
-		old_iph = skb->nh.iph;
+		old_iph = ip_hdr(skb);
 	}
 
 	skb->h.raw = skb->nh.raw;
@@ -631,7 +630,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	 *	Push down and install the IPIP header.
 	 */
 
-	iph 			=	skb->nh.iph;
+	iph 			=	ip_hdr(skb);
 	iph->version		=	4;
 	iph->ihl		=	sizeof(struct iphdr)>>2;
 	iph->frag_off		=	df;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index b24dffe3bd4..e0021499093 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -303,7 +303,7 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
 	atomic_dec(&cache_resolve_queue_len);
 
 	while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
-		if (skb->nh.iph->version == 0) {
+		if (ip_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 			nlh->nlmsg_type = NLMSG_ERROR;
 			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
@@ -509,7 +509,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 	 */
 
 	while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
-		if (skb->nh.iph->version == 0) {
+		if (ip_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 
 			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
@@ -569,8 +569,9 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
 		msg->im_mbz = 0;
 		msg->im_vif = reg_vif_num;
-		skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
-		skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
+		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
+		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
+					     sizeof(struct iphdr));
 	} else
 #endif
 	{
@@ -579,10 +580,10 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 	 *	Copy the IP header
 	 */
 
-	skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
+	skb->nh.raw = skb_put(skb, ihl);
 	memcpy(skb->data,pkt->data,ihl);
-	skb->nh.iph->protocol = 0;			/* Flag to the kernel this is a route add */
-	msg = (struct igmpmsg*)skb->nh.iph;
+	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
+	msg = (struct igmpmsg *)skb_network_header(skb);
 	msg->im_vif = vifi;
 	skb->dst = dst_clone(pkt->dst);
 
@@ -594,7 +595,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 	igmp->type	=
 	msg->im_msgtype = assert;
 	igmp->code 	=	0;
-	skb->nh.iph->tot_len=htons(skb->len);			/* Fix the length */
+	ip_hdr(skb)->tot_len = htons(skb->len);			/* Fix the length */
 	skb->h.raw = skb->nh.raw;
 	}
 
@@ -624,11 +625,12 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
 {
 	int err;
 	struct mfc_cache *c;
+	const struct iphdr *iph = ip_hdr(skb);
 
 	spin_lock_bh(&mfc_unres_lock);
 	for (c=mfc_unres_queue; c; c=c->next) {
-		if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
-		    c->mfc_origin == skb->nh.iph->saddr)
+		if (c->mfc_mcastgrp == iph->daddr &&
+		    c->mfc_origin == iph->saddr)
 			break;
 	}
 
@@ -648,9 +650,9 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
 		/*
 		 *	Fill in the new cache entry
 		 */
-		c->mfc_parent=-1;
-		c->mfc_origin=skb->nh.iph->saddr;
-		c->mfc_mcastgrp=skb->nh.iph->daddr;
+		c->mfc_parent	= -1;
+		c->mfc_origin	= iph->saddr;
+		c->mfc_mcastgrp	= iph->daddr;
 
 		/*
 		 *	Reflect first query at mrouted.
@@ -1096,12 +1098,12 @@ static struct notifier_block ip_mr_notifier={
 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 {
 	struct iphdr *iph;
-	struct iphdr *old_iph = skb->nh.iph;
+	struct iphdr *old_iph = ip_hdr(skb);
 
 	skb_push(skb, sizeof(struct iphdr));
-	skb->h.ipiph = skb->nh.iph;
+	skb->h.raw = skb->nh.raw;
 	skb_reset_network_header(skb);
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	iph->version	= 	4;
 	iph->tos	=	old_iph->tos;
@@ -1137,7 +1139,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
 
 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 {
-	struct iphdr *iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 	struct vif_device *vif = &vif_table[vifi];
 	struct net_device *dev;
 	struct rtable *rt;
@@ -1203,8 +1205,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 
 	dst_release(skb->dst);
 	skb->dst = &rt->u.dst;
-	iph = skb->nh.iph;
-	ip_decrease_ttl(iph);
+	ip_decrease_ttl(ip_hdr(skb));
 
 	/* FIXME: forward and output firewalls used to be called here.
 	 * What do we do with netfilter? -- RR */
@@ -1304,7 +1305,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
 	 *	Forward the frame
 	 */
 	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
-		if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
+		if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
 			if (psend != -1) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 				if (skb2)
@@ -1350,7 +1351,7 @@ int ip_mr_input(struct sk_buff *skb)
 		    if (IPCB(skb)->opt.router_alert) {
 			    if (ip_call_ra_chain(skb))
 				    return 0;
-		    } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
+		    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
 			    /* IGMPv1 (and broken IGMPv2 implementations sort of
 			       Cisco IOS <= 11.2(8)) do not put router alert
 			       option to IGMP packets destined to routable
@@ -1369,7 +1370,7 @@ int ip_mr_input(struct sk_buff *skb)
 	}
 
 	read_lock(&mrt_lock);
-	cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
+	cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
 
 	/*
 	 *	No usable cache entry
@@ -1580,6 +1581,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 
 	if (cache==NULL) {
 		struct sk_buff *skb2;
+		struct iphdr *iph;
 		struct net_device *dev;
 		int vif;
 
@@ -1601,10 +1603,11 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 
 		skb_push(skb2, sizeof(struct iphdr));
 		skb_reset_network_header(skb2);
-		skb2->nh.iph->ihl = sizeof(struct iphdr)>>2;
-		skb2->nh.iph->saddr = rt->rt_src;
-		skb2->nh.iph->daddr = rt->rt_dst;
-		skb2->nh.iph->version = 0;
+		iph = ip_hdr(skb2);
+		iph->ihl = sizeof(struct iphdr) >> 2;
+		iph->saddr = rt->rt_src;
+		iph->daddr = rt->rt_dst;
+		iph->version = 0;
 		err = ipmr_cache_unresolved(vif, skb2);
 		read_unlock(&mrt_lock);
 		return err;
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index e5beab28cd0..c8a822c0aa7 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -577,7 +577,6 @@ static const struct file_operations ip_vs_app_fops = {
 int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
 		      char *o_buf, int o_len, char *n_buf, int n_len)
 {
-	struct iphdr *iph;
 	int diff;
 	int o_offset;
 	int o_left;
@@ -607,8 +606,7 @@ int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
 	}
 
 	/* must update the iph total length here */
-	iph = skb->nh.iph;
-	iph->tot_len = htons(skb->len);
+	ip_hdr(skb)->tot_len = htons(skb->len);
 
 	LeaveFunction(9);
 	return 0;
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 7893c00a91f..62cfbed317b 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -212,7 +212,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		    __be16 ports[2])
 {
 	struct ip_vs_conn *cp = NULL;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	struct ip_vs_dest *dest;
 	struct ip_vs_conn *ct;
 	__be16  dport;	 /* destination port to forward */
@@ -381,7 +381,7 @@ struct ip_vs_conn *
 ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 {
 	struct ip_vs_conn *cp = NULL;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	struct ip_vs_dest *dest;
 	__be16 _ports[2], *pptr;
 
@@ -447,7 +447,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 		struct ip_vs_protocol *pp)
 {
 	__be16 _ports[2], *pptr;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 
 	pptr = skb_header_pointer(skb, iph->ihl*4,
 				  sizeof(_ports), _ports);
@@ -546,7 +546,7 @@ ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
 	skb = ip_defrag(skb, user);
 	if (skb)
-		ip_send_check(skb->nh.iph);
+		ip_send_check(ip_hdr(skb));
 	return skb;
 }
 
@@ -557,7 +557,7 @@ ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
 void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		    struct ip_vs_conn *cp, int inout)
 {
-	struct iphdr *iph	 = skb->nh.iph;
+	struct iphdr *iph	 = ip_hdr(skb);
 	unsigned int icmp_offset = iph->ihl*4;
 	struct icmphdr *icmph	 = (struct icmphdr *)(skb_network_header(skb) +
 						      icmp_offset);
@@ -618,14 +618,14 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
 	*related = 1;
 
 	/* reassemble IP fragments */
-	if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
 		skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
 		if (!skb)
 			return NF_STOLEN;
 		*pskb = skb;
 	}
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	offset = ihl = iph->ihl * 4;
 	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
 	if (ic == NULL)
@@ -740,14 +740,14 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 	if (skb->ipvs_property)
 		return NF_ACCEPT;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	if (unlikely(iph->protocol == IPPROTO_ICMP)) {
 		int related, verdict = ip_vs_out_icmp(pskb, &related);
 
 		if (related)
 			return verdict;
 		skb = *pskb;
-		iph = skb->nh.iph;
+		iph = ip_hdr(skb);
 	}
 
 	pp = ip_vs_proto_get(iph->protocol);
@@ -760,7 +760,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 		skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
 		if (!skb)
 			return NF_STOLEN;
-		iph = skb->nh.iph;
+		iph = ip_hdr(skb);
 		*pskb = skb;
 	}
 
@@ -810,8 +810,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 	if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
 		goto drop;
 	skb = *pskb;
-	skb->nh.iph->saddr = cp->vaddr;
-	ip_send_check(skb->nh.iph);
+	ip_hdr(skb)->saddr = cp->vaddr;
+	ip_send_check(ip_hdr(skb));
 
 	/* For policy routing, packets originating from this
 	 * machine itself may be routed differently to packets
@@ -861,7 +861,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
 	*related = 1;
 
 	/* reassemble IP fragments */
-	if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
 		skb = ip_vs_gather_frags(skb,
 					 hooknum == NF_IP_LOCAL_IN ?
 					 IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
@@ -870,7 +870,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
 		*pskb = skb;
 	}
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	offset = ihl = iph->ihl * 4;
 	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
 	if (ic == NULL)
@@ -966,19 +966,19 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
 		     || skb->dev == &loopback_dev || skb->sk)) {
 		IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
 			  skb->pkt_type,
-			  skb->nh.iph->protocol,
-			  NIPQUAD(skb->nh.iph->daddr));
+			  ip_hdr(skb)->protocol,
+			  NIPQUAD(ip_hdr(skb)->daddr));
 		return NF_ACCEPT;
 	}
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	if (unlikely(iph->protocol == IPPROTO_ICMP)) {
 		int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
 
 		if (related)
 			return verdict;
 		skb = *pskb;
-		iph = skb->nh.iph;
+		iph = ip_hdr(skb);
 	}
 
 	/* Protocol supported? */
@@ -1064,7 +1064,7 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb,
 {
 	int r;
 
-	if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP)
+	if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP)
 		return NF_ACCEPT;
 
 	return ip_vs_in_icmp(pskb, &r, hooknum);
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index 502111fba87..dcf5d46aaa5 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -204,7 +204,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 {
 	struct ip_vs_dest *dest;
 	struct ip_vs_dh_bucket *tbl;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 
 	IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n");
 
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index 847c47af040..25bd6896730 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -159,7 +159,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		return 0;
 
 	if (cp->app_data == &ip_vs_ftp_pasv) {
-		iph = (*pskb)->nh.iph;
+		iph = ip_hdr(*pskb);
 		th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
 		data = (char *)th + (th->doff << 2);
 		data_limit = (*pskb)->tail;
@@ -262,7 +262,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	/*
 	 * Detecting whether it is passive
 	 */
-	iph = (*pskb)->nh.iph;
+	iph = ip_hdr(*pskb);
 	th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
 
 	/* Since there may be OPTIONS in the TCP packet and the HLEN is
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index c801273cb88..052f4ed5917 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -521,7 +521,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	struct ip_vs_dest *dest;
 	struct ip_vs_lblc_table *tbl;
 	struct ip_vs_lblc_entry *en;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 
 	IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
 
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 23f9b9e73c8..6225acac7a3 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -775,7 +775,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	struct ip_vs_dest *dest;
 	struct ip_vs_lblcr_table *tbl;
 	struct ip_vs_lblcr_entry *en;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 
 	IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
 
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index e65382da713..e65577a7700 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -83,8 +83,8 @@ tcp_conn_schedule(struct sk_buff *skb,
 	}
 
 	if (th->syn &&
-	    (svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol,
-				     skb->nh.iph->daddr, th->dest))) {
+	    (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
+				     ip_hdr(skb)->daddr, th->dest))) {
 		if (ip_vs_todrop()) {
 			/*
 			 * It seems that we are very loaded.
@@ -142,7 +142,7 @@ tcp_snat_handler(struct sk_buff **pskb,
 			return 0;
 	}
 
-	tcph = (void *)(*pskb)->nh.iph + tcphoff;
+	tcph = (void *)ip_hdr(*pskb) + tcphoff;
 	tcph->source = cp->vport;
 
 	/* Adjust TCP checksums */
@@ -193,7 +193,7 @@ tcp_dnat_handler(struct sk_buff **pskb,
 			return 0;
 	}
 
-	tcph = (void *)(*pskb)->nh.iph + tcphoff;
+	tcph = (void *)ip_hdr(*pskb) + tcphoff;
 	tcph->dest = cp->dport;
 
 	/*
@@ -229,9 +229,9 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 	case CHECKSUM_NONE:
 		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
 	case CHECKSUM_COMPLETE:
-		if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
+		if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 				      skb->len - tcphoff,
-				      skb->nh.iph->protocol, skb->csum)) {
+				      ip_hdr(skb)->protocol, skb->csum)) {
 			IP_VS_DBG_RL_PKT(0, pp, skb, 0,
 					 "Failed checksum for");
 			return 0;
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 2cd95063892..8ee5fe6a101 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -89,8 +89,8 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		return 0;
 	}
 
-	if ((svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol,
-				     skb->nh.iph->daddr, uh->dest))) {
+	if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
+				     ip_hdr(skb)->daddr, uh->dest))) {
 		if (ip_vs_todrop()) {
 			/*
 			 * It seems that we are very loaded.
@@ -151,7 +151,7 @@ udp_snat_handler(struct sk_buff **pskb,
 			return 0;
 	}
 
-	udph = (void *)(*pskb)->nh.iph + udphoff;
+	udph = (void *)ip_hdr(*pskb) + udphoff;
 	udph->source = cp->vport;
 
 	/*
@@ -206,7 +206,7 @@ udp_dnat_handler(struct sk_buff **pskb,
 			return 0;
 	}
 
-	udph = (void *)(*pskb)->nh.iph + udphoff;
+	udph = (void *)ip_hdr(*pskb) + udphoff;
 	udph->dest = cp->dport;
 
 	/*
@@ -251,10 +251,10 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 			skb->csum = skb_checksum(skb, udphoff,
 						 skb->len - udphoff, 0);
 		case CHECKSUM_COMPLETE:
-			if (csum_tcpudp_magic(skb->nh.iph->saddr,
-					      skb->nh.iph->daddr,
+			if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
+					      ip_hdr(skb)->daddr,
 					      skb->len - udphoff,
-					      skb->nh.iph->protocol,
+					      ip_hdr(skb)->protocol,
 					      skb->csum)) {
 				IP_VS_DBG_RL_PKT(0, pp, skb, 0,
 						 "Failed checksum for");
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 338668f88fe..1b25b00ef1e 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -201,7 +201,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 {
 	struct ip_vs_dest *dest;
 	struct ip_vs_sh_bucket *tbl;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 
 	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
 
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index d1403d0855e..a7aee682203 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -156,7 +156,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		  struct ip_vs_protocol *pp)
 {
 	struct rtable *rt;			/* Route to the other host */
-	struct iphdr  *iph = skb->nh.iph;
+	struct iphdr  *iph = ip_hdr(skb);
 	u8     tos = iph->tos;
 	int    mtu;
 	struct flowi fl = {
@@ -193,7 +193,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		ip_rt_put(rt);
 		return NF_STOLEN;
 	}
-	ip_send_check(skb->nh.iph);
+	ip_send_check(ip_hdr(skb));
 
 	/* drop old route */
 	dst_release(skb->dst);
@@ -226,7 +226,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 {
 	struct rtable *rt;		/* Route to the other host */
 	int mtu;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 
 	EnterFunction(10);
 
@@ -266,8 +266,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp))
 		goto tx_error;
-	skb->nh.iph->daddr = cp->daddr;
-	ip_send_check(skb->nh.iph);
+	ip_hdr(skb)->daddr = cp->daddr;
+	ip_send_check(ip_hdr(skb));
 
 	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
 
@@ -320,7 +320,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 {
 	struct rtable *rt;			/* Route to the other host */
 	struct net_device *tdev;		/* Device to other host */
-	struct iphdr  *old_iph = skb->nh.iph;
+	struct iphdr  *old_iph = ip_hdr(skb);
 	u8     tos = old_iph->tos;
 	__be16 df = old_iph->frag_off;
 	struct iphdr  *iph;			/* Our new IP header */
@@ -377,7 +377,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		}
 		kfree_skb(skb);
 		skb = new_skb;
-		old_iph = skb->nh.iph;
+		old_iph = ip_hdr(skb);
 	}
 
 	skb->h.raw = (void *) old_iph;
@@ -396,7 +396,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/*
 	 *	Push down and install the IPIP header.
 	 */
-	iph			=	skb->nh.iph;
+	iph			=	ip_hdr(skb);
 	iph->version		=	4;
 	iph->ihl		=	sizeof(struct iphdr)>>2;
 	iph->frag_off		=	df;
@@ -436,7 +436,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	      struct ip_vs_protocol *pp)
 {
 	struct rtable *rt;			/* Route to the other host */
-	struct iphdr  *iph = skb->nh.iph;
+	struct iphdr  *iph = ip_hdr(skb);
 	int    mtu;
 
 	EnterFunction(10);
@@ -461,7 +461,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		ip_rt_put(rt);
 		return NF_STOLEN;
 	}
-	ip_send_check(skb->nh.iph);
+	ip_send_check(ip_hdr(skb));
 
 	/* drop old route */
 	dst_release(skb->dst);
@@ -515,12 +515,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	 * mangle and send the packet here (only for VS/NAT)
 	 */
 
-	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(skb->nh.iph->tos))))
+	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
 		goto tx_error_icmp;
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->u.dst);
-	if ((skb->len > mtu) && (skb->nh.iph->frag_off & htons(IP_DF))) {
+	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 6069a11514f..b44192924f9 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -10,7 +10,7 @@
 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
 int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
 {
-	struct iphdr *iph = (*pskb)->nh.iph;
+	const struct iphdr *iph = ip_hdr(*pskb);
 	struct rtable *rt;
 	struct flowi fl = {};
 	struct dst_entry *odst;
@@ -142,7 +142,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info)
 	struct ip_rt_info *rt_info = nf_info_reroute(info);
 
 	if (info->hook == NF_IP_LOCAL_OUT) {
-		const struct iphdr *iph = skb->nh.iph;
+		const struct iphdr *iph = ip_hdr(skb);
 
 		rt_info->tos = iph->tos;
 		rt_info->daddr = iph->daddr;
@@ -155,7 +155,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
 	const struct ip_rt_info *rt_info = nf_info_reroute(info);
 
 	if (info->hook == NF_IP_LOCAL_OUT) {
-		struct iphdr *iph = (*pskb)->nh.iph;
+		const struct iphdr *iph = ip_hdr(*pskb);
 
 		if (!(iph->tos == rt_info->tos
 		      && iph->daddr == rt_info->daddr
@@ -168,7 +168,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
 __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
 			    unsigned int dataoff, u_int8_t protocol)
 {
-	struct iphdr *iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 	__sum16 csum = 0;
 
 	switch (skb->ip_summed) {
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 8c013d9f690..986c0c81294 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -748,9 +748,9 @@ resolve_normal_ct(struct sk_buff *skb,
 	struct ip_conntrack_tuple_hash *h;
 	struct ip_conntrack *ct;
 
-	IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
+	IP_NF_ASSERT((ip_hdr(skb)->frag_off & htons(IP_OFFSET)) == 0);
 
-	if (!ip_ct_get_tuple(skb->nh.iph, skb, ip_hdrlen(skb), &tuple,proto))
+	if (!ip_ct_get_tuple(ip_hdr(skb), skb, ip_hdrlen(skb), &tuple,proto))
 		return NULL;
 
 	/* look for tuple match */
@@ -811,10 +811,10 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
 	}
 
 	/* Never happen */
-	if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
+	if (ip_hdr(*pskb)->frag_off & htons(IP_OFFSET)) {
 		if (net_ratelimit()) {
 		printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
-		       (*pskb)->nh.iph->protocol, hooknum);
+		       ip_hdr(*pskb)->protocol, hooknum);
 		}
 		return NF_DROP;
 	}
@@ -825,17 +825,17 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
 	if ((*pskb)->pkt_type == PACKET_BROADCAST) {
 		printk("Broadcast packet!\n");
 		return NF_ACCEPT;
-	} else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
+	} else if ((ip_hdr(*pskb)->daddr & htonl(0x000000FF))
 		   == htonl(0x000000FF)) {
 		printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
-		       NIPQUAD((*pskb)->nh.iph->saddr),
-		       NIPQUAD((*pskb)->nh.iph->daddr),
+		       NIPQUAD(ip_hdr(*pskb)->saddr),
+		       NIPQUAD(ip_hdr(*pskb)->daddr),
 		       (*pskb)->sk, (*pskb)->pkt_type);
 	}
 #endif
 
 	/* rcu_read_lock()ed by nf_hook_slow */
-	proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);
+	proto = __ip_conntrack_proto_find(ip_hdr(*pskb)->protocol);
 
 	/* It may be an special packet, error, unclean...
 	 * inverse of the return code tells to the netfilter
@@ -1152,7 +1152,7 @@ void __ip_ct_refresh_acct(struct ip_conntrack *ct,
 	if (do_acct) {
 		ct->counters[CTINFO2DIR(ctinfo)].packets++;
 		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
-						ntohs(skb->nh.iph->tot_len);
+						ntohs(ip_hdr(skb)->tot_len);
 		if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
 		    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
 			event |= IPCT_COUNTER_FILLING;
@@ -1210,7 +1210,7 @@ ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
 	local_bh_enable();
 
 	if (skb)
-		ip_send_check(skb->nh.iph);
+		ip_send_check(ip_hdr(skb));
 	return skb;
 }
 
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
index 5d638149b0e..cecb6e0c8ed 100644
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c
+++ b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
@@ -576,8 +576,8 @@ static int h245_help(struct sk_buff **pskb, struct ip_conntrack *ct,
 	/* Process each TPKT */
 	while (get_tpkt_data(pskb, ct, ctinfo, &data, &datalen, &dataoff)) {
 		DEBUGP("ip_ct_h245: TPKT %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
-		       NIPQUAD((*pskb)->nh.iph->saddr),
-		       NIPQUAD((*pskb)->nh.iph->daddr), datalen);
+		       NIPQUAD(ip_hdr(*pskb)->saddr),
+		       NIPQUAD(ip_hdr(*pskb)->daddr), datalen);
 
 		/* Decode H.245 signal */
 		ret = DecodeMultimediaSystemControlMessage(data, datalen,
@@ -1128,8 +1128,8 @@ static int q931_help(struct sk_buff **pskb, struct ip_conntrack *ct,
 	/* Process each TPKT */
 	while (get_tpkt_data(pskb, ct, ctinfo, &data, &datalen, &dataoff)) {
 		DEBUGP("ip_ct_q931: TPKT %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
-		       NIPQUAD((*pskb)->nh.iph->saddr),
-		       NIPQUAD((*pskb)->nh.iph->daddr), datalen);
+		       NIPQUAD(ip_hdr(*pskb)->saddr),
+		       NIPQUAD(ip_hdr(*pskb)->daddr), datalen);
 
 		/* Decode Q.931 signal */
 		ret = DecodeQ931(data, datalen, &q931);
@@ -1741,8 +1741,8 @@ static int ras_help(struct sk_buff **pskb, struct ip_conntrack *ct,
 	if (data == NULL)
 		goto accept;
 	DEBUGP("ip_ct_ras: RAS message %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
-	       NIPQUAD((*pskb)->nh.iph->saddr),
-	       NIPQUAD((*pskb)->nh.iph->daddr), datalen);
+	       NIPQUAD(ip_hdr(*pskb)->saddr),
+	       NIPQUAD(ip_hdr(*pskb)->daddr), datalen);
 
 	/* Decode RAS message */
 	ret = DecodeRasMessage(data, datalen, &ras);
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index cc6dd49c9da..df07c5f1d87 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -45,7 +45,7 @@ static int help(struct sk_buff **pskb,
 		struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
 {
 	struct ip_conntrack_expect *exp;
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 	struct rtable *rt = (struct rtable *)(*pskb)->dst;
 	struct in_device *in_dev;
 	__be32 mask = 0;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index e29c436144b..91d0c05c8e8 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -316,7 +316,7 @@ static int sctp_packet(struct ip_conntrack *conntrack,
 		       enum ip_conntrack_info ctinfo)
 {
 	enum sctp_conntrack newconntrack, oldsctpstate;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	sctp_sctphdr_t _sctph, *sh;
 	sctp_chunkhdr_t _sch, *sch;
 	u_int32_t offset, count;
@@ -430,7 +430,7 @@ static int sctp_new(struct ip_conntrack *conntrack,
 		    const struct sk_buff *skb)
 {
 	enum sctp_conntrack newconntrack;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	sctp_sctphdr_t _sctph, *sh;
 	sctp_chunkhdr_t _sch, *sch;
 	u_int32_t offset, count;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index fce3a3c6981..d03436edfd9 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -770,8 +770,8 @@ void ip_conntrack_tcp_update(struct sk_buff *skb,
 			     struct ip_conntrack *conntrack,
 			     enum ip_conntrack_dir dir)
 {
-	struct iphdr *iph = skb->nh.iph;
-	struct tcphdr *tcph = (void *)skb->nh.iph + ip_hdrlen(skb);
+	struct iphdr *iph = ip_hdr(skb);
+	struct tcphdr *tcph = (void *)iph + ip_hdrlen(skb);
 	__u32 end;
 #ifdef DEBUGP_VARS
 	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
@@ -834,13 +834,13 @@ static int tcp_error(struct sk_buff *skb,
 		     enum ip_conntrack_info *ctinfo,
 		     unsigned int hooknum)
 {
-	struct iphdr *iph = skb->nh.iph;
+	const unsigned int hdrlen = ip_hdrlen(skb);
 	struct tcphdr _tcph, *th;
-	unsigned int tcplen = skb->len - iph->ihl * 4;
+	unsigned int tcplen = skb->len - hdrlen;
 	u_int8_t tcpflags;
 
 	/* Smaller that minimal TCP header? */
-	th = skb_header_pointer(skb, iph->ihl * 4,
+	th = skb_header_pointer(skb, hdrlen,
 				sizeof(_tcph), &_tcph);
 	if (th == NULL) {
 		if (LOG_INVALID(IPPROTO_TCP))
@@ -863,7 +863,7 @@ static int tcp_error(struct sk_buff *skb,
 	 */
 	/* FIXME: Source route IP option packets --RR */
 	if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
-	    nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_TCP)) {
+	    nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_TCP)) {
 		if (LOG_INVALID(IPPROTO_TCP))
 			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				  "ip_ct_tcp: bad TCP checksum ");
@@ -889,7 +889,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
 {
 	enum tcp_conntrack new_state, old_state;
 	enum ip_conntrack_dir dir;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	struct tcphdr *th, _tcph;
 	unsigned long timeout;
 	unsigned int index;
@@ -1062,7 +1062,7 @@ static int tcp_new(struct ip_conntrack *conntrack,
 		   const struct sk_buff *skb)
 {
 	enum tcp_conntrack new_state;
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	struct tcphdr *th, _tcph;
 #ifdef DEBUGP_VARS
 	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 14c30c646c7..3b47987bf1b 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -89,12 +89,12 @@ static int udp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
 static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
 		     unsigned int hooknum)
 {
-	struct iphdr *iph = skb->nh.iph;
-	unsigned int udplen = skb->len - iph->ihl * 4;
+	const unsigned int hdrlen = ip_hdrlen(skb);
+	unsigned int udplen = skb->len - hdrlen;
 	struct udphdr _hdr, *hdr;
 
 	/* Header is too small? */
-	hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr);
+	hdr = skb_header_pointer(skb, hdrlen, sizeof(_hdr), &_hdr);
 	if (hdr == NULL) {
 		if (LOG_INVALID(IPPROTO_UDP))
 			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
@@ -119,7 +119,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
 	 * because the checksum is assumed to be correct.
 	 * FIXME: Source route IP option packets --RR */
 	if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
-	    nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) {
+	    nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_UDP)) {
 		if (LOG_INVALID(IPPROTO_UDP))
 			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
 				  "ip_ct_udp: bad UDP checksum ");
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 92609a4dcd7..c32200153d6 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -439,7 +439,7 @@ static unsigned int ip_conntrack_defrag(unsigned int hooknum,
 #endif
 
 	/* Gather fragments. */
-	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+	if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) {
 		*pskb = ip_ct_gather_frags(*pskb,
 					   hooknum == NF_IP_PRE_ROUTING ?
 					   IP_DEFRAG_CONNTRACK_IN :
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index 25624e55856..4cddc295174 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -94,7 +94,7 @@ static void mangle_contents(struct sk_buff *skb,
 	unsigned char *data;
 
 	BUG_ON(skb_is_nonlinear(skb));
-	data = (unsigned char *)skb->nh.iph + dataoff;
+	data = skb_network_header(skb) + dataoff;
 
 	/* move post-replacement */
 	memmove(data + match_offset + rep_len,
@@ -118,8 +118,8 @@ static void mangle_contents(struct sk_buff *skb,
 	}
 
 	/* fix IP hdr checksum information */
-	skb->nh.iph->tot_len = htons(skb->len);
-	ip_send_check(skb->nh.iph);
+	ip_hdr(skb)->tot_len = htons(skb->len);
+	ip_send_check(ip_hdr(skb));
 }
 
 /* Unusual, but possible case. */
@@ -173,7 +173,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
 
 	SKB_LINEAR_ASSERT(*pskb);
 
-	iph = (*pskb)->nh.iph;
+	iph = ip_hdr(*pskb);
 	tcph = (void *)iph + iph->ihl*4;
 
 	oldlen = (*pskb)->len - iph->ihl*4;
@@ -227,7 +227,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
 	int datalen, oldlen;
 
 	/* UDP helpers might accidentally mangle the wrong packet */
-	iph = (*pskb)->nh.iph;
+	iph = ip_hdr(*pskb);
 	if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
 			       match_offset + match_len)
 		return 0;
@@ -240,7 +240,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb,
 	    && !enlarge_skb(pskb, rep_len - match_len))
 		return 0;
 
-	iph = (*pskb)->nh.iph;
+	iph = ip_hdr(*pskb);
 	udph = (void *)iph + iph->ihl*4;
 
 	oldlen = (*pskb)->len - iph->ihl*4;
diff --git a/net/ipv4/netfilter/ip_nat_helper_h323.c b/net/ipv4/netfilter/ip_nat_helper_h323.c
index 8b1e3388bd0..0d9444f9236 100644
--- a/net/ipv4/netfilter/ip_nat_helper_h323.c
+++ b/net/ipv4/netfilter/ip_nat_helper_h323.c
@@ -46,7 +46,7 @@ static int set_addr(struct sk_buff **pskb,
 	buf.port = htons(port);
 	addroff += dataoff;
 
-	if ((*pskb)->nh.iph->protocol == IPPROTO_TCP) {
+	if (ip_hdr(*pskb)->protocol == IPPROTO_TCP) {
 		if (!ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
 					      addroff, sizeof(buf),
 					      (char *) &buf, sizeof(buf))) {
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 080eb1d9220..25415a91e02 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -158,7 +158,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
 
 	if (hooknum == NF_IP_LOCAL_OUT
 	    && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
-		warn_if_extra_mangle((*pskb)->nh.iph->daddr,
+		warn_if_extra_mangle(ip_hdr(*pskb)->daddr,
 				     mr->range[0].min_ip);
 
 	return ip_nat_setup_info(ct, &mr->range[0], hooknum);
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index e41d0efae51..025e0458778 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -1193,7 +1193,7 @@ static int snmp_translate(struct ip_conntrack *ct,
 			  enum ip_conntrack_info ctinfo,
 			  struct sk_buff **pskb)
 {
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 	struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
 	u_int16_t udplen = ntohs(udph->len);
 	u_int16_t paylen = udplen - sizeof(struct udphdr);
@@ -1234,7 +1234,7 @@ static int help(struct sk_buff **pskb,
 {
 	int dir = CTINFO2DIR(ctinfo);
 	unsigned int ret;
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 	struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
 
 	/* SNMP replies and originating SNMP traps get mangled */
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index dbaaf78ff9a..32f7bf661fc 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -97,7 +97,7 @@ ip_nat_fn(unsigned int hooknum,
 
 	/* We never see fragments: conntrack defrags on pre-routing
 	   and local-out, and ip_nat_out protects post-routing. */
-	IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
+	IP_NF_ASSERT(!(ip_hdr(*pskb)->frag_off
 		       & htons(IP_MF|IP_OFFSET)));
 
 	ct = ip_conntrack_get(*pskb, &ctinfo);
@@ -109,7 +109,7 @@ ip_nat_fn(unsigned int hooknum,
 		/* Exception: ICMP redirect to new connection (not in
 		   hash table yet).  We must not let this through, in
 		   case we're doing NAT to the same network. */
-		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+		if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
 			struct icmphdr _hdr, *hp;
 
 			hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
@@ -128,7 +128,7 @@ ip_nat_fn(unsigned int hooknum,
 	switch (ctinfo) {
 	case IP_CT_RELATED:
 	case IP_CT_RELATED+IP_CT_IS_REPLY:
-		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+		if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
 			if (!ip_nat_icmp_reply_translation(ct, ctinfo,
 							   hooknum, pskb))
 				return NF_DROP;
@@ -184,11 +184,11 @@ ip_nat_in(unsigned int hooknum,
 	  int (*okfn)(struct sk_buff *))
 {
 	unsigned int ret;
-	__be32 daddr = (*pskb)->nh.iph->daddr;
+	__be32 daddr = ip_hdr(*pskb)->daddr;
 
 	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN
-	    && daddr != (*pskb)->nh.iph->daddr) {
+	    && daddr != ip_hdr(*pskb)->daddr) {
 		dst_release((*pskb)->dst);
 		(*pskb)->dst = NULL;
 	}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index f6696665021..39ab8ae282e 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -231,7 +231,7 @@ ipt_do_table(struct sk_buff **pskb,
 	struct xt_table_info *private;
 
 	/* Initialization */
-	ip = (*pskb)->nh.iph;
+	ip = ip_hdr(*pskb);
 	datalen = (*pskb)->len - ip->ihl * 4;
 	indev = in ? in->name : nulldevname;
 	outdev = out ? out->name : nulldevname;
@@ -320,7 +320,7 @@ ipt_do_table(struct sk_buff **pskb,
 					= 0x57acc001;
 #endif
 				/* Target might have changed stuff. */
-				ip = (*pskb)->nh.iph;
+				ip = ip_hdr(*pskb);
 				datalen = (*pskb)->len - ip->ihl * 4;
 
 				if (verdict == IPT_CONTINUE)
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 42b08029e86..af5b82b8ceb 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -240,7 +240,7 @@ clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
 static inline u_int32_t
 clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
 {
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	unsigned long hashval;
 	u_int16_t sport, dport;
 	u_int16_t *ports;
@@ -328,7 +328,7 @@ target(struct sk_buff **pskb,
 
 	/* special case: ICMP error handling. conntrack distinguishes between
 	 * error messages (RELATED) and information requests (see below) */
-	if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
+	if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP
 	    && (ctinfo == IP_CT_RELATED
 		|| ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY))
 		return XT_CONTINUE;
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 44daf9e1da3..97c0e53c8b2 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -30,13 +30,13 @@ MODULE_DESCRIPTION("iptables ECN modification module");
 static inline int
 set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 {
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 
 	if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
 		__u8 oldtos;
 		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
 			return 0;
-		iph = (*pskb)->nh.iph;
+		iph = ip_hdr(*pskb);
 		oldtos = iph->tos;
 		iph->tos &= ~IPT_ECN_IP_MASK;
 		iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
@@ -66,7 +66,7 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 
 	if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
 		return 0;
-	tcph = (void *)(*pskb)->nh.iph + ip_hdrlen(*pskb);
+	tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb);
 
 	oldval = ((__be16 *)tcph)[6];
 	if (einfo->operation & IPT_ECN_OP_SET_ECE)
@@ -94,7 +94,7 @@ target(struct sk_buff **pskb,
 			return NF_DROP;
 
 	if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
-	    && (*pskb)->nh.iph->protocol == IPPROTO_TCP)
+	    && ip_hdr(*pskb)->protocol == IPPROTO_TCP)
 		if (!set_ect_tcp(pskb, einfo))
 			return NF_DROP;
 
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index fd7aaa347cd..d03f165722d 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -75,9 +75,9 @@ target(struct sk_buff **pskb,
 	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
 
 	if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT)
-		new_ip = (*pskb)->nh.iph->daddr & ~netmask;
+		new_ip = ip_hdr(*pskb)->daddr & ~netmask;
 	else
-		new_ip = (*pskb)->nh.iph->saddr & ~netmask;
+		new_ip = ip_hdr(*pskb)->saddr & ~netmask;
 	new_ip |= mr->range[0].min_ip & netmask;
 
 	newrange = ((struct ip_nat_range)
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 01c04f0e5c9..1399e7c183b 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -43,6 +43,7 @@ MODULE_DESCRIPTION("iptables REJECT target module");
 static void send_reset(struct sk_buff *oldskb, int hook)
 {
 	struct sk_buff *nskb;
+	struct iphdr *niph;
 	struct tcphdr _otcph, *oth, *tcph;
 	__be16 tmp_port;
 	__be32 tmp_addr;
@@ -50,7 +51,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	unsigned int addr_type;
 
 	/* IP header checks: fragment. */
-	if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
+	if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
 		return;
 
 	oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
@@ -86,9 +87,10 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	tcph = (struct tcphdr *)(skb_network_header(nskb) + ip_hdrlen(nskb));
 
 	/* Swap source and dest */
-	tmp_addr = nskb->nh.iph->saddr;
-	nskb->nh.iph->saddr = nskb->nh.iph->daddr;
-	nskb->nh.iph->daddr = tmp_addr;
+	niph = ip_hdr(nskb);
+	tmp_addr = niph->saddr;
+	niph->saddr = niph->daddr;
+	niph->daddr = tmp_addr;
 	tmp_port = tcph->source;
 	tcph->source = tcph->dest;
 	tcph->dest = tmp_port;
@@ -96,7 +98,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	/* Truncate to length (no data) */
 	tcph->doff = sizeof(struct tcphdr)/4;
 	skb_trim(nskb, ip_hdrlen(nskb) + sizeof(struct tcphdr));
-	nskb->nh.iph->tot_len = htons(nskb->len);
+	niph->tot_len = htons(nskb->len);
 
 	if (tcph->ack) {
 		needs_ack = 0;
@@ -121,14 +123,13 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	/* Adjust TCP checksum */
 	tcph->check = 0;
 	tcph->check = tcp_v4_check(sizeof(struct tcphdr),
-				   nskb->nh.iph->saddr,
-				   nskb->nh.iph->daddr,
+				   niph->saddr, niph->daddr,
 				   csum_partial((char *)tcph,
 						sizeof(struct tcphdr), 0));
 
 	/* Set DF, id = 0 */
-	nskb->nh.iph->frag_off = htons(IP_DF);
-	nskb->nh.iph->id = 0;
+	niph->frag_off = htons(IP_DF);
+	niph->id = 0;
 
 	addr_type = RTN_UNSPEC;
 	if (hook != NF_IP_FORWARD
@@ -144,12 +145,11 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	nskb->ip_summed = CHECKSUM_NONE;
 
 	/* Adjust IP TTL */
-	nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
+	niph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
 
 	/* Adjust IP checksum */
-	nskb->nh.iph->check = 0;
-	nskb->nh.iph->check = ip_fast_csum(skb_network_header(nskb),
-					   nskb->nh.iph->ihl);
+	niph->check = 0;
+	niph->check = ip_fast_csum(skb_network_header(nskb), niph->ihl);
 
 	/* "Never happens" */
 	if (nskb->len > dst_mtu(nskb->dst))
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index cedf9f7d9d6..0ad02f24983 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -29,13 +29,13 @@ target(struct sk_buff **pskb,
        const void *targinfo)
 {
 	const struct ipt_tos_target_info *tosinfo = targinfo;
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 
 	if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
 		__u8 oldtos;
 		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
 			return NF_DROP;
-		iph = (*pskb)->nh.iph;
+		iph = ip_hdr(*pskb);
 		oldtos = iph->tos;
 		iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
 		nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 64be31c22ba..a991ec7bd4e 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -32,7 +32,7 @@ ipt_ttl_target(struct sk_buff **pskb,
 	if (!skb_make_writable(pskb, (*pskb)->len))
 		return NF_DROP;
 
-	iph = (*pskb)->nh.iph;
+	iph = ip_hdr(*pskb);
 
 	switch (info->mode) {
 		case IPT_TTL_SET:
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index cfa0472617f..a652a145155 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -33,7 +33,7 @@ static int match(const struct sk_buff *skb,
 		 int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_addrtype_info *info = matchinfo;
-	const struct iphdr *iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 	int ret = 1;
 
 	if (info->source)
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index b8ade3cc775..3b4ca0c5c12 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -27,7 +27,7 @@ MODULE_LICENSE("GPL");
 static inline int match_ip(const struct sk_buff *skb,
 			   const struct ipt_ecn_info *einfo)
 {
-	return ((skb->nh.iph->tos&IPT_ECN_IP_MASK) == einfo->ip_ect);
+	return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect;
 }
 
 static inline int match_tcp(const struct sk_buff *skb,
@@ -80,7 +80,7 @@ static int match(const struct sk_buff *skb,
 			return 0;
 
 	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
-		if (skb->nh.iph->protocol != IPPROTO_TCP)
+		if (ip_hdr(skb)->protocol != IPPROTO_TCP)
 			return 0;
 		if (!match_tcp(skb, info, hotdrop))
 			return 0;
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
index bc5d5e6091e..33af9e94088 100644
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ b/net/ipv4/netfilter/ipt_iprange.c
@@ -32,7 +32,7 @@ match(const struct sk_buff *skb,
       int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_iprange_info *info = matchinfo;
-	const struct iphdr *iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 
 	if (info->flags & IPRANGE_SRC) {
 		if (((ntohl(iph->saddr) < ntohl(info->src.min_ip))
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index aecb9c48e15..15a9e8bbb7c 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -183,11 +183,11 @@ ipt_recent_match(const struct sk_buff *skb,
 	int ret = info->invert;
 
 	if (info->side == IPT_RECENT_DEST)
-		addr = skb->nh.iph->daddr;
+		addr = ip_hdr(skb)->daddr;
 	else
-		addr = skb->nh.iph->saddr;
+		addr = ip_hdr(skb)->saddr;
 
-	ttl = skb->nh.iph->ttl;
+	ttl = ip_hdr(skb)->ttl;
 	/* use TTL as seen before forwarding */
 	if (out && !skb->sk)
 		ttl++;
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
index 5d33b51d49d..d314844af12 100644
--- a/net/ipv4/netfilter/ipt_tos.c
+++ b/net/ipv4/netfilter/ipt_tos.c
@@ -30,7 +30,7 @@ match(const struct sk_buff *skb,
 {
 	const struct ipt_tos_info *info = matchinfo;
 
-	return (skb->nh.iph->tos == info->tos) ^ info->invert;
+	return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
 }
 
 static struct xt_match tos_match = {
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index 1eca9f40037..9615c04a2fc 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -26,19 +26,20 @@ static int match(const struct sk_buff *skb,
 		 int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ipt_ttl_info *info = matchinfo;
+	const u8 ttl = ip_hdr(skb)->ttl;
 
 	switch (info->mode) {
 		case IPT_TTL_EQ:
-			return (skb->nh.iph->ttl == info->ttl);
+			return (ttl == info->ttl);
 			break;
 		case IPT_TTL_NE:
-			return (!(skb->nh.iph->ttl == info->ttl));
+			return (!(ttl == info->ttl));
 			break;
 		case IPT_TTL_LT:
-			return (skb->nh.iph->ttl < info->ttl);
+			return (ttl < info->ttl);
 			break;
 		case IPT_TTL_GT:
-			return (skb->nh.iph->ttl > info->ttl);
+			return (ttl > info->ttl);
 			break;
 		default:
 			printk(KERN_WARNING "ipt_ttl: unknown mode %d\n",
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 6cc3245f676..26e60fbe7ee 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -131,6 +131,7 @@ ipt_local_hook(unsigned int hook,
 		   int (*okfn)(struct sk_buff *))
 {
 	unsigned int ret;
+	const struct iphdr *iph;
 	u_int8_t tos;
 	__be32 saddr, daddr;
 	u_int32_t mark;
@@ -145,19 +146,23 @@ ipt_local_hook(unsigned int hook,
 
 	/* Save things which could affect route */
 	mark = (*pskb)->mark;
-	saddr = (*pskb)->nh.iph->saddr;
-	daddr = (*pskb)->nh.iph->daddr;
-	tos = (*pskb)->nh.iph->tos;
+	iph = ip_hdr(*pskb);
+	saddr = iph->saddr;
+	daddr = iph->daddr;
+	tos = iph->tos;
 
 	ret = ipt_do_table(pskb, hook, in, out, &packet_mangler);
 	/* Reroute for ANY change. */
-	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
-	    && ((*pskb)->nh.iph->saddr != saddr
-		|| (*pskb)->nh.iph->daddr != daddr
-		|| (*pskb)->mark != mark
-		|| (*pskb)->nh.iph->tos != tos))
-		if (ip_route_me_harder(pskb, RTN_UNSPEC))
-			ret = NF_DROP;
+	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
+		iph = ip_hdr(*pskb);
+
+		if (iph->saddr != saddr ||
+		    iph->daddr != daddr ||
+		    (*pskb)->mark != mark ||
+		    iph->tos != tos)
+			if (ip_route_me_harder(pskb, RTN_UNSPEC))
+				ret = NF_DROP;
+	}
 
 	return ret;
 }
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index fa14eb77f9b..d52ca0c1ce8 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -87,7 +87,7 @@ nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
 	local_bh_enable();
 
 	if (skb)
-		ip_send_check(skb->nh.iph);
+		ip_send_check(ip_hdr(skb));
 
 	return skb;
 }
@@ -97,16 +97,16 @@ ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
 	     u_int8_t *protonum)
 {
 	/* Never happen */
-	if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
+	if (ip_hdr(*pskb)->frag_off & htons(IP_OFFSET)) {
 		if (net_ratelimit()) {
 			printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n",
-			(*pskb)->nh.iph->protocol, hooknum);
+			ip_hdr(*pskb)->protocol, hooknum);
 		}
 		return -NF_DROP;
 	}
 
 	*dataoff = skb_network_offset(*pskb) + ip_hdrlen(*pskb);
-	*protonum = (*pskb)->nh.iph->protocol;
+	*protonum = ip_hdr(*pskb)->protocol;
 
 	return NF_ACCEPT;
 }
@@ -170,7 +170,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 #endif
 
 	/* Gather fragments. */
-	if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
+	if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) {
 		*pskb = nf_ct_ipv4_gather_frags(*pskb,
 						hooknum == NF_IP_PRE_ROUTING ?
 						IP_DEFRAG_CONNTRACK_IN :
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 2eb3832db3a..3c58fea0d39 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -44,7 +44,7 @@ static int set_addr(struct sk_buff **pskb,
 	buf.port = port;
 	addroff += dataoff;
 
-	if ((*pskb)->nh.iph->protocol == IPPROTO_TCP) {
+	if (ip_hdr(*pskb)->protocol == IPPROTO_TCP) {
 		if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
 					      addroff, sizeof(buf),
 					      (char *) &buf, sizeof(buf))) {
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 723302afd84..c2c92ff1278 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -87,7 +87,7 @@ static void mangle_contents(struct sk_buff *skb,
 	unsigned char *data;
 
 	BUG_ON(skb_is_nonlinear(skb));
-	data = (unsigned char *)skb->nh.iph + dataoff;
+	data = skb_network_header(skb) + dataoff;
 
 	/* move post-replacement */
 	memmove(data + match_offset + rep_len,
@@ -111,8 +111,8 @@ static void mangle_contents(struct sk_buff *skb,
 	}
 
 	/* fix IP hdr checksum information */
-	skb->nh.iph->tot_len = htons(skb->len);
-	ip_send_check(skb->nh.iph);
+	ip_hdr(skb)->tot_len = htons(skb->len);
+	ip_send_check(ip_hdr(skb));
 }
 
 /* Unusual, but possible case. */
@@ -166,7 +166,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 
 	SKB_LINEAR_ASSERT(*pskb);
 
-	iph = (*pskb)->nh.iph;
+	iph = ip_hdr(*pskb);
 	tcph = (void *)iph + iph->ihl*4;
 
 	oldlen = (*pskb)->len - iph->ihl*4;
@@ -221,7 +221,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
 	int datalen, oldlen;
 
 	/* UDP helpers might accidentally mangle the wrong packet */
-	iph = (*pskb)->nh.iph;
+	iph = ip_hdr(*pskb);
 	if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
 			       match_offset + match_len)
 		return 0;
@@ -234,7 +234,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
 	    !enlarge_skb(pskb, rep_len - match_len))
 		return 0;
 
-	iph = (*pskb)->nh.iph;
+	iph = ip_hdr(*pskb);
 	udph = (void *)iph + iph->ihl*4;
 
 	oldlen = (*pskb)->len - iph->ihl*4;
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 147a4370cf0..2a283397a8b 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -191,7 +191,7 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
 
 	if (hooknum == NF_IP_LOCAL_OUT &&
 	    mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
-		warn_if_extra_mangle((*pskb)->nh.iph->daddr,
+		warn_if_extra_mangle(ip_hdr(*pskb)->daddr,
 				     mr->range[0].min_ip);
 
 	return nf_nat_setup_info(ct, &mr->range[0], hooknum);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index ce5c4939a6e..0cc0d97585d 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1194,7 +1194,7 @@ static int snmp_translate(struct nf_conn *ct,
 			  enum ip_conntrack_info ctinfo,
 			  struct sk_buff **pskb)
 {
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 	struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
 	u_int16_t udplen = ntohs(udph->len);
 	u_int16_t paylen = udplen - sizeof(struct udphdr);
@@ -1235,7 +1235,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 {
 	int dir = CTINFO2DIR(ctinfo);
 	unsigned int ret;
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 	struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
 
 	/* SNMP replies and originating SNMP traps get mangled */
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 61ca272165a..64bbed2ba78 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -86,8 +86,7 @@ nf_nat_fn(unsigned int hooknum,
 
 	/* We never see fragments: conntrack defrags on pre-routing
 	   and local-out, and nf_nat_out protects post-routing. */
-	NF_CT_ASSERT(!((*pskb)->nh.iph->frag_off
-		       & htons(IP_MF|IP_OFFSET)));
+	NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)));
 
 	ct = nf_ct_get(*pskb, &ctinfo);
 	/* Can't track?  It's not due to stress, or conntrack would
@@ -98,7 +97,7 @@ nf_nat_fn(unsigned int hooknum,
 		/* Exception: ICMP redirect to new connection (not in
 		   hash table yet).  We must not let this through, in
 		   case we're doing NAT to the same network. */
-		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+		if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
 			struct icmphdr _hdr, *hp;
 
 			hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
@@ -121,7 +120,7 @@ nf_nat_fn(unsigned int hooknum,
 	switch (ctinfo) {
 	case IP_CT_RELATED:
 	case IP_CT_RELATED+IP_CT_IS_REPLY:
-		if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
+		if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
 			if (!nf_nat_icmp_reply_translation(ct, ctinfo,
 							   hooknum, pskb))
 				return NF_DROP;
@@ -176,11 +175,11 @@ nf_nat_in(unsigned int hooknum,
 	  int (*okfn)(struct sk_buff *))
 {
 	unsigned int ret;
-	__be32 daddr = (*pskb)->nh.iph->daddr;
+	__be32 daddr = ip_hdr(*pskb)->daddr;
 
 	ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    daddr != (*pskb)->nh.iph->daddr) {
+	    daddr != ip_hdr(*pskb)->daddr) {
 		dst_release((*pskb)->dst);
 		(*pskb)->dst = NULL;
 	}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index c3757bb270c..ac57afa7c31 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -292,7 +292,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 	skb->dst = dst_clone(&rt->u.dst);
 
 	skb_reset_network_header(skb);
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	skb_put(skb, length);
 
 	skb->ip_summed = CHECKSUM_NONE;
@@ -615,7 +615,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	/* Copy the address. */
 	if (sin) {
 		sin->sin_family = AF_INET;
-		sin->sin_addr.s_addr = skb->nh.iph->saddr;
+		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 		sin->sin_port = 0;
 		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e50ad7dbbde..58417393dec 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1519,7 +1519,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
 static int ip_rt_bug(struct sk_buff *skb)
 {
 	printk(KERN_DEBUG "ip_rt_bug: %u.%u.%u.%u -> %u.%u.%u.%u, %s\n",
-		NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr),
+		NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr),
 		skb->dev ? skb->dev->name : "?");
 	kfree_skb(skb);
 	return 0;
@@ -2134,7 +2134,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		rcu_read_lock();
 		if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
 			int our = ip_check_mc(in_dev, daddr, saddr,
-				skb->nh.iph->protocol);
+				ip_hdr(skb)->protocol);
 			if (our
 #ifdef CONFIG_IP_MROUTE
 			    || (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
@@ -2751,7 +2751,7 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 	skb_reset_network_header(skb);
 
 	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
-	skb->nh.iph->protocol = IPPROTO_ICMP;
+	ip_hdr(skb)->protocol = IPPROTO_ICMP;
 	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
 
 	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 33016cc90f0..26160717849 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -138,7 +138,7 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
 
 	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);
 
-	return secure_tcp_syn_cookie(skb->nh.iph->saddr, skb->nh.iph->daddr,
+	return secure_tcp_syn_cookie(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 				     skb->h.th->source, skb->h.th->dest,
 				     ntohl(skb->h.th->seq),
 				     jiffies / (HZ * 60), mssind);
@@ -162,7 +162,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
 
 	seq = ntohl(skb->h.th->seq)-1;
 	mssind = check_tcp_syn_cookie(cookie,
-				      skb->nh.iph->saddr, skb->nh.iph->daddr,
+				      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 				      skb->h.th->source, skb->h.th->dest,
 				      seq, jiffies / (HZ * 60), COUNTER_TRIES);
 
@@ -224,8 +224,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	treq->snt_isn		= cookie;
 	req->mss		= mss;
 	ireq->rmt_port		= skb->h.th->source;
-	ireq->loc_addr		= skb->nh.iph->daddr;
-	ireq->rmt_addr		= skb->nh.iph->saddr;
+	ireq->loc_addr		= ip_hdr(skb)->daddr;
+	ireq->rmt_addr		= ip_hdr(skb)->saddr;
 	ireq->opt		= NULL;
 
 	/* We throwed the options of the initial SYN away, so we hope
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3326681b842..3a86d6b887a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -125,8 +125,8 @@ void tcp_unhash(struct sock *sk)
 
 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
 {
-	return secure_tcp_sequence_number(skb->nh.iph->daddr,
-					  skb->nh.iph->saddr,
+	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
+					  ip_hdr(skb)->saddr,
 					  skb->h.th->dest,
 					  skb->h.th->source);
 }
@@ -515,13 +515,13 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 
 int tcp_v4_gso_send_check(struct sk_buff *skb)
 {
-	struct iphdr *iph;
+	const struct iphdr *iph;
 	struct tcphdr *th;
 
 	if (!pskb_may_pull(skb, sizeof(*th)))
 		return -EINVAL;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	th = skb->h.th;
 
 	th->check = 0;
@@ -585,7 +585,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	arg.iov[0].iov_len  = sizeof(rep.th);
 
 #ifdef CONFIG_TCP_MD5SIG
-	key = sk ? tcp_v4_md5_do_lookup(sk, skb->nh.iph->daddr) : NULL;
+	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL;
 	if (key) {
 		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
 				   (TCPOPT_NOP << 16) |
@@ -597,14 +597,14 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 
 		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
 					key,
-					skb->nh.iph->daddr,
-					skb->nh.iph->saddr,
+					ip_hdr(skb)->daddr,
+					ip_hdr(skb)->saddr,
 					&rep.th, IPPROTO_TCP,
 					arg.iov[0].iov_len);
 	}
 #endif
-	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
-				      skb->nh.iph->saddr, /* XXX */
+	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
+				      ip_hdr(skb)->saddr, /* XXX */
 				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 
@@ -670,7 +670,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
 	 * skb->sk) holds true, but we program defensively.
 	 */
 	if (!twsk && skb->sk) {
-		key = tcp_v4_md5_do_lookup(skb->sk, skb->nh.iph->daddr);
+		key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr);
 	} else if (twsk && twsk->tw_md5_keylen) {
 		tw_key.key = twsk->tw_md5_key;
 		tw_key.keylen = twsk->tw_md5_keylen;
@@ -690,14 +690,14 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
 
 		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
 					key,
-					skb->nh.iph->daddr,
-					skb->nh.iph->saddr,
+					ip_hdr(skb)->daddr,
+					ip_hdr(skb)->saddr,
 					&rep.th, IPPROTO_TCP,
 					arg.iov[0].iov_len);
 	}
 #endif
-	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
-				      skb->nh.iph->saddr, /* XXX */
+	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
+				      ip_hdr(skb)->saddr, /* XXX */
 				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 
@@ -1133,7 +1133,7 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
 	 */
 	__u8 *hash_location = NULL;
 	struct tcp_md5sig_key *hash_expected;
-	struct iphdr *iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 	struct tcphdr *th = skb->h.th;
 	int length = (th->doff << 2) - sizeof(struct tcphdr);
 	int genhash;
@@ -1251,8 +1251,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	struct inet_request_sock *ireq;
 	struct tcp_options_received tmp_opt;
 	struct request_sock *req;
-	__be32 saddr = skb->nh.iph->saddr;
-	__be32 daddr = skb->nh.iph->daddr;
+	__be32 saddr = ip_hdr(skb)->saddr;
+	__be32 daddr = ip_hdr(skb)->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
 	struct dst_entry *dst = NULL;
 #ifdef CONFIG_SYN_COOKIES
@@ -1439,7 +1439,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newinet->opt	      = ireq->opt;
 	ireq->opt	      = NULL;
 	newinet->mc_index     = inet_iif(skb);
-	newinet->mc_ttl	      = skb->nh.iph->ttl;
+	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
 	if (newinet->opt)
 		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
@@ -1482,7 +1482,7 @@ exit:
 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcphdr *th = skb->h.th;
-	struct iphdr *iph = skb->nh.iph;
+	const struct iphdr *iph = ip_hdr(skb);
 	struct sock *nsk;
 	struct request_sock **prev;
 	/* Find possible connection requests. */
@@ -1491,9 +1491,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	if (req)
 		return tcp_check_req(sk, skb, req, prev);
 
-	nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
-				      th->source, skb->nh.iph->daddr,
-				      th->dest, inet_iif(skb));
+	nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source,
+				      iph->daddr, th->dest, inet_iif(skb));
 
 	if (nsk) {
 		if (nsk->sk_state != TCP_TIME_WAIT) {
@@ -1513,15 +1512,17 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 
 static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
 {
+	const struct iphdr *iph = ip_hdr(skb);
+
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		if (!tcp_v4_check(skb->len, skb->nh.iph->saddr,
-				  skb->nh.iph->daddr, skb->csum)) {
+		if (!tcp_v4_check(skb->len, iph->saddr,
+				  iph->daddr, skb->csum)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 			return 0;
 		}
 	}
 
-	skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
+	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
 				       skb->len, IPPROTO_TCP, 0);
 
 	if (skb->len <= 76) {
@@ -1610,6 +1611,7 @@ csum_err:
 
 int tcp_v4_rcv(struct sk_buff *skb)
 {
+	const struct iphdr *iph;
 	struct tcphdr *th;
 	struct sock *sk;
 	int ret;
@@ -1639,18 +1641,17 @@ int tcp_v4_rcv(struct sk_buff *skb)
 		goto bad_packet;
 
 	th = skb->h.th;
+	iph = ip_hdr(skb);
 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
 				    skb->len - th->doff * 4);
 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
 	TCP_SKB_CB(skb)->when	 = 0;
-	TCP_SKB_CB(skb)->flags	 = skb->nh.iph->tos;
+	TCP_SKB_CB(skb)->flags	 = iph->tos;
 	TCP_SKB_CB(skb)->sacked	 = 0;
 
-	sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
-			   skb->nh.iph->daddr, th->dest,
-			   inet_iif(skb));
-
+	sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
+			   iph->daddr, th->dest, inet_iif(skb));
 	if (!sk)
 		goto no_tcp_socket;
 
@@ -1724,8 +1725,7 @@ do_time_wait:
 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
 	case TCP_TW_SYN: {
 		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
-							skb->nh.iph->daddr,
-							th->dest,
+							iph->daddr, th->dest,
 							inet_iif(skb));
 		if (sk2) {
 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1bbf5510cf3..b4cad50c18e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -867,7 +867,7 @@ try_again:
 	{
 		sin->sin_family = AF_INET;
 		sin->sin_port = skb->h.uh->source;
-		sin->sin_addr.s_addr = skb->nh.iph->saddr;
+		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
 	}
 	if (inet->cmsg_flags)
@@ -990,7 +990,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
 		return 0;
 
 	/* Now we can update and verify the packet length... */
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	iphlen = iph->ihl << 2;
 	iph->tot_len = htons(ntohs(iph->tot_len) - len);
 	if (skb->len < iphlen + len) {
@@ -1168,6 +1168,7 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
 static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
 				 int proto)
 {
+	const struct iphdr *iph;
 	int err;
 
 	UDP_SKB_CB(skb)->partial_cov = 0;
@@ -1179,16 +1180,16 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
 			return err;
 	}
 
+	iph = ip_hdr(skb);
 	if (uh->check == 0) {
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
-	       if (!csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
-				      skb->len, proto, skb->csum))
+	       if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
+				      proto, skb->csum))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
-		skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr,
-					       skb->nh.iph->daddr,
+		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
 					       skb->len, proto, 0);
 	/* Probably, we should checksum udp header (it should be in cache
 	 * in any case) and data in tiny packets (< rx copybreak).
@@ -1208,8 +1209,8 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 	struct udphdr *uh = skb->h.uh;
 	unsigned short ulen;
 	struct rtable *rt = (struct rtable*)skb->dst;
-	__be32 saddr = skb->nh.iph->saddr;
-	__be32 daddr = skb->nh.iph->daddr;
+	__be32 saddr = ip_hdr(skb)->saddr;
+	__be32 daddr = ip_hdr(skb)->daddr;
 
 	/*
 	 *  Validate the packet.
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index d89969c502d..5ceca951d73 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -28,7 +28,7 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32
 	switch (nexthdr) {
 	case IPPROTO_IPIP:
 	case IPPROTO_IPV6:
-		*spi = skb->nh.iph->saddr;
+		*spi = ip_hdr(skb)->saddr;
 		*seq = 0;
 		return 0;
 	}
@@ -39,9 +39,9 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32
 #ifdef CONFIG_NETFILTER
 static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
 {
-	struct iphdr *iph = skb->nh.iph;
-
 	if (skb->dst == NULL) {
+		const struct iphdr *iph = ip_hdr(skb);
+
 		if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
 				   skb->dev))
 			goto drop;
@@ -55,18 +55,18 @@ drop:
 
 int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 {
-	int err;
 	__be32 spi, seq;
 	struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
 	struct xfrm_state *x;
 	int xfrm_nr = 0;
 	int decaps = 0;
+	int err = xfrm4_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq);
 
-	if ((err = xfrm4_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) != 0)
+	if (err != 0)
 		goto drop;
 
 	do {
-		struct iphdr *iph = skb->nh.iph;
+		const struct iphdr *iph = ip_hdr(skb);
 
 		if (xfrm_nr == XFRM_MAX_DEPTH)
 			goto drop;
@@ -113,7 +113,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 			break;
 		}
 
-		if ((err = xfrm_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) < 0)
+		err = xfrm_parse_spi(skb, ip_hdr(skb)->protocol, &spi, &seq);
+		if (err < 0)
 			goto drop;
 	} while (!err);
 
@@ -147,14 +148,14 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 	} else {
 #ifdef CONFIG_NETFILTER
 		__skb_push(skb, skb->data - skb_network_header(skb));
-		skb->nh.iph->tot_len = htons(skb->len);
-		ip_send_check(skb->nh.iph);
+		ip_hdr(skb)->tot_len = htons(skb->len);
+		ip_send_check(ip_hdr(skb));
 
 		NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
 			xfrm4_rcv_encap_finish);
 		return 0;
 #else
-		return -skb->nh.iph->protocol;
+		return -ip_hdr(skb)->protocol;
 #endif
 	}
 
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 505fca034a1..9e5ba12c6c7 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -32,8 +32,8 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 	struct iphdr *iph, *top_iph = NULL;
 	int hdrlen, optlen;
 
-	iph = skb->nh.iph;
-	skb->h.ipiph = iph;
+	iph = ip_hdr(skb);
+	skb->h.raw = skb->nh.raw;
 
 	hdrlen = 0;
 	optlen = iph->ihl * 4 - sizeof(*iph);
@@ -42,7 +42,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb_push(skb, x->props.header_len + hdrlen);
 	skb_reset_network_header(skb);
-	top_iph = skb->nh.iph;
+	top_iph = ip_hdr(skb);
 	skb->h.raw += sizeof(*iph) - hdrlen;
 
 	memmove(top_iph, iph, sizeof(*iph));
@@ -70,7 +70,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 
 static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	int phlen = 0;
 	int optlen = 0;
 	__u8 ph_nexthdr = 0, protocol = 0;
@@ -102,7 +102,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb->h.raw = skb->data + (phlen + optlen);
 	skb->data = skb->h.raw;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	iph->ihl = (sizeof(*iph) + optlen) / 4;
 	iph->tot_len = htons(skb->len + iph->ihl * 4);
 	iph->daddr = x->sel.daddr.a4;
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index b198087c073..124f24bc4db 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -23,13 +23,10 @@
  */
 static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct iphdr *iph;
-	int ihl;
+	struct iphdr *iph = ip_hdr(skb);
+	int ihl = iph->ihl * 4;
 
-	iph = skb->nh.iph;
-	skb->h.ipiph = iph;
-
-	ihl = iph->ihl * 4;
+	skb->h.raw = skb->nh.raw;
 	skb->h.raw += ihl;
 
 	skb_push(skb, x->props.header_len);
@@ -54,7 +51,7 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 		memmove(skb->h.raw, skb_network_header(skb), ihl);
 		skb->nh.raw = skb->h.raw;
 	}
-	skb->nh.iph->tot_len = htons(skb->len + ihl);
+	ip_hdr(skb)->tot_len = htons(skb->len + ihl);
 	skb->h.raw = skb->data;
 	return 0;
 }
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index bec851f278e..faa1b9a76e7 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -16,7 +16,7 @@
 
 static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
 {
-	struct iphdr *outer_iph = skb->nh.iph;
+	struct iphdr *outer_iph = ip_hdr(skb);
 	struct iphdr *inner_iph = skb->h.ipiph;
 
 	if (INET_ECN_is_ce(outer_iph->tos))
@@ -46,12 +46,12 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	struct iphdr *iph, *top_iph;
 	int flags;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	skb->h.ipiph = iph;
 
 	skb_push(skb, x->props.header_len);
 	skb_reset_network_header(skb);
-	top_iph = skb->nh.iph;
+	top_iph = ip_hdr(skb);
 
 	top_iph->ihl = 5;
 	top_iph->version = 4;
@@ -91,7 +91,7 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	const unsigned char *old_mac;
 	int err = -EINVAL;
 
@@ -113,7 +113,7 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 		goto out;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 	if (iph->protocol == IPPROTO_IPIP) {
 		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
 			ipv4_copy_dscp(iph, skb->h.ipiph);
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 038ca160fe2..44ef208a75c 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -22,14 +22,13 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
 {
 	int mtu, ret = 0;
 	struct dst_entry *dst;
-	struct iphdr *iph = skb->nh.iph;
 
 	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
 		goto out;
 
 	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
 
-	if (!(iph->frag_off & htons(IP_DF)) || skb->local_df)
+	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df)
 		goto out;
 
 	dst = skb->dst;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index fbb1d3decf0..f1c32ff59d1 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -209,7 +209,7 @@ error:
 static void
 _decode_session4(struct sk_buff *skb, struct flowi *fl)
 {
-	struct iphdr *iph = skb->nh.iph;
+	struct iphdr *iph = ip_hdr(skb);
 	u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
 
 	memset(fl, 0, sizeof(struct flowi));
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 3eef06454da..56851030455 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -12,9 +12,8 @@
 
 static int ipip_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct iphdr *iph;
+	struct iphdr *iph = ip_hdr(skb);
 
-	iph = skb->nh.iph;
 	iph->tot_len = htons(skb->len);
 	ip_send_check(iph);
 
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 7a86db6163e..ac95d3bfdfb 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -349,8 +349,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 			struct inet_sock *inet = inet_sk(sk);
 
 			ipv6_addr_set(&sin->sin6_addr, 0, 0,
-				      htonl(0xffff),
-				      skb->nh.iph->saddr);
+				      htonl(0xffff), ip_hdr(skb)->saddr);
 			if (inet->cmsg_flags)
 				ip_cmsg_recv(msg, skb);
 		}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index aafbdfa8d78..bb65779be7a 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -526,7 +526,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	skb2->dst = NULL;
 	skb_pull(skb2, offset);
 	skb_reset_network_header(skb2);
-	eiph = skb2->nh.iph;
+	eiph = ip_hdr(skb2);
 
 	/* Try to guess incoming interface */
 	memset(&fl, 0, sizeof(fl));
@@ -625,10 +625,10 @@ static void ip4ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
 	__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
 
 	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
-		ipv4_change_dsfield(skb->nh.iph, INET_ECN_MASK, dsfield);
+		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
 
 	if (INET_ECN_is_ce(dsfield))
-		IP_ECN_set_ce(skb->nh.iph);
+		IP_ECN_set_ce(ip_hdr(skb));
 }
 
 static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
@@ -944,7 +944,7 @@ static inline int
 ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
-	struct iphdr  *iph = skb->nh.iph;
+	struct iphdr  *iph = ip_hdr(skb);
 	int encap_limit = -1;
 	struct flowi fl;
 	__u8 dsfield;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 62883d41b6c..e33ac3c3a9c 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -377,7 +377,7 @@ static int ipip6_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto out;
 
-	iph = skb->nh.iph;
+	iph = ip_hdr(skb);
 
 	read_lock(&ipip6_lock);
 	if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
@@ -565,7 +565,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	 *	Push down and install the IPIP header.
 	 */
 
-	iph 			=	skb->nh.iph;
+	iph 			=	ip_hdr(skb);
 	iph->version		=	4;
 	iph->ihl		=	sizeof(struct iphdr)>>2;
 	if (mtu > IPV6_MIN_MTU)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 87b06a80102..e991e606ab1 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -178,7 +178,7 @@ try_again:
 
 		if (skb->protocol == htons(ETH_P_IP))
 			ipv6_addr_set(&sin6->sin6_addr, 0, 0,
-				      htonl(0xffff), skb->nh.iph->saddr);
+				      htonl(0xffff), ip_hdr(skb)->saddr);
 		else {
 			ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
 			if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index bb26a658cc1..1093478cc00 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -46,7 +46,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 		struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
 	struct nf_conntrack_expect *exp;
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 	struct rtable *rt = (struct rtable *)(*pskb)->dst;
 	struct in_device *in_dev;
 	__be32 mask = 0;
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index a7cc75aeb38..de647bd5489 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -35,13 +35,13 @@ static unsigned int target(struct sk_buff **pskb,
 			   const void *targinfo)
 {
 	const struct xt_DSCP_info *dinfo = targinfo;
-	u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT;
+	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(*pskb)) >> XT_DSCP_SHIFT;
 
 	if (dscp != dinfo->dscp) {
 		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
 			return NF_DROP;
 
-		ipv4_change_dsfield((*pskb)->nh.iph, (__u8)(~XT_DSCP_MASK),
+		ipv4_change_dsfield(ip_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
 				    dinfo->dscp << XT_DSCP_SHIFT);
 
 	}
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index afc0c60e19d..9e948ce2760 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -145,7 +145,7 @@ xt_tcpmss_target4(struct sk_buff **pskb,
 		  const struct xt_target *target,
 		  const void *targinfo)
 {
-	struct iphdr *iph = (*pskb)->nh.iph;
+	struct iphdr *iph = ip_hdr(*pskb);
 	__be16 newlen;
 	int ret;
 
@@ -154,7 +154,7 @@ xt_tcpmss_target4(struct sk_buff **pskb,
 	if (ret < 0)
 		return NF_DROP;
 	if (ret > 0) {
-		iph = (*pskb)->nh.iph;
+		iph = ip_hdr(*pskb);
 		newlen = htons(ntohs(iph->tot_len) + ret);
 		nf_csum_replace2(&iph->check, iph->tot_len, newlen);
 		iph->tot_len = newlen;
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 26c7f4ad102..9ec294cd243 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -34,7 +34,7 @@ static int match(const struct sk_buff *skb,
 		 int *hotdrop)
 {
 	const struct xt_dscp_info *info = matchinfo;
-	u_int8_t dscp = ipv4_get_dsfield(skb->nh.iph) >> XT_DSCP_SHIFT;
+	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	return (dscp == info->dscp) ^ !!info->invert;
 }
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 9f37d593ca3..47af19ab03c 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -380,14 +380,14 @@ hashlimit_init_dst(struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst,
 	switch (hinfo->family) {
 	case AF_INET:
 		if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
-			dst->addr.ip.dst = skb->nh.iph->daddr;
+			dst->addr.ip.dst = ip_hdr(skb)->daddr;
 		if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
-			dst->addr.ip.src = skb->nh.iph->saddr;
+			dst->addr.ip.src = ip_hdr(skb)->saddr;
 
 		if (!(hinfo->cfg.mode &
 		      (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
 			return 0;
-		nexthdr = skb->nh.iph->protocol;
+		nexthdr = ip_hdr(skb)->protocol;
 		break;
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 	case AF_INET6:
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 32fb998d9ba..65fdb216699 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -31,7 +31,7 @@ match(const struct sk_buff *skb,
       int *hotdrop)
 {
 	const struct xt_length_info *info = matchinfo;
-	u_int16_t pktlen = ntohs(skb->nh.iph->tot_len);
+	u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
 
 	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
 }
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 16e7b080428..e1409fc5c28 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -34,7 +34,7 @@ static int match(const struct sk_buff *skb,
 	const struct xt_pkttype_info *info = matchinfo;
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
-		type = (MULTICAST(skb->nh.iph->daddr)
+		type = (MULTICAST(ip_hdr(skb)->daddr)
 			? PACKET_MULTICAST
 			: PACKET_BROADCAST);
 	else
diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c
index a7c929a9fdc..e601fa87bb7 100644
--- a/net/rxrpc/connection.c
+++ b/net/rxrpc/connection.c
@@ -267,7 +267,7 @@ int rxrpc_connection_lookup(struct rxrpc_peer *peer,
 		/* fill in the specifics */
 		candidate->addr.sin_family	= AF_INET;
 		candidate->addr.sin_port	= x_port;
-		candidate->addr.sin_addr.s_addr = pkt->nh.iph->saddr;
+		candidate->addr.sin_addr.s_addr = ip_hdr(pkt)->saddr;
 		candidate->in_epoch		= x_epoch;
 		candidate->out_epoch		= x_epoch;
 		candidate->in_clientflag	= RXRPC_CLIENT_INITIATED;
diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c
index 8e57be2df93..cac078b7406 100644
--- a/net/rxrpc/transport.c
+++ b/net/rxrpc/transport.c
@@ -478,7 +478,7 @@ void rxrpc_trans_receive_packet(struct rxrpc_transport *trans)
 			return;
 		}
 
-		addr = pkt->nh.iph->saddr;
+		addr = ip_hdr(pkt)->saddr;
 		port = pkt->h.uh->source;
 
 		_net("Rx Received UDP packet from %08x:%04hu",
@@ -626,7 +626,7 @@ int rxrpc_trans_immediate_abort(struct rxrpc_transport *trans,
 	memset(&sin,0,sizeof(sin));
 	sin.sin_family		= AF_INET;
 	sin.sin_port		= msg->pkt->h.uh->source;
-	sin.sin_addr.s_addr	= msg->pkt->nh.iph->saddr;
+	sin.sin_addr.s_addr	= ip_hdr(msg->pkt)->saddr;
 
 	msghdr.msg_name		= &sin;
 	msghdr.msg_namelen	= sizeof(sin);
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index b6ac0e28787..cb8cf5bfa05 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -145,7 +145,7 @@ static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
 #if RSVP_DST_LEN == 4
 	struct ipv6hdr *nhptr = skb->nh.ipv6h;
 #else
-	struct iphdr *nhptr = skb->nh.iph;
+	struct iphdr *nhptr = ip_hdr(skb);
 #endif
 
 restart:
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index afb3bbd571f..baca8743c12 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -503,7 +503,7 @@ static void sch_atm_dequeue(unsigned long data)
 			}
 			D2PRINTK("atm_tc_dequeue: sending on class %p\n",flow);
 			/* remove any LL header somebody else has attached */
-			skb_pull(skb,(char *) skb->nh.iph-(char *) skb->data);
+			skb_pull(skb, skb_network_offset(skb));
 			if (skb_headroom(skb) < flow->hdr_len) {
 				struct sk_buff *new;
 
@@ -513,7 +513,7 @@ static void sch_atm_dequeue(unsigned long data)
 				skb = new;
 			}
 			D2PRINTK("sch_atm_dequeue: ip %p, data %p\n",
-			    skb->nh.iph,skb->data);
+				 skb_network_header(skb), skb->data);
 			ATM_SKB(skb)->vcc = flow->vcc;
 			memcpy(skb_push(skb,flow->hdr_len),flow->hdr,
 			    flow->hdr_len);
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 96324cf4e6a..45b5734dd72 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -216,7 +216,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 		/* FIXME: Safe with non-linear skbs? --RR */
 		switch (skb->protocol) {
 			case __constant_htons(ETH_P_IP):
-				skb->tc_index = ipv4_get_dsfield(skb->nh.iph)
+				skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
 					& ~INET_ECN_MASK;
 				break;
 			case __constant_htons(ETH_P_IPV6):
@@ -292,7 +292,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 
 	switch (skb->protocol) {
 		case __constant_htons(ETH_P_IP):
-			ipv4_change_dsfield(skb->nh.iph, p->mask[index],
+			ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
 					    p->value[index]);
 			break;
 		case __constant_htons(ETH_P_IPV6):
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 66f32051a99..02081bc9e0d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -137,7 +137,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 	switch (skb->protocol) {
 	case __constant_htons(ETH_P_IP):
 	{
-		struct iphdr *iph = skb->nh.iph;
+		const struct iphdr *iph = ip_hdr(skb);
 		h = iph->daddr;
 		h2 = iph->saddr^iph->protocol;
 		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 2b0863aba3f..595fe32b3d4 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -154,7 +154,7 @@ int sctp_rcv(struct sk_buff *skb)
 	if (skb->len < sizeof(struct sctp_chunkhdr))
 		goto discard_it;
 
-	family = ipver2af(skb->nh.iph->version);
+	family = ipver2af(ip_hdr(skb)->version);
 	af = sctp_get_af_specific(family);
 	if (unlikely(!af))
 		goto discard_it;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 5f9b145b0b9..742f9ff42fb 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -770,9 +770,9 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
 
 		/* Map ipv4 address into v4-mapped-on-v6 address. */
 		if (sctp_sk(skb->sk)->v4mapped &&
-		    skb->nh.iph->version == 4) {
+		    ip_hdr(skb)->version == 4) {
 			sctp_v4_map_v6((union sctp_addr *)sin6);
-			sin6->sin6_addr.s6_addr32[3] = skb->nh.iph->saddr;
+			sin6->sin6_addr.s6_addr32[3] = ip_hdr(skb)->saddr;
 			return;
 		}
 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e17a823ca90..08f92ba4ebd 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -238,10 +238,10 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
 	sh = (struct sctphdr *) skb->h.raw;
 	if (is_saddr) {
 		*port  = sh->source;
-		from = &skb->nh.iph->saddr;
+		from = &ip_hdr(skb)->saddr;
 	} else {
 		*port = sh->dest;
-		from = &skb->nh.iph->daddr;
+		from = &ip_hdr(skb)->daddr;
 	}
 	memcpy(&addr->v4.sin_addr.s_addr, from, sizeof(struct in_addr));
 }
@@ -530,7 +530,7 @@ static int sctp_v4_skb_iif(const struct sk_buff *skb)
 /* Was this packet marked by Explicit Congestion Notification? */
 static int sctp_v4_is_ce(const struct sk_buff *skb)
 {
-	return INET_ECN_is_ce(skb->nh.iph->tos);
+	return INET_ECN_is_ce(ip_hdr(skb)->tos);
 }
 
 /* Create and initialize a new sk for the socket returned by accept(). */
@@ -739,7 +739,7 @@ static void sctp_inet_skb_msgname(struct sk_buff *skb, char *msgname, int *len)
 		sin = (struct sockaddr_in *)msgname;
 		sh = (struct sctphdr *)skb->h.raw;
 		sin->sin_port = sh->source;
-		sin->sin_addr.s_addr = skb->nh.iph->saddr;
+		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 	}
 }
 
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index f7fb29d5a0c..60c5b59d4c6 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -86,7 +86,7 @@ int sctp_chunk_iif(const struct sctp_chunk *chunk)
 	struct sctp_af *af;
 	int iif = 0;
 
-	af = sctp_get_af_specific(ipver2af(chunk->skb->nh.iph->version));
+	af = sctp_get_af_specific(ipver2af(ip_hdr(chunk->skb)->version));
 	if (af)
 		iif = af->skb_iif(chunk->skb);
 
@@ -1233,7 +1233,7 @@ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
 	asoc->temp = 1;
 	skb = chunk->skb;
 	/* Create an entry for the source address of the packet.  */
-	af = sctp_get_af_specific(ipver2af(skb->nh.iph->version));
+	af = sctp_get_af_specific(ipver2af(ip_hdr(skb)->version));
 	if (unlikely(!af))
 		goto fail;
 	af->from_skb(&asoc->c.peer_addr, skb, 1);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index e9097cf614b..bf502c499c8 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -5286,7 +5286,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 		chunk->ecn_ce_done = 1;
 
 		af = sctp_get_af_specific(
-			ipver2af(chunk->skb->nh.iph->version));
+			ipver2af(ip_hdr(chunk->skb)->version));
 
 		if (af && af->is_ce(chunk->skb) && asoc->peer.ecn_capable) {
 			/* Do real work as sideffect. */
-- 
cgit v1.2.3


From fd74e6ccd522e2f26163eb5ac1abebcab2bd017c Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 12 Mar 2007 16:25:32 -0700
Subject: [BRIDGE]: faster compare for link local addresses

Use logic operations rather than memcmp() to compare destination
address with link local multicast addresses.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_input.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 35b94f9a1ac..a260679afad 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -112,7 +112,11 @@ static int br_handle_local_finish(struct sk_buff *skb)
  */
 static inline int is_link_local(const unsigned char *dest)
 {
-	return memcmp(dest, br_group_address, 5) == 0 && (dest[5] & 0xf0) == 0;
+	const u16 *a = (const u16 *) dest;
+	static const u16 *const b = (const u16 *const ) br_group_address;
+	static const u16 m = __constant_cpu_to_be16(0xfff0);
+
+	return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0;
 }
 
 /*
-- 
cgit v1.2.3


From d0a92be05ed4aea7d35c2b257e3f9173565fe4eb Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 12 Mar 2007 20:56:31 -0300
Subject: [SK_BUFF]: Introduce arp_hdr(), remove skb->nh.arph

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c    | 2 +-
 drivers/net/chelsio/sge.c          | 2 +-
 include/linux/if_arp.h             | 9 +++++++++
 include/linux/skbuff.h             | 1 -
 net/bridge/br_netfilter.c          | 2 +-
 net/core/netpoll.c                 | 2 +-
 net/ipv4/arp.c                     | 4 ++--
 net/ipv4/netfilter/arp_tables.c    | 4 ++--
 net/ipv4/netfilter/arpt_mangle.c   | 2 +-
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +-
 10 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 7f11388893f..76d3504505b 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2524,7 +2524,7 @@ static int bond_arp_rcv(struct sk_buff *skb, struct net_device *dev, struct pack
 				 (2 * sizeof(u32)))))
 		goto out_unlock;
 
-	arp = skb->nh.arph;
+	arp = arp_hdr(skb);
 	if (arp->ar_hln != dev->addr_len ||
 	    skb->pkt_type == PACKET_OTHERHOST ||
 	    skb->pkt_type == PACKET_LOOPBACK ||
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index c357f45a16c..a4204dff363 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1925,7 +1925,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		 */
 		if ((unlikely(!adapter->sge->espibug_skb[dev->if_port]))) {
 			if (skb->protocol == htons(ETH_P_ARP) &&
-			    skb->nh.arph->ar_op == htons(ARPOP_REQUEST)) {
+			    arp_hdr(skb)->ar_op == htons(ARPOP_REQUEST)) {
 				adapter->sge->espibug_skb[dev->if_port] = skb;
 				/* We want to re-use this skb later. We
 				 * simply bump the reference count and it
diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index 7f5714214ee..ed7b93c3083 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -148,4 +148,13 @@ struct arphdr
 
 };
 
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct arphdr *arp_hdr(const struct sk_buff *skb)
+{
+	return (struct arphdr *)skb_network_header(skb);
+}
+#endif
+
 #endif	/* _LINUX_IF_ARP_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 62f841b5b70..9cb674b12b2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -248,7 +248,6 @@ struct sk_buff {
 
 	union {
 		struct ipv6hdr	*ipv6h;
-		struct arphdr	*arph;
 		unsigned char	*raw;
 	} nh;
 
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index ebe740f6b90..0ee74b1e477 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -670,7 +670,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
 		(*pskb)->nh.raw += VLAN_HLEN;
 	}
 
-	if (skb->nh.arph->ar_pln != 4) {
+	if (arp_hdr(skb)->ar_pln != 4) {
 		if (IS_VLAN_ARP(skb)) {
 			skb_push(*pskb, VLAN_HLEN);
 			(*pskb)->nh.raw -= VLAN_HLEN;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c4cec17be33..496b06244a8 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -363,7 +363,7 @@ static void arp_reply(struct sk_buff *skb)
 
 	skb_reset_network_header(skb);
 	skb->h.raw = skb->data;
-	arp = skb->nh.arph;
+	arp = arp_hdr(skb);
 
 	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
 	     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 01d0e8dd17d..7110779a024 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -721,7 +721,7 @@ static int arp_process(struct sk_buff *skb)
 	if (in_dev == NULL)
 		goto out;
 
-	arp = skb->nh.arph;
+	arp = arp_hdr(skb);
 
 	switch (dev_type) {
 	default:
@@ -937,7 +937,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
 				 (2 * sizeof(u32)))))
 		goto freeskb;
 
-	arp = skb->nh.arph;
+	arp = arp_hdr(skb);
 	if (arp->ar_hln != dev->addr_len ||
 	    dev->flags & IFF_NOARP ||
 	    skb->pkt_type == PACKET_OTHERHOST ||
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 57b0221f9e2..cae41215e3c 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -245,7 +245,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
 
-	arp = (*pskb)->nh.arph;
+	arp = arp_hdr(*pskb);
 	do {
 		if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) {
 			struct arpt_entry_target *t;
@@ -297,7 +297,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 								     t->data);
 
 				/* Target might have changed stuff. */
-				arp = (*pskb)->nh.arph;
+				arp = arp_hdr(*pskb);
 
 				if (verdict == ARPT_CONTINUE)
 					e = (void *)e + e->next_offset;
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index af1c8593eb1..b4450f1ccc1 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -30,7 +30,7 @@ target(struct sk_buff **pskb,
 		*pskb = nskb;
 	}
 
-	arp = (*pskb)->nh.arph;
+	arp = arp_hdr(*pskb);
 	arpptr = skb_network_header(*pskb) + sizeof(*arp);
 	pln = arp->ar_pln;
 	hln = arp->ar_hln;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index af5b82b8ceb..d3b16817a99 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -521,7 +521,7 @@ arp_mangle(unsigned int hook,
 	   const struct net_device *out,
 	   int (*okfn)(struct sk_buff *))
 {
-	struct arphdr *arp = (*pskb)->nh.arph;
+	struct arphdr *arp = arp_hdr(*pskb);
 	struct arp_payload *payload;
 	struct clusterip_config *c;
 
-- 
cgit v1.2.3


From 0660e03f6b18f19b6bbafe7583265a51b90daf36 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 25 Apr 2007 17:54:47 -0700
Subject: [SK_BUFF]: Introduce ipv6_hdr(), remove skb->nh.ipv6h

Now the skb->nh union has just one member, .raw, i.e. it is just like the
skb->mac union, strange, no? I'm just leaving it like that till the transport
layer is done with, when we'll rename skb->mac.raw to skb->mac_header (or
->mac_header_offset?), ditto for ->{h,nh}.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c                 |  4 +-
 drivers/net/e1000/e1000_main.c                 | 10 ++--
 drivers/s390/net/qeth_eddp.c                   |  8 +--
 drivers/s390/net/qeth_main.c                   |  3 +-
 drivers/s390/net/qeth_tso.h                    |  2 +-
 include/linux/ipv6.h                           |  5 ++
 include/linux/skbuff.h                         |  1 -
 include/net/inet_ecn.h                         |  2 +-
 net/bridge/br_netfilter.c                      |  6 +--
 net/core/pktgen.c                              |  2 +-
 net/dccp/ipv6.c                                | 32 +++++------
 net/ipv4/ip_gre.c                              |  4 +-
 net/ipv4/xfrm4_mode_tunnel.c                   |  2 +-
 net/ipv6/ah6.c                                 | 14 ++---
 net/ipv6/datagram.c                            | 16 +++---
 net/ipv6/esp6.c                                |  2 +-
 net/ipv6/exthdrs.c                             | 20 +++----
 net/ipv6/icmp.c                                | 20 +++----
 net/ipv6/ip6_input.c                           |  8 +--
 net/ipv6/ip6_output.c                          | 37 +++++++------
 net/ipv6/ip6_tunnel.c                          | 16 +++---
 net/ipv6/ipcomp6.c                             |  2 +-
 net/ipv6/ipv6_sockglue.c                       |  6 +--
 net/ipv6/mcast.c                               | 15 +++---
 net/ipv6/mip6.c                                | 20 +++----
 net/ipv6/ndisc.c                               | 50 ++++++++---------
 net/ipv6/netfilter.c                           |  8 +--
 net/ipv6/netfilter/ip6_tables.c                |  8 +--
 net/ipv6/netfilter/ip6t_HL.c                   |  2 +-
 net/ipv6/netfilter/ip6t_LOG.c                  |  2 +-
 net/ipv6/netfilter/ip6t_REJECT.c               |  8 +--
 net/ipv6/netfilter/ip6t_eui64.c                |  4 +-
 net/ipv6/netfilter/ip6t_hl.c                   |  2 +-
 net/ipv6/netfilter/ip6t_ipv6header.c           |  2 +-
 net/ipv6/netfilter/ip6table_mangle.c           | 14 ++---
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 19 +++----
 net/ipv6/netfilter/nf_conntrack_reasm.c        | 16 +++---
 net/ipv6/raw.c                                 | 17 +++---
 net/ipv6/reassembly.c                          | 12 ++---
 net/ipv6/route.c                               |  4 +-
 net/ipv6/sit.c                                 |  8 +--
 net/ipv6/tcp_ipv6.c                            | 75 +++++++++++++-------------
 net/ipv6/udp.c                                 | 17 +++---
 net/ipv6/xfrm6_input.c                         |  4 +-
 net/ipv6/xfrm6_mode_beet.c                     |  6 +--
 net/ipv6/xfrm6_mode_ro.c                       |  2 +-
 net/ipv6/xfrm6_mode_transport.c                |  4 +-
 net/ipv6/xfrm6_mode_tunnel.c                   | 10 ++--
 net/ipv6/xfrm6_policy.c                        |  2 +-
 net/ipv6/xfrm6_tunnel.c                        |  2 +-
 net/netfilter/xt_DSCP.c                        |  4 +-
 net/netfilter/xt_TCPMSS.c                      |  4 +-
 net/netfilter/xt_dscp.c                        |  2 +-
 net/netfilter/xt_hashlimit.c                   |  4 +-
 net/netfilter/xt_length.c                      |  3 +-
 net/sched/cls_rsvp.h                           |  2 +-
 net/sched/sch_dsmark.c                         |  4 +-
 net/sched/sch_sfq.c                            |  2 +-
 net/sctp/ipv6.c                                |  8 +--
 59 files changed, 296 insertions(+), 292 deletions(-)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 8555afa574a..b8cf777542f 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -1304,8 +1304,8 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 			break;
 		}
 
-		hash_start = (char*)&(skb->nh.ipv6h->daddr);
-		hash_size = sizeof(skb->nh.ipv6h->daddr);
+		hash_start = (char *)&(ipv6_hdr(skb)->daddr);
+		hash_size = sizeof(ipv6_hdr(skb)->daddr);
 		break;
 	case ETH_P_IPX:
 		if (ipx_hdr(skb)->ipx_checksum !=
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index c324866c978..a3d9986b417 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2899,13 +2899,11 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 			cmd_length = E1000_TXD_CMD_IP;
 			ipcse = skb->h.raw - skb->data - 1;
 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
-			skb->nh.ipv6h->payload_len = 0;
+			ipv6_hdr(skb)->payload_len = 0;
 			skb->h.th->check =
-				~csum_ipv6_magic(&skb->nh.ipv6h->saddr,
-						 &skb->nh.ipv6h->daddr,
-						 0,
-						 IPPROTO_TCP,
-						 0);
+				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+						 &ipv6_hdr(skb)->daddr,
+						 0, IPPROTO_TCP, 0);
 			ipcse = 0;
 		}
 		ipcss = skb_network_offset(skb);
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index 1574247abaa..90da58b4e53 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -479,9 +479,11 @@ qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
 						  skb->h.raw,
 						  skb->h.th->doff * 4);
 	else
-		eddp = qeth_eddp_create_eddp_data(qhdr, (u8 *)skb->nh.ipv6h,
-				sizeof(struct ipv6hdr),
-				(u8 *)skb->h.th, skb->h.th->doff*4);
+		eddp = qeth_eddp_create_eddp_data(qhdr,
+						  skb_network_header(skb),
+						  sizeof(struct ipv6hdr),
+						  skb->h.raw,
+						  skb->h.th->doff * 4);
 
 	if (eddp == NULL) {
 		QETH_DBF_TEXT(trace, 2, "eddpfcnm");
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index 8a07d548a05..df7f279ec40 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -4053,7 +4053,8 @@ qeth_fill_header(struct qeth_card *card, struct qeth_hdr *hdr,
 			       skb->dst->neighbour->primary_key, 16);
 		} else {
 			/* fill in destination address used in ip header */
-			memcpy(hdr->hdr.l3.dest_addr, &skb->nh.ipv6h->daddr, 16);
+			memcpy(hdr->hdr.l3.dest_addr,
+			       &ipv6_hdr(skb)->daddr, 16);
 		}
 	} else { /* passthrough */
                 if((skb->dev->type == ARPHRD_IEEE802_TR) &&
diff --git a/drivers/s390/net/qeth_tso.h b/drivers/s390/net/qeth_tso.h
index 255cb2e9c79..4040bdd8c32 100644
--- a/drivers/s390/net/qeth_tso.h
+++ b/drivers/s390/net/qeth_tso.h
@@ -64,7 +64,7 @@ static inline void
 qeth_tso_set_tcpip_header(struct qeth_card *card, struct sk_buff *skb)
 {
 	struct iphdr *iph    = ip_hdr(skb);
-	struct ipv6hdr *ip6h = skb->nh.ipv6h;
+	struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	struct tcphdr *tcph  = skb->h.th;
 
 	tcph->check = 0;
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index e046b22a222..ec79c59b207 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -223,6 +223,11 @@ enum {
 #include <net/if_inet6.h>       /* struct ipv6_mc_socklist */
 #include <net/inet_sock.h>
 
+static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb)
+{
+	return (struct ipv6hdr *)skb_network_header(skb);
+}
+
 /* 
    This structure contains results of exthdrs parsing
    as offsets from skb->nh.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9cb674b12b2..31806a7ce40 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -247,7 +247,6 @@ struct sk_buff {
 	} h;
 
 	union {
-		struct ipv6hdr	*ipv6h;
 		unsigned char	*raw;
 	} nh;
 
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index 6fd4452c15d..06a2c69a89e 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -122,7 +122,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb)
 	case __constant_htons(ETH_P_IPV6):
 		if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
 		    skb->tail)
-			return IP6_ECN_set_ce(skb->nh.ipv6h);
+			return IP6_ECN_set_ce(ipv6_hdr(skb));
 		break;
 	}
 
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 0ee74b1e477..f2796c97b4a 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -372,7 +372,7 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
 /* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway */
 static int check_hbh_len(struct sk_buff *skb)
 {
-	unsigned char *raw = (u8 *) (skb->nh.ipv6h + 1);
+	unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1);
 	u32 pkt_len;
 	const unsigned char *nh = skb_network_header(skb);
 	int off = raw - nh;
@@ -400,7 +400,7 @@ static int check_hbh_len(struct sk_buff *skb)
 				goto bad;
 			pkt_len = ntohl(*(__be32 *) (nh + off + 2));
 			if (pkt_len <= IPV6_MAXPLEN ||
-			    skb->nh.ipv6h->payload_len)
+			    ipv6_hdr(skb)->payload_len)
 				goto bad;
 			if (pkt_len > skb->len - sizeof(struct ipv6hdr))
 				goto bad;
@@ -441,7 +441,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto inhdr_error;
 
-	hdr = skb->nh.ipv6h;
+	hdr = ipv6_hdr(skb);
 
 	if (hdr->version != 6)
 		goto inhdr_error;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index e0faff8eb65..ee82364c8f3 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2736,7 +2736,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	skb->protocol = protocol;
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
-	skb->nh.ipv6h = iph;
+	skb->nh.raw = (unsigned char *)iph;
 	skb->h.uh = udph;
 
 	if (pkt_dev->nfrags <= 0)
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 627d0c3c51c..64eac2515aa 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -84,8 +84,8 @@ static inline __u32 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
 
 static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb)
 {
-	return secure_dccpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
-					     skb->nh.ipv6h->saddr.s6_addr32,
+	return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
+					     ipv6_hdr(skb)->saddr.s6_addr32,
 					     dccp_hdr(skb)->dccph_dport,
 					     dccp_hdr(skb)->dccph_sport     );
 
@@ -313,6 +313,7 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req)
 static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 {
 	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
+	struct ipv6hdr *rxip6h;
 	const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
 				       sizeof(struct dccp_hdr_ext) +
 				       sizeof(struct dccp_hdr_reset);
@@ -352,12 +353,13 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
 
 	dccp_csum_outgoing(skb);
-	dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxskb->nh.ipv6h->saddr,
-						      &rxskb->nh.ipv6h->daddr);
+	rxip6h = ipv6_hdr(rxskb);
+	dh->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr,
+						      &rxip6h->daddr);
 
 	memset(&fl, 0, sizeof(fl));
-	ipv6_addr_copy(&fl.fl6_dst, &rxskb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&fl.fl6_src, &rxskb->nh.ipv6h->daddr);
+	ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr);
+	ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr);
 
 	fl.proto = IPPROTO_DCCP;
 	fl.oif = inet6_iif(rxskb);
@@ -390,7 +392,7 @@ static struct request_sock_ops dccp6_request_sock_ops = {
 static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 {
 	const struct dccp_hdr *dh = dccp_hdr(skb);
-	const struct ipv6hdr *iph = skb->nh.ipv6h;
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct sock *nsk;
 	struct request_sock **prev;
 	/* Find possible connection requests. */
@@ -460,8 +462,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop_and_free;
 
 	ireq6 = inet6_rsk(req);
-	ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&ireq6->loc_addr, &skb->nh.ipv6h->daddr);
+	ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
+	ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
 	ireq6->pktopts	= NULL;
 
 	if (ipv6_opt_accepted(sk, skb) ||
@@ -546,7 +548,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 		newnp->pktoptions  = NULL;
 		newnp->opt	   = NULL;
 		newnp->mcast_oif   = inet6_iif(skb);
-		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
+		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
 
 		/*
 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -653,7 +655,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 	}
 	newnp->opt	  = NULL;
 	newnp->mcast_oif  = inet6_iif(skb);
-	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
 
 	/*
 	 * Clone native IPv6 options from listening socket (if any)
@@ -826,8 +828,8 @@ static int dccp_v6_rcv(struct sk_buff **pskb)
 		goto discard_it;
 
 	/* Step 1: If header checksum is incorrect, drop packet and return. */
-	if (dccp_v6_csum_finish(skb, &skb->nh.ipv6h->saddr,
-				     &skb->nh.ipv6h->daddr)) {
+	if (dccp_v6_csum_finish(skb, &ipv6_hdr(skb)->saddr,
+				     &ipv6_hdr(skb)->daddr)) {
 		DCCP_WARN("dropped packet with invalid checksum\n");
 		goto discard_it;
 	}
@@ -844,9 +846,9 @@ static int dccp_v6_rcv(struct sk_buff **pskb)
 
 	/* Step 2:
 	 *	Look up flow ID in table and get corresponding socket */
-	sk = __inet6_lookup(&dccp_hashinfo, &skb->nh.ipv6h->saddr,
+	sk = __inet6_lookup(&dccp_hashinfo, &ipv6_hdr(skb)->saddr,
 			    dh->dccph_sport,
-			    &skb->nh.ipv6h->daddr, ntohs(dh->dccph_dport),
+			    &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport),
 			    inet6_iif(skb));
 	/*
 	 * Step 2:
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 851f46b910f..969fe31723a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -535,7 +535,7 @@ static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
 		if (skb->protocol == htons(ETH_P_IP)) {
 			IP_ECN_set_ce(ip_hdr(skb));
 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
-			IP6_ECN_set_ce(skb->nh.ipv6h);
+			IP6_ECN_set_ce(ipv6_hdr(skb));
 		}
 	}
 }
@@ -721,7 +721,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			addr_type = ipv6_addr_type(addr6);
 
 			if (addr_type == IPV6_ADDR_ANY) {
-				addr6 = &skb->nh.ipv6h->daddr;
+				addr6 = &ipv6_hdr(skb)->daddr;
 				addr_type = ipv6_addr_type(addr6);
 			}
 
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index faa1b9a76e7..edba75610a4 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -26,7 +26,7 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
 static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
 {
 	if (INET_ECN_is_ce(iph->tos))
-		IP6_ECN_set_ce(skb->nh.ipv6h);
+		IP6_ECN_set_ce(ipv6_hdr(skb));
 }
 
 /* Add encapsulation header.
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 1c914386982..b682d2368c2 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -325,6 +325,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	 */
 
 	struct ipv6_auth_hdr *ah;
+	struct ipv6hdr *ip6h;
 	struct ah_data *ahp;
 	unsigned char *tmp_hdr = NULL;
 	u16 hdr_len;
@@ -357,13 +358,14 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	tmp_hdr = kmemdup(skb_network_header(skb), hdr_len, GFP_ATOMIC);
 	if (!tmp_hdr)
 		goto out;
-	if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN))
+	ip6h = ipv6_hdr(skb);
+	if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN))
 		goto free_out;
-	skb->nh.ipv6h->priority    = 0;
-	skb->nh.ipv6h->flow_lbl[0] = 0;
-	skb->nh.ipv6h->flow_lbl[1] = 0;
-	skb->nh.ipv6h->flow_lbl[2] = 0;
-	skb->nh.ipv6h->hop_limit   = 0;
+	ip6h->priority    = 0;
+	ip6h->flow_lbl[0] = 0;
+	ip6h->flow_lbl[1] = 0;
+	ip6h->flow_lbl[2] = 0;
+	ip6h->hop_limit   = 0;
 
 	{
 		u8 auth_data[MAX_AH_AUTH_LEN];
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index ac95d3bfdfb..f429290c2c3 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -254,7 +254,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 
 	skb_put(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
-	iph = skb->nh.ipv6h;
+	iph = ipv6_hdr(skb);
 	ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
 
 	serr = SKB_EXT_ERR(skb);
@@ -340,7 +340,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 		sin->sin6_flowinfo = 0;
 		sin->sin6_scope_id = 0;
 		if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
-			ipv6_addr_copy(&sin->sin6_addr, &skb->nh.ipv6h->saddr);
+			ipv6_addr_copy(&sin->sin6_addr, &ipv6_hdr(skb)->saddr);
 			if (np->rxopt.all)
 				datagram_recv_ctl(sk, msg, skb);
 			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -391,17 +391,17 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		struct in6_pktinfo src_info;
 
 		src_info.ipi6_ifindex = opt->iif;
-		ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
+		ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
 		put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
 	}
 
 	if (np->rxopt.bits.rxhlim) {
-		int hlim = skb->nh.ipv6h->hop_limit;
+		int hlim = ipv6_hdr(skb)->hop_limit;
 		put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
 	}
 
 	if (np->rxopt.bits.rxtclass) {
-		int tclass = (ntohl(*(__be32 *)skb->nh.ipv6h) >> 20) & 0xff;
+		int tclass = (ntohl(*(__be32 *)ipv6_hdr(skb)) >> 20) & 0xff;
 		put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
 	}
 
@@ -428,7 +428,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		 * IPV6_RECVDSTOPTS is more generic. --yoshfuji
 		 */
 		unsigned int off = sizeof(struct ipv6hdr);
-		u8 nexthdr = skb->nh.ipv6h->nexthdr;
+		u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 
 		while (off <= opt->lastopt) {
 			unsigned len;
@@ -466,11 +466,11 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		struct in6_pktinfo src_info;
 
 		src_info.ipi6_ifindex = opt->iif;
-		ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr);
+		ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
 		put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
 	}
 	if (np->rxopt.bits.rxohlim) {
-		int hlim = skb->nh.ipv6h->hop_limit;
+		int hlim = ipv6_hdr(skb)->hop_limit;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
 	}
 	if (np->rxopt.bits.ohopopts && opt->hop) {
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 6e6b57ac801..7aff380e74e 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -191,7 +191,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb->ip_summed = CHECKSUM_NONE;
 
 	esph = (struct ipv6_esp_hdr*)skb->data;
-	iph = skb->nh.ipv6h;
+	iph = ipv6_hdr(skb);
 
 	/* Get ivec. This can be wrong, check against another impls. */
 	if (esp->conf.ivlen)
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 9ebf120ba6d..dab069b0b3f 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -125,7 +125,7 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
 		/* Actually, it is redundant check. icmp_send
 		   will recheck in any case.
 		 */
-		if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
+		if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr))
 			break;
 	case 2: /* send ICMP PARM PROB regardless and drop packet */
 		icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
@@ -202,7 +202,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 	struct sk_buff *skb = *skbp;
 	struct ipv6_destopt_hao *hao;
 	struct inet6_skb_parm *opt = IP6CB(skb);
-	struct ipv6hdr *ipv6h = skb->nh.ipv6h;
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	struct in6_addr tmp_addr;
 	int ret;
 
@@ -248,7 +248,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 		*skbp = skb = skb2;
 		hao = (struct ipv6_destopt_hao *)(skb_network_header(skb2) +
 						  optoff);
-		ipv6h = skb2->nh.ipv6h;
+		ipv6h = ipv6_hdr(skb2);
 	}
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -414,7 +414,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 		return -1;
 	}
 
-	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr) ||
+	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) ||
 	    skb->pkt_type != PACKET_HOST) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INADDRERRORS);
@@ -522,7 +522,7 @@ looped_back:
 #ifdef CONFIG_IPV6_MIP6
 	case IPV6_SRCRT_TYPE_2:
 		if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
-				     (xfrm_address_t *)&skb->nh.ipv6h->saddr,
+				     (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
 				     IPPROTO_ROUTING) < 0) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INADDRERRORS);
@@ -549,8 +549,8 @@ looped_back:
 	}
 
 	ipv6_addr_copy(&daddr, addr);
-	ipv6_addr_copy(addr, &skb->nh.ipv6h->daddr);
-	ipv6_addr_copy(&skb->nh.ipv6h->daddr, &daddr);
+	ipv6_addr_copy(addr, &ipv6_hdr(skb)->daddr);
+	ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &daddr);
 
 	dst_release(xchg(&skb->dst, NULL));
 	ip6_route_input(skb);
@@ -561,7 +561,7 @@ looped_back:
 	}
 
 	if (skb->dst->dev->flags&IFF_LOOPBACK) {
-		if (skb->nh.ipv6h->hop_limit <= 1) {
+		if (ipv6_hdr(skb)->hop_limit <= 1) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_INHDRERRORS);
 			icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
@@ -569,7 +569,7 @@ looped_back:
 			kfree_skb(skb);
 			return -1;
 		}
-		skb->nh.ipv6h->hop_limit--;
+		ipv6_hdr(skb)->hop_limit--;
 		goto looped_back;
 	}
 
@@ -698,7 +698,7 @@ static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff)
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
 		return 0;
 	}
-	if (skb->nh.ipv6h->payload_len) {
+	if (ipv6_hdr(skb)->payload_len) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
 		return 0;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index e5293b34229..3a01effda69 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -129,9 +129,9 @@ void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
 
 static int is_ineligible(struct sk_buff *skb)
 {
-	int ptr = (u8*)(skb->nh.ipv6h+1) - skb->data;
+	int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
 	int len = skb->len - ptr;
-	__u8 nexthdr = skb->nh.ipv6h->nexthdr;
+	__u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 
 	if (len < 0)
 		return 1;
@@ -275,7 +275,7 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
 #ifdef CONFIG_IPV6_MIP6
 static void mip6_addr_swap(struct sk_buff *skb)
 {
-	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct inet6_skb_parm *opt = IP6CB(skb);
 	struct ipv6_destopt_hao *hao;
 	struct in6_addr tmp;
@@ -303,7 +303,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 		 struct net_device *dev)
 {
 	struct inet6_dev *idev = NULL;
-	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct sock *sk;
 	struct ipv6_pinfo *np;
 	struct in6_addr *saddr = NULL;
@@ -485,7 +485,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	int hlimit;
 	int tclass;
 
-	saddr = &skb->nh.ipv6h->daddr;
+	saddr = &ipv6_hdr(skb)->daddr;
 
 	if (!ipv6_unicast_destination(skb))
 		saddr = NULL;
@@ -495,7 +495,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 
 	memset(&fl, 0, sizeof(fl));
 	fl.proto = IPPROTO_ICMPV6;
-	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
+	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
 	if (saddr)
 		ipv6_addr_copy(&fl.fl6_src, saddr);
 	fl.oif = skb->dev->ifindex;
@@ -583,8 +583,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
 	if (!pskb_may_pull(skb, inner_offset+8))
 		return;
 
-	saddr = &skb->nh.ipv6h->saddr;
-	daddr = &skb->nh.ipv6h->daddr;
+	saddr = &ipv6_hdr(skb)->saddr;
+	daddr = &ipv6_hdr(skb)->daddr;
 
 	/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
 	   Without this we will not able f.e. to make source routed
@@ -628,8 +628,8 @@ static int icmpv6_rcv(struct sk_buff **pskb)
 
 	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
 
-	saddr = &skb->nh.ipv6h->saddr;
-	daddr = &skb->nh.ipv6h->daddr;
+	saddr = &ipv6_hdr(skb)->saddr;
+	daddr = &ipv6_hdr(skb)->daddr;
 
 	/* Perform checksum. */
 	switch (skb->ip_summed) {
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index aecc74da072..9c3c787a21c 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -96,7 +96,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
 		goto err;
 
-	hdr = skb->nh.ipv6h;
+	hdr = ipv6_hdr(skb);
 
 	if (hdr->version != 6)
 		goto err;
@@ -116,7 +116,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
 			goto drop;
 		}
-		hdr = skb->nh.ipv6h;
+		hdr = ipv6_hdr(skb);
 	}
 
 	if (hdr->nexthdr == NEXTHDR_HOP) {
@@ -183,7 +183,7 @@ resubmit:
 
 			skb_postpull_rcsum(skb, skb_network_header(skb),
 					   skb->h.raw - skb->nh.raw);
-			hdr = skb->nh.ipv6h;
+			hdr = ipv6_hdr(skb);
 			if (ipv6_addr_is_multicast(&hdr->daddr) &&
 			    !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
 			    &hdr->saddr) &&
@@ -234,7 +234,7 @@ int ip6_mc_input(struct sk_buff *skb)
 
 	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS);
 
-	hdr = skb->nh.ipv6h;
+	hdr = ipv6_hdr(skb);
 	deliver = likely(!(skb->dev->flags & (IFF_PROMISC|IFF_ALLMULTI))) ||
 	    ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index bd25825c0cc..1900c622686 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -107,13 +107,13 @@ static int ip6_output2(struct sk_buff *skb)
 	skb->protocol = htons(ETH_P_IPV6);
 	skb->dev = dev;
 
-	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
+	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
 		struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
 		struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 
 		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
-		    ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
-				&skb->nh.ipv6h->saddr)) {
+		    ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
+					&ipv6_hdr(skb)->saddr)) {
 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 
 			/* Do not check for IFF_ALLMULTI; multicast routing
@@ -124,7 +124,7 @@ static int ip6_output2(struct sk_buff *skb)
 					newskb->dev,
 					ip6_dev_loopback_xmit);
 
-			if (skb->nh.ipv6h->hop_limit == 0) {
+			if (ipv6_hdr(skb)->hop_limit == 0) {
 				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
 				kfree_skb(skb);
 				return 0;
@@ -193,7 +193,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 
 	skb_push(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
-	hdr = skb->nh.ipv6h;
+	hdr = ipv6_hdr(skb);
 
 	/*
 	 *	Fill in the IPv6 header
@@ -263,8 +263,8 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
 
 	totlen = len + sizeof(struct ipv6hdr);
 
-	hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
-	skb->nh.ipv6h = hdr;
+	skb->nh.raw = skb_put(skb, sizeof(struct ipv6hdr));
+	hdr = ipv6_hdr(skb);
 
 	*(__be32*)hdr = htonl(0x60000000);
 
@@ -309,7 +309,7 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 
 static int ip6_forward_proxy_check(struct sk_buff *skb)
 {
-	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	struct ipv6hdr *hdr = ipv6_hdr(skb);
 	u8 nexthdr = hdr->nexthdr;
 	int offset;
 
@@ -366,7 +366,7 @@ static inline int ip6_forward_finish(struct sk_buff *skb)
 int ip6_forward(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
-	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct inet6_skb_parm *opt = IP6CB(skb);
 
 	if (ipv6_devconf.forwarding == 0)
@@ -475,7 +475,7 @@ int ip6_forward(struct sk_buff *skb)
 		goto drop;
 	}
 
-	hdr = skb->nh.ipv6h;
+	hdr = ipv6_hdr(skb);
 
 	/* Mangling hops number delayed to point after skb COW */
 
@@ -527,10 +527,11 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 {
 	u16 offset = sizeof(struct ipv6hdr);
-	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
+	struct ipv6_opt_hdr *exthdr =
+				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
 	unsigned int packet_len = skb->tail - skb_network_header(skb);
 	int found_rhdr = 0;
-	*nexthdr = &skb->nh.ipv6h->nexthdr;
+	*nexthdr = &ipv6_hdr(skb)->nexthdr;
 
 	while (offset + 1 <= packet_len) {
 
@@ -643,7 +644,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		first_len = skb_pagelen(skb);
 		skb->data_len = first_len - skb_headlen(skb);
 		skb->len = first_len;
-		skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
+		ipv6_hdr(skb)->payload_len = htons(first_len -
+						   sizeof(struct ipv6hdr));
 
 		dst_hold(&rt->u.dst);
 
@@ -665,7 +667,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 				if (frag->next != NULL)
 					fh->frag_off |= htons(IP6_MF);
 				fh->identification = frag_id;
-				frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+				ipv6_hdr(frag)->payload_len =
+						htons(frag->len -
+						      sizeof(struct ipv6hdr));
 				ip6_copy_metadata(frag, skb);
 			}
 
@@ -779,7 +783,8 @@ slow_path:
 		fh->frag_off = htons(offset);
 		if (left > 0)
 			fh->frag_off |= htons(IP6_MF);
-		frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+		ipv6_hdr(frag)->payload_len = htons(frag->len -
+						    sizeof(struct ipv6hdr));
 
 		ptr += len;
 		offset += len;
@@ -1355,7 +1360,7 @@ int ip6_push_pending_frames(struct sock *sk)
 
 	skb_push(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
-	hdr = skb->nh.ipv6h;
+	hdr = ipv6_hdr(skb);
 
 	*(__be32*)hdr = fl->fl6_flowlabel |
 		     htonl(0x60000000 | ((int)np->cork.tclass << 20));
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index bb65779be7a..05b59a77bc6 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -602,7 +602,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		skb_reset_network_header(skb2);
 
 		/* Try to guess incoming interface */
-		rt = rt6_lookup(&skb2->nh.ipv6h->saddr, NULL, 0, 0);
+		rt = rt6_lookup(&ipv6_hdr(skb2)->saddr, NULL, 0, 0);
 
 		if (rt && rt->rt6i_dev)
 			skb2->dev = rt->rt6i_dev;
@@ -636,10 +636,10 @@ static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
 					struct sk_buff *skb)
 {
 	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
-		ipv6_copy_dscp(ipv6h, skb->nh.ipv6h);
+		ipv6_copy_dscp(ipv6h, ipv6_hdr(skb));
 
 	if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
-		IP6_ECN_set_ce(skb->nh.ipv6h);
+		IP6_ECN_set_ce(ipv6_hdr(skb));
 }
 
 static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
@@ -679,10 +679,8 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
 						    struct ipv6hdr *ipv6h,
 						    struct sk_buff *skb))
 {
-	struct ipv6hdr *ipv6h;
 	struct ip6_tnl *t;
-
-	ipv6h = skb->nh.ipv6h;
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 
 	read_lock(&ip6_tnl_lock);
 
@@ -836,7 +834,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 {
 	struct ip6_tnl *t = netdev_priv(dev);
 	struct net_device_stats *stats = &t->stat;
-	struct ipv6hdr *ipv6h = skb->nh.ipv6h;
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	struct ipv6_tel_txoption opt;
 	struct dst_entry *dst;
 	struct net_device *tdev;
@@ -909,7 +907,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	}
 	skb_push(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
-	ipv6h = skb->nh.ipv6h;
+	ipv6h = ipv6_hdr(skb);
 	*(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
 	dsfield = INET_ECN_encapsulate(0, dsfield);
 	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
@@ -983,7 +981,7 @@ static inline int
 ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
-	struct ipv6hdr *ipv6h = skb->nh.ipv6h;
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	int encap_limit = -1;
 	__u16 offset;
 	struct flowi fl;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 3e71d1691b7..e2404a62968 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -79,7 +79,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb->ip_summed = CHECKSUM_NONE;
 
 	/* Remove ipcomp header and decompress original payload */
-	iph = skb->nh.ipv6h;
+	iph = ipv6_hdr(skb);
 	ipch = (void *)skb->data;
 	skb->h.raw = skb->nh.raw + sizeof(*ipch);
 	__skb_pull(skb, sizeof(*ipch));
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 1d56b465bdd..d16e0fd2cd8 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -101,7 +101,7 @@ static int ipv6_gso_send_check(struct sk_buff *skb)
 	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
 		goto out;
 
-	ipv6h = skb->nh.ipv6h;
+	ipv6h = ipv6_hdr(skb);
 	__skb_pull(skb, sizeof(*ipv6h));
 	err = -EPROTONOSUPPORT;
 
@@ -137,7 +137,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
 	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
 		goto out;
 
-	ipv6h = skb->nh.ipv6h;
+	ipv6h = ipv6_hdr(skb);
 	__skb_pull(skb, sizeof(*ipv6h));
 	segs = ERR_PTR(-EPROTONOSUPPORT);
 
@@ -153,7 +153,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
 		goto out;
 
 	for (skb = segs; skb; skb = skb->next) {
-		ipv6h = skb->nh.ipv6h;
+		ipv6h = ipv6_hdr(skb);
 		ipv6h->payload_len = htons(skb->len - skb->mac_len -
 					   sizeof(*ipv6h));
 	}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 924e24907c3..b2b37ba48b9 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1167,11 +1167,11 @@ int igmp6_event_query(struct sk_buff *skb)
 		return -EINVAL;
 
 	/* compute payload length excluding extension headers */
-	len = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
-	len -= (char *)skb->h.raw - (char *)skb->nh.ipv6h;
+	len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
+	len -= skb->h.raw - skb->nh.raw;
 
 	/* Drop queries with not link local source */
-	if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr)&IPV6_ADDR_LINKLOCAL))
+	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
 		return -EINVAL;
 
 	idev = in6_dev_get(skb->dev);
@@ -1303,7 +1303,7 @@ int igmp6_event_report(struct sk_buff *skb)
 	hdr = (struct icmp6hdr*) skb->h.raw;
 
 	/* Drop reports with not link local source */
-	addr_type = ipv6_addr_type(&skb->nh.ipv6h->saddr);
+	addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
 	if (addr_type != IPV6_ADDR_ANY &&
 	    !(addr_type&IPV6_ADDR_LINKLOCAL))
 		return -EINVAL;
@@ -1441,7 +1441,7 @@ static inline int mld_dev_queue_xmit2(struct sk_buff *skb)
 		unsigned char ha[MAX_ADDR_LEN];
 		int err;
 
-		ndisc_mc_map(&skb->nh.ipv6h->daddr, ha, dev, 1);
+		ndisc_mc_map(&ipv6_hdr(skb)->daddr, ha, dev, 1);
 		err = dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, skb->len);
 		if (err < 0) {
 			kfree_skb(skb);
@@ -1459,15 +1459,14 @@ static inline int mld_dev_queue_xmit(struct sk_buff *skb)
 
 static void mld_sendpack(struct sk_buff *skb)
 {
-	struct ipv6hdr *pip6 = skb->nh.ipv6h;
+	struct ipv6hdr *pip6 = ipv6_hdr(skb);
 	struct mld2_report *pmr = (struct mld2_report *)skb->h.raw;
 	int payload_len, mldlen;
 	struct inet6_dev *idev = in6_dev_get(skb->dev);
 	int err;
 
 	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
-	payload_len = skb->tail - (unsigned char *)skb->nh.ipv6h -
-		sizeof(struct ipv6hdr);
+	payload_len = skb->tail - skb_network_header(skb) - sizeof(*pip6);
 	mldlen = skb->tail - skb->h.raw;
 	pip6->payload_len = htons(payload_len);
 
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index bb4033553f3..7b5f9d82e80 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -129,7 +129,7 @@ static struct mip6_report_rate_limiter mip6_report_rl = {
 
 static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data;
 
 	if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) &&
@@ -223,16 +223,16 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct
 
 	skb_get_timestamp(skb, &stamp);
 
-	if (!mip6_report_rl_allow(&stamp, &skb->nh.ipv6h->daddr,
-				  hao ? &hao->addr : &skb->nh.ipv6h->saddr,
+	if (!mip6_report_rl_allow(&stamp, &ipv6_hdr(skb)->daddr,
+				  hao ? &hao->addr : &ipv6_hdr(skb)->saddr,
 				  opt->iif))
 		goto out;
 
 	memset(&sel, 0, sizeof(sel));
-	memcpy(&sel.daddr, (xfrm_address_t *)&skb->nh.ipv6h->daddr,
+	memcpy(&sel.daddr, (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
 	       sizeof(sel.daddr));
 	sel.prefixlen_d = 128;
-	memcpy(&sel.saddr, (xfrm_address_t *)&skb->nh.ipv6h->saddr,
+	memcpy(&sel.saddr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
 	       sizeof(sel.saddr));
 	sel.prefixlen_s = 128;
 	sel.family = AF_INET6;
@@ -256,12 +256,13 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
 			       u8 **nexthdr)
 {
 	u16 offset = sizeof(struct ipv6hdr);
-	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
+	struct ipv6_opt_hdr *exthdr =
+				   (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
 	const unsigned char *nh = skb_network_header(skb);
 	unsigned int packet_len = skb->tail - nh;
 	int found_rhdr = 0;
 
-	*nexthdr = &skb->nh.ipv6h->nexthdr;
+	*nexthdr = &ipv6_hdr(skb)->nexthdr;
 
 	while (offset + 1 <= packet_len) {
 
@@ -387,12 +388,13 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
 			     u8 **nexthdr)
 {
 	u16 offset = sizeof(struct ipv6hdr);
-	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
+	struct ipv6_opt_hdr *exthdr =
+				   (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
 	const unsigned char *nh = skb_network_header(skb);
 	unsigned int packet_len = skb->tail - nh;
 	int found_rhdr = 0;
 
-	*nexthdr = &skb->nh.ipv6h->nexthdr;
+	*nexthdr = &ipv6_hdr(skb)->nexthdr;
 
 	while (offset + 1 <= packet_len) {
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a3e3d9e2f44..00feb4c4d98 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -598,7 +598,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
 				       dev->addr_len, dev->type);
 
 	/* checksum */
-	msg->icmph.icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr,
+	msg->icmph.icmp6_cksum = csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 						 daddr, len,
 						 IPPROTO_ICMPV6,
 						 csum_partial((__u8 *) msg,
@@ -697,7 +697,7 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
 				       dev->addr_len, dev->type);
 
 	/* checksum */
-	hdr->icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr, daddr, len,
+	hdr->icmp6_cksum = csum_ipv6_magic(&ipv6_hdr(skb)->saddr, daddr, len,
 					   IPPROTO_ICMPV6,
 					   csum_partial((__u8 *) hdr, len, 0));
 
@@ -736,8 +736,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
 	int probes = atomic_read(&neigh->probes);
 
-	if (skb && ipv6_chk_addr(&skb->nh.ipv6h->saddr, dev, 1))
-		saddr = &skb->nh.ipv6h->saddr;
+	if (skb && ipv6_chk_addr(&ipv6_hdr(skb)->saddr, dev, 1))
+		saddr = &ipv6_hdr(skb)->saddr;
 
 	if ((probes -= neigh->parms->ucast_probes) < 0) {
 		if (!(neigh->nud_state & NUD_VALID)) {
@@ -761,8 +761,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
 static void ndisc_recv_ns(struct sk_buff *skb)
 {
 	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
-	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
-	struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
+	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+	struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
 	u8 *lladdr = NULL;
 	u32 ndoptlen = skb->tail - msg->opt;
 	struct ndisc_options ndopts;
@@ -939,8 +939,8 @@ out:
 static void ndisc_recv_na(struct sk_buff *skb)
 {
 	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
-	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
-	struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
+	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
+	struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
 	u8 *lladdr = NULL;
 	u32 ndoptlen = skb->tail - msg->opt;
 	struct ndisc_options ndopts;
@@ -1044,7 +1044,7 @@ static void ndisc_recv_rs(struct sk_buff *skb)
 	unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
 	struct neighbour *neigh;
 	struct inet6_dev *idev;
-	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
 	struct ndisc_options ndopts;
 	u8 *lladdr = NULL;
 
@@ -1110,7 +1110,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 
 	optlen = (skb->tail - skb->h.raw) - sizeof(struct ra_msg);
 
-	if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
+	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
 		ND_PRINTK2(KERN_WARNING
 			   "ICMPv6 RA: source address is not link-local.\n");
 		return;
@@ -1176,7 +1176,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
 #endif
 
-	rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
+	rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
 
 	if (rt)
 		neigh = rt->rt6i_nexthop;
@@ -1191,7 +1191,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 		ND_PRINTK3(KERN_DEBUG
 			   "ICMPv6 RA: adding default router.\n");
 
-		rt = rt6_add_dflt_router(&skb->nh.ipv6h->saddr, skb->dev, pref);
+		rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
 		if (rt == NULL) {
 			ND_PRINTK0(KERN_ERR
 				   "ICMPv6 RA: %s() failed to add default route.\n",
@@ -1263,7 +1263,7 @@ skip_defrtr:
 	 */
 
 	if (!neigh)
-		neigh = __neigh_lookup(&nd_tbl, &skb->nh.ipv6h->saddr,
+		neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
 				       skb->dev, 1);
 	if (neigh) {
 		u8 *lladdr = NULL;
@@ -1292,7 +1292,7 @@ skip_defrtr:
 			if (((struct route_info *)p)->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
 				continue;
 			rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
-				      &skb->nh.ipv6h->saddr);
+				      &ipv6_hdr(skb)->saddr);
 		}
 	}
 #endif
@@ -1351,7 +1351,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
 	int optlen;
 	u8 *lladdr = NULL;
 
-	if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
+	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
 		ND_PRINTK2(KERN_WARNING
 			   "ICMPv6 Redirect: source address is not link-local.\n");
 		return;
@@ -1416,8 +1416,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
 
 	neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
 	if (neigh) {
-		rt6_redirect(dest, &skb->nh.ipv6h->daddr,
-			     &skb->nh.ipv6h->saddr, neigh, lladdr,
+		rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
+			     &ipv6_hdr(skb)->saddr, neigh, lladdr,
 			     on_link);
 		neigh_release(neigh);
 	}
@@ -1453,14 +1453,14 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 		return;
 	}
 
-	if (!ipv6_addr_equal(&skb->nh.ipv6h->daddr, target) &&
+	if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
 	    !(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) {
 		ND_PRINTK2(KERN_WARNING
 			"ICMPv6 Redirect: target address is not link-local.\n");
 		return;
 	}
 
-	ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr,
+	ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &ipv6_hdr(skb)->saddr,
 			dev->ifindex);
 
 	dst = ip6_route_output(NULL, &fl);
@@ -1515,7 +1515,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 	hlen = 0;
 
 	skb_reserve(buff, LL_RESERVED_SPACE(dev));
-	ip6_nd_hdr(sk, buff, dev, &saddr_buf, &skb->nh.ipv6h->saddr,
+	ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
 		   IPPROTO_ICMPV6, len);
 
 	icmph = (struct icmp6hdr *)skb_put(buff, len);
@@ -1531,7 +1531,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 	addrp = (struct in6_addr *)(icmph + 1);
 	ipv6_addr_copy(addrp, target);
 	addrp++;
-	ipv6_addr_copy(addrp, &skb->nh.ipv6h->daddr);
+	ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);
 
 	opt = (u8*) (addrp + 1);
 
@@ -1552,9 +1552,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 	*(opt++) = (rd_len >> 3);
 	opt += 6;
 
-	memcpy(opt, skb->nh.ipv6h, rd_len - 8);
+	memcpy(opt, ipv6_hdr(skb), rd_len - 8);
 
-	icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &skb->nh.ipv6h->saddr,
+	icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
 					     len, IPPROTO_ICMPV6,
 					     csum_partial((u8 *) icmph, len, 0));
 
@@ -1588,10 +1588,10 @@ int ndisc_rcv(struct sk_buff *skb)
 
 	__skb_push(skb, skb->data-skb->h.raw);
 
-	if (skb->nh.ipv6h->hop_limit != 255) {
+	if (ipv6_hdr(skb)->hop_limit != 255) {
 		ND_PRINTK2(KERN_WARNING
 			   "ICMPv6 NDISC: invalid hop-limit: %d\n",
-			   skb->nh.ipv6h->hop_limit);
+			   ipv6_hdr(skb)->hop_limit);
 		return 0;
 	}
 
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 1c405dd30c6..38b14961391 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -11,7 +11,7 @@
 
 int ip6_route_me_harder(struct sk_buff *skb)
 {
-	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct dst_entry *dst;
 	struct flowi fl = {
 		.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
@@ -61,7 +61,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info)
 	struct ip6_rt_info *rt_info = nf_info_reroute(info);
 
 	if (info->hook == NF_IP6_LOCAL_OUT) {
-		struct ipv6hdr *iph = skb->nh.ipv6h;
+		struct ipv6hdr *iph = ipv6_hdr(skb);
 
 		rt_info->daddr = iph->daddr;
 		rt_info->saddr = iph->saddr;
@@ -73,7 +73,7 @@ static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info)
 	struct ip6_rt_info *rt_info = nf_info_reroute(info);
 
 	if (info->hook == NF_IP6_LOCAL_OUT) {
-		struct ipv6hdr *iph = (*pskb)->nh.ipv6h;
+		struct ipv6hdr *iph = ipv6_hdr(*pskb);
 		if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
 		    !ipv6_addr_equal(&iph->saddr, &rt_info->saddr))
 			return ip6_route_me_harder(*pskb);
@@ -84,7 +84,7 @@ static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info)
 __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
 			     unsigned int dataoff, u_int8_t protocol)
 {
-	struct ipv6hdr *ip6h = skb->nh.ipv6h;
+	struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	__sum16 csum = 0;
 
 	switch (skb->ip_summed) {
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7c512e13f95..caf9e375a0f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -115,7 +115,7 @@ ip6_packet_match(const struct sk_buff *skb,
 {
 	size_t i;
 	unsigned long ret;
-	const struct ipv6hdr *ipv6 = skb->nh.ipv6h;
+	const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
 
 #define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
 
@@ -301,7 +301,7 @@ ip6t_do_table(struct sk_buff **pskb,
 				goto no_match;
 
 			ADD_COUNTER(e->counters,
-				    ntohs((*pskb)->nh.ipv6h->payload_len)
+				    ntohs(ipv6_hdr(*pskb)->payload_len)
 				    + IPV6_HDR_LEN,
 				    1);
 
@@ -1448,8 +1448,8 @@ static void __exit ip6_tables_fini(void)
 int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
 		  int target, unsigned short *fragoff)
 {
-	unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data;
-	u8 nexthdr = skb->nh.ipv6h->nexthdr;
+	unsigned int start = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
+	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 	unsigned int len = skb->len - start;
 
 	if (fragoff)
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index ccbab66277e..4115a576ba2 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -32,7 +32,7 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
 	if (!skb_make_writable(pskb, (*pskb)->len))
 		return NF_DROP;
 
-	ip6h = (*pskb)->nh.ipv6h;
+	ip6h = ipv6_hdr(*pskb);
 
 	switch (info->mode) {
 		case IP6T_HL_SET:
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index fc9e51a7778..54d176187f3 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -422,7 +422,7 @@ ip6t_log_packet(unsigned int pf,
 			printk(" ");
 	}
 
-	dump_packet(loginfo, skb, (u8*)skb->nh.ipv6h - skb->data, 1);
+	dump_packet(loginfo, skb, skb_network_offset(skb), 1);
 	printk("\n");
 	spin_unlock_bh(&log_lock);
 }
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 4441bed430f..cb3d2415a06 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -47,7 +47,7 @@ static void send_reset(struct sk_buff *oldskb)
 	struct tcphdr otcph, *tcph;
 	unsigned int otcplen, hh_len;
 	int tcphoff, needs_ack;
-	struct ipv6hdr *oip6h = oldskb->nh.ipv6h, *ip6h;
+	struct ipv6hdr *oip6h = ipv6_hdr(oldskb), *ip6h;
 	struct dst_entry *dst = NULL;
 	u8 proto;
 	struct flowi fl;
@@ -122,7 +122,7 @@ static void send_reset(struct sk_buff *oldskb)
 
 	skb_put(nskb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(nskb);
-	ip6h = nskb->nh.ipv6h;
+	ip6h = ipv6_hdr(nskb);
 	ip6h->version = 6;
 	ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT);
 	ip6h->nexthdr = IPPROTO_TCP;
@@ -156,8 +156,8 @@ static void send_reset(struct sk_buff *oldskb)
 	tcph->check = 0;
 
 	/* Adjust TCP checksum */
-	tcph->check = csum_ipv6_magic(&nskb->nh.ipv6h->saddr,
-				      &nskb->nh.ipv6h->daddr,
+	tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
+				      &ipv6_hdr(nskb)->daddr,
 				      sizeof(struct tcphdr), IPPROTO_TCP,
 				      csum_partial((char *)tcph,
 						   sizeof(struct tcphdr), 0));
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index c2676066a80..0f3dd932f0a 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -42,7 +42,7 @@ match(const struct sk_buff *skb,
 	memset(eui64, 0, sizeof(eui64));
 
 	if (eth_hdr(skb)->h_proto == htons(ETH_P_IPV6)) {
-		if (skb->nh.ipv6h->version == 0x6) {
+		if (ipv6_hdr(skb)->version == 0x6) {
 			memcpy(eui64, eth_hdr(skb)->h_source, 3);
 			memcpy(eui64 + 5, eth_hdr(skb)->h_source + 3, 3);
 			eui64[3] = 0xff;
@@ -50,7 +50,7 @@ match(const struct sk_buff *skb,
 			eui64[0] |= 0x02;
 
 			i = 0;
-			while ((skb->nh.ipv6h->saddr.s6_addr[8+i] == eui64[i])
+			while ((ipv6_hdr(skb)->saddr.s6_addr[8 + i] == eui64[i])
 			       && (i < 8))
 				i++;
 
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
index 37c8a4d4ed7..d606c0e6d6f 100644
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ b/net/ipv6/netfilter/ip6t_hl.c
@@ -25,7 +25,7 @@ static int match(const struct sk_buff *skb,
 		 int offset, unsigned int protoff, int *hotdrop)
 {
 	const struct ip6t_hl_info *info = matchinfo;
-	const struct ipv6hdr *ip6h = skb->nh.ipv6h;
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 
 	switch (info->mode) {
 		case IP6T_HL_EQ:
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 700a11d25de..fd6a0869099 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -45,7 +45,7 @@ ipv6header_match(const struct sk_buff *skb,
 	/* Make sure this isn't an evil packet */
 
 	/* type of the 1st exthdr */
-	nexthdr = skb->nh.ipv6h->nexthdr;
+	nexthdr = ipv6_hdr(skb)->nexthdr;
 	/* pointer to the 1st exthdr */
 	ptr = sizeof(struct ipv6hdr);
 	/* available length */
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index da2c1994539..85b1c272946 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -146,21 +146,21 @@ ip6t_local_hook(unsigned int hook,
 #endif
 
 	/* save source/dest address, mark, hoplimit, flowlabel, priority,  */
-	memcpy(&saddr, &(*pskb)->nh.ipv6h->saddr, sizeof(saddr));
-	memcpy(&daddr, &(*pskb)->nh.ipv6h->daddr, sizeof(daddr));
+	memcpy(&saddr, &ipv6_hdr(*pskb)->saddr, sizeof(saddr));
+	memcpy(&daddr, &ipv6_hdr(*pskb)->daddr, sizeof(daddr));
 	mark = (*pskb)->mark;
-	hop_limit = (*pskb)->nh.ipv6h->hop_limit;
+	hop_limit = ipv6_hdr(*pskb)->hop_limit;
 
 	/* flowlabel and prio (includes version, which shouldn't change either */
-	flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h);
+	flowlabel = *((u_int32_t *)ipv6_hdr(*pskb));
 
 	ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler);
 
 	if (ret != NF_DROP && ret != NF_STOLEN
-		&& (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr))
-		    || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr))
+		&& (memcmp(&ipv6_hdr(*pskb)->saddr, &saddr, sizeof(saddr))
+		    || memcmp(&ipv6_hdr(*pskb)->daddr, &daddr, sizeof(daddr))
 		    || (*pskb)->mark != mark
-		    || (*pskb)->nh.ipv6h->hop_limit != hop_limit))
+		    || ipv6_hdr(*pskb)->hop_limit != hop_limit))
 		return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP;
 
 	return ret;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index d1102455668..fe7f46c4dbf 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -138,16 +138,10 @@ static int
 ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
 	     u_int8_t *protonum)
 {
-	unsigned int extoff;
-	unsigned char pnum;
-	int protoff;
-
-	extoff = (u8*)((*pskb)->nh.ipv6h + 1) - (*pskb)->data;
-	pnum = (*pskb)->nh.ipv6h->nexthdr;
-
-	protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
-					 (*pskb)->len - extoff);
-
+	unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data;
+	unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
+	int protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
+					     (*pskb)->len - extoff);
 	/*
 	 * (protoff == (*pskb)->len) mean that the packet doesn't have no data
 	 * except of IPv6 & ext headers. but it's tracked anyway. - YK
@@ -179,9 +173,8 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
 	struct nf_conn_help *help;
 	enum ip_conntrack_info ctinfo;
 	unsigned int ret, protoff;
-	unsigned int extoff = (u8*)((*pskb)->nh.ipv6h + 1)
-			      - (*pskb)->data;
-	unsigned char pnum = (*pskb)->nh.ipv6h->nexthdr;
+	unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data;
+	unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
 
 
 	/* This is where we call the helper: as the packet goes out. */
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index bc1d0958400..d3fbb1f1caf 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -400,8 +400,8 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 	}
 
 	offset = ntohs(fhdr->frag_off) & ~0x7;
-	end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
-			((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
+	end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
+			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
 
 	if ((unsigned int)end > IPV6_MAXPLEN) {
 		DEBUGP("offset is too large.\n");
@@ -652,7 +652,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 	head->next = NULL;
 	head->dev = dev;
 	head->tstamp = fq->stamp;
-	head->nh.ipv6h->payload_len = htons(payload_len);
+	ipv6_hdr(head)->payload_len = htons(payload_len);
 
 	/* Yes, and fold redundant checksum back. 8) */
 	if (head->ip_summed == CHECKSUM_COMPLETE)
@@ -706,9 +706,9 @@ out_fail:
 static int
 find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
 {
-	u8 nexthdr = skb->nh.ipv6h->nexthdr;
-	u8 prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data;
-	int start = (u8 *)(skb->nh.ipv6h+1) - skb->data;
+	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+	u8 prev_nhoff = (u8 *)&ipv6_hdr(skb)->nexthdr - skb->data;
+	int start = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
 	int len = skb->len - start;
 	u8 prevhdr = NEXTHDR_IPV6;
 
@@ -764,7 +764,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 	struct sk_buff *ret_skb = NULL;
 
 	/* Jumbo payload inhibits frag. header */
-	if (skb->nh.ipv6h->payload_len == 0) {
+	if (ipv6_hdr(skb)->payload_len == 0) {
 		DEBUGP("payload len = 0\n");
 		return skb;
 	}
@@ -786,7 +786,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 	}
 
 	clone->h.raw = clone->data + fhoff;
-	hdr = clone->nh.ipv6h;
+	hdr = ipv6_hdr(clone);
 	fhdr = (struct frag_hdr *)clone->h.raw;
 
 	if (!(fhdr->frag_off & htons(0xFFF9))) {
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 9b2bcde73f1..0e2b56ce0a5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -152,7 +152,7 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 	int delivered = 0;
 	__u8 hash;
 
-	saddr = &skb->nh.ipv6h->saddr;
+	saddr = &ipv6_hdr(skb)->saddr;
 	daddr = saddr + 1;
 
 	hash = nexthdr & (MAX_INET_PROTOS - 1);
@@ -363,15 +363,16 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		skb_postpull_rcsum(skb, skb_network_header(skb),
 				   skb->h.raw - skb->nh.raw);
-		if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr,
-				     &skb->nh.ipv6h->daddr,
+		if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+				     &ipv6_hdr(skb)->daddr,
 				     skb->len, inet->num, skb->csum))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
-		skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr,
-					     &skb->nh.ipv6h->daddr,
-					     skb->len, inet->num, 0));
+		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+							 &ipv6_hdr(skb)->daddr,
+							 skb->len,
+							 inet->num, 0));
 
 	if (inet->hdrincl) {
 		if (skb_checksum_complete(skb)) {
@@ -438,7 +439,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (sin6) {
 		sin6->sin6_family = AF_INET6;
 		sin6->sin6_port = 0;
-		ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
+		ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
 		sin6->sin6_flowinfo = 0;
 		sin6->sin6_scope_id = 0;
 		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -578,7 +579,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 
 	skb_put(skb, length);
 	skb_reset_network_header(skb);
-	iph = skb->nh.ipv6h;
+	iph = ipv6_hdr(skb);
 
 	skb->ip_summed = CHECKSUM_NONE;
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index f85e49acb91..156a37fe3ff 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -430,8 +430,8 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 		goto err;
 
 	offset = ntohs(fhdr->frag_off) & ~0x7;
-	end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
-			((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
+	end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
+			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
 
 	if ((unsigned int)end > IPV6_MAXPLEN) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
@@ -671,7 +671,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 	head->next = NULL;
 	head->dev = dev;
 	head->tstamp = fq->stamp;
-	head->nh.ipv6h->payload_len = htons(payload_len);
+	ipv6_hdr(head)->payload_len = htons(payload_len);
 	IP6CB(head)->nhoff = nhoff;
 
 	*skb_in = head;
@@ -708,9 +708,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
 	struct net_device *dev = skb->dev;
 	struct frag_hdr *fhdr;
 	struct frag_queue *fq;
-	struct ipv6hdr *hdr;
-
-	hdr = skb->nh.ipv6h;
+	struct ipv6hdr *hdr = ipv6_hdr(skb);
 
 	IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS);
 
@@ -726,7 +724,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
 		return -1;
 	}
 
-	hdr = skb->nh.ipv6h;
+	hdr = ipv6_hdr(skb);
 	fhdr = (struct frag_hdr *)skb->h.raw;
 
 	if (!(fhdr->frag_off & htons(0xFFF9))) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0aa4762f53f..52cbe1cd404 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -726,7 +726,7 @@ out2:
 
 void ip6_route_input(struct sk_buff *skb)
 {
-	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct ipv6hdr *iph = ipv6_hdr(skb);
 	int flags = RT6_LOOKUP_F_HAS_SADDR;
 	struct flowi fl = {
 		.iif = skb->dev->ifindex,
@@ -1775,7 +1775,7 @@ static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
 	int type;
 	switch (ipstats_mib_noroutes) {
 	case IPSTATS_MIB_INNOROUTES:
-		type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
+		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
 		if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
 			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
 			break;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e33ac3c3a9c..18ec86f177d 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -366,7 +366,7 @@ out:
 static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
 {
 	if (INET_ECN_is_ce(iph->tos))
-		IP6_ECN_set_ce(skb->nh.ipv6h);
+		IP6_ECN_set_ce(ipv6_hdr(skb));
 }
 
 static int ipip6_rcv(struct sk_buff *skb)
@@ -430,7 +430,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct net_device_stats *stats = &tunnel->stat;
 	struct iphdr  *tiph = &tunnel->parms.iph;
-	struct ipv6hdr *iph6 = skb->nh.ipv6h;
+	struct ipv6hdr *iph6 = ipv6_hdr(skb);
 	u8     tos = tunnel->parms.iph.tos;
 	struct rtable *rt;     			/* Route to the other host */
 	struct net_device *tdev;			/* Device to other host */
@@ -468,7 +468,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		addr_type = ipv6_addr_type(addr6);
 
 		if (addr_type == IPV6_ADDR_ANY) {
-			addr6 = &skb->nh.ipv6h->daddr;
+			addr6 = &ipv6_hdr(skb)->daddr;
 			addr_type = ipv6_addr_type(addr6);
 		}
 
@@ -550,7 +550,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			skb_set_owner_w(new_skb, skb->sk);
 		dev_kfree_skb(skb);
 		skb = new_skb;
-		iph6 = skb->nh.ipv6h;
+		iph6 = ipv6_hdr(skb);
 	}
 
 	skb->h.raw = skb->nh.raw;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 80a52ab1e38..85b3e89110f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -115,8 +115,8 @@ static __inline__ __sum16 tcp_v6_check(struct tcphdr *th, int len,
 
 static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
 {
-	return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
-					    skb->nh.ipv6h->saddr.s6_addr32,
+	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
+					    ipv6_hdr(skb)->saddr.s6_addr32,
 					    skb->h.th->dest,
 					    skb->h.th->source);
 }
@@ -837,7 +837,7 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
 {
 	__u8 *hash_location = NULL;
 	struct tcp_md5sig_key *hash_expected;
-	struct ipv6hdr *ip6h = skb->nh.ipv6h;
+	struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	struct tcphdr *th = skb->h.th;
 	int length = (th->doff << 2) - sizeof (*th);
 	int genhash;
@@ -966,7 +966,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(*th)))
 		return -EINVAL;
 
-	ipv6h = skb->nh.ipv6h;
+	ipv6h = ipv6_hdr(skb);
 	th = skb->h.th;
 
 	th->check = 0;
@@ -995,7 +995,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 
 #ifdef CONFIG_TCP_MD5SIG
 	if (sk)
-		key = tcp_v6_md5_do_lookup(sk, &skb->nh.ipv6h->daddr);
+		key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr);
 	else
 		key = NULL;
 
@@ -1039,20 +1039,18 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 			       (TCPOPT_NOP << 16) |
 			       (TCPOPT_MD5SIG << 8) |
 			       TCPOLEN_MD5SIG);
-		tcp_v6_do_calc_md5_hash((__u8*)&opt[1],
-					key,
-					&skb->nh.ipv6h->daddr,
-					&skb->nh.ipv6h->saddr,
-					t1, IPPROTO_TCP,
-					tot_len);
+		tcp_v6_do_calc_md5_hash((__u8 *)&opt[1], key,
+					&ipv6_hdr(skb)->daddr,
+					&ipv6_hdr(skb)->saddr,
+					t1, IPPROTO_TCP, tot_len);
 	}
 #endif
 
 	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
 
 	memset(&fl, 0, sizeof(fl));
-	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
+	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
+	ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
 
 	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
 				    sizeof(*t1), IPPROTO_TCP,
@@ -1093,7 +1091,7 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
 
 #ifdef CONFIG_TCP_MD5SIG
 	if (!tw && skb->sk) {
-		key = tcp_v6_md5_do_lookup(skb->sk, &skb->nh.ipv6h->daddr);
+		key = tcp_v6_md5_do_lookup(skb->sk, &ipv6_hdr(skb)->daddr);
 	} else if (tw && tw->tw_md5_keylen) {
 		tw_key.key = tw->tw_md5_key;
 		tw_key.keylen = tw->tw_md5_keylen;
@@ -1142,20 +1140,18 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
 	if (key) {
 		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
-		tcp_v6_do_calc_md5_hash((__u8 *)topt,
-					key,
-					&skb->nh.ipv6h->daddr,
-					&skb->nh.ipv6h->saddr,
-					t1, IPPROTO_TCP,
-					tot_len);
+		tcp_v6_do_calc_md5_hash((__u8 *)topt, key,
+					&ipv6_hdr(skb)->daddr,
+					&ipv6_hdr(skb)->saddr,
+					t1, IPPROTO_TCP, tot_len);
 	}
 #endif
 
 	buff->csum = csum_partial((char *)t1, tot_len, 0);
 
 	memset(&fl, 0, sizeof(fl));
-	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
+	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
+	ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
 
 	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
 				    tot_len, IPPROTO_TCP,
@@ -1204,13 +1200,13 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 
 	/* Find possible connection requests. */
 	req = inet6_csk_search_req(sk, &prev, th->source,
-				   &skb->nh.ipv6h->saddr,
-				   &skb->nh.ipv6h->daddr, inet6_iif(skb));
+				   &ipv6_hdr(skb)->saddr,
+				   &ipv6_hdr(skb)->daddr, inet6_iif(skb));
 	if (req)
 		return tcp_check_req(sk, skb, req, prev);
 
-	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
-					 th->source, &skb->nh.ipv6h->daddr,
+	nsk = __inet6_lookup_established(&tcp_hashinfo, &ipv6_hdr(skb)->saddr,
+					 th->source, &ipv6_hdr(skb)->daddr,
 					 ntohs(th->dest), inet6_iif(skb));
 
 	if (nsk) {
@@ -1277,8 +1273,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_openreq_init(req, &tmp_opt, skb);
 
 	treq = inet6_rsk(req);
-	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
-	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
+	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
+	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
 	TCP_ECN_create_request(req, skb->h.th);
 	treq->pktopts = NULL;
 	if (ipv6_opt_accepted(sk, skb) ||
@@ -1365,7 +1361,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		newnp->pktoptions  = NULL;
 		newnp->opt	   = NULL;
 		newnp->mcast_oif   = inet6_iif(skb);
-		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
+		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;
 
 		/*
 		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
@@ -1473,7 +1469,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	}
 	newnp->opt	  = NULL;
 	newnp->mcast_oif  = inet6_iif(skb);
-	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
+	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
 
 	/* Clone native IPv6 options from listening socket (if any)
 
@@ -1532,15 +1528,16 @@ out:
 static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
 {
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
-				  &skb->nh.ipv6h->daddr,skb->csum)) {
+		if (!tcp_v6_check(skb->h.th, skb->len, &ipv6_hdr(skb)->saddr,
+				  &ipv6_hdr(skb)->daddr, skb->csum)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 			return 0;
 		}
 	}
 
-	skb->csum = ~csum_unfold(tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
-				  &skb->nh.ipv6h->daddr, 0));
+	skb->csum = ~csum_unfold(tcp_v6_check(skb->h.th, skb->len,
+					      &ipv6_hdr(skb)->saddr,
+					      &ipv6_hdr(skb)->daddr, 0));
 
 	if (skb->len <= 76) {
 		return __skb_checksum_complete(skb);
@@ -1668,7 +1665,7 @@ ipv6_pktoptions:
 		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
 			np->mcast_oif = inet6_iif(opt_skb);
 		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
-			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
+			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
 		if (ipv6_opt_accepted(sk, opt_skb)) {
 			skb_set_owner_r(opt_skb, sk);
 			opt_skb = xchg(&np->pktoptions, opt_skb);
@@ -1718,11 +1715,11 @@ static int tcp_v6_rcv(struct sk_buff **pskb)
 				    skb->len - th->doff*4);
 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
 	TCP_SKB_CB(skb)->when = 0;
-	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
+	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
 	TCP_SKB_CB(skb)->sacked = 0;
 
-	sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
-			    &skb->nh.ipv6h->daddr, ntohs(th->dest),
+	sk = __inet6_lookup(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source,
+			    &ipv6_hdr(skb)->daddr, ntohs(th->dest),
 			    inet6_iif(skb));
 
 	if (!sk)
@@ -1802,7 +1799,7 @@ do_time_wait:
 		struct sock *sk2;
 
 		sk2 = inet6_lookup_listener(&tcp_hashinfo,
-					    &skb->nh.ipv6h->daddr,
+					    &ipv6_hdr(skb)->daddr,
 					    ntohs(th->dest), inet6_iif(skb));
 		if (sk2 != NULL) {
 			struct inet_timewait_sock *tw = inet_twsk(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e991e606ab1..55affe39b2e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -180,7 +180,8 @@ try_again:
 			ipv6_addr_set(&sin6->sin6_addr, 0, 0,
 				      htonl(0xffff), ip_hdr(skb)->saddr);
 		else {
-			ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
+			ipv6_addr_copy(&sin6->sin6_addr,
+				       &ipv6_hdr(skb)->saddr);
 			if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 				sin6->sin6_scope_id = IP6CB(skb)->iif;
 		}
@@ -392,13 +393,13 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
 		return 1;
 	}
 	if (skb->ip_summed == CHECKSUM_COMPLETE &&
-	    !csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
+	    !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
 			     skb->len, proto, skb->csum))
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
 	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
-		skb->csum = ~csum_unfold(csum_ipv6_magic(&skb->nh.ipv6h->saddr,
-							 &skb->nh.ipv6h->daddr,
+		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+							 &ipv6_hdr(skb)->daddr,
 							 skb->len, proto, 0));
 
 	return 0;
@@ -417,8 +418,8 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
 	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
 		goto short_packet;
 
-	saddr = &skb->nh.ipv6h->saddr;
-	daddr = &skb->nh.ipv6h->daddr;
+	saddr = &ipv6_hdr(skb)->saddr;
+	daddr = &ipv6_hdr(skb)->daddr;
 	uh = skb->h.uh;
 
 	ulen = ntohs(uh->len);
@@ -438,8 +439,8 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
 		if (ulen < skb->len) {
 			if (pskb_trim_rcsum(skb, ulen))
 				goto short_packet;
-			saddr = &skb->nh.ipv6h->saddr;
-			daddr = &skb->nh.ipv6h->daddr;
+			saddr = &ipv6_hdr(skb)->saddr;
+			daddr = &ipv6_hdr(skb)->daddr;
 			uh = skb->h.uh;
 		}
 	}
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 5c929f88612..f30ef16072f 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -35,7 +35,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 		goto drop;
 
 	do {
-		struct ipv6hdr *iph = skb->nh.ipv6h;
+		struct ipv6hdr *iph = ipv6_hdr(skb);
 
 		if (xfrm_nr == XFRM_MAX_DEPTH)
 			goto drop;
@@ -112,7 +112,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
 		return -1;
 	} else {
 #ifdef CONFIG_NETFILTER
-		skb->nh.ipv6h->payload_len = htons(skb->len);
+		ipv6_hdr(skb)->payload_len = htons(skb->len);
 		__skb_push(skb, skb->data - skb_network_header(skb));
 
 		NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 247e2d5d2ac..b5a48c255f0 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -38,7 +38,7 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 	int hdr_len;
 
 	skb_push(skb, x->props.header_len);
-	iph = skb->nh.ipv6h;
+	iph = ipv6_hdr(skb);
 
 	hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
 	skb->nh.raw = prevhdr - x->props.header_len;
@@ -46,7 +46,7 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 	memmove(skb->data, iph, hdr_len);
 
 	skb_reset_network_header(skb);
-	top_iph = skb->nh.ipv6h;
+	top_iph = ipv6_hdr(skb);
 	skb->nh.raw = &top_iph->nexthdr;
 	skb->h.ipv6h = top_iph + 1;
 
@@ -74,7 +74,7 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb_set_mac_header(skb, -skb->mac_len);
 	memmove(skb_mac_header(skb), old_mac, skb->mac_len);
 
-	ip6h = skb->nh.ipv6h;
+	ip6h = ipv6_hdr(skb);
 	ip6h->payload_len = htons(skb->len - size);
 	ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6);
 	ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6);
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index 6031c16d46c..d01958d921a 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -50,7 +50,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 	int hdr_len;
 
 	skb_push(skb, x->props.header_len);
-	iph = skb->nh.ipv6h;
+	iph = ipv6_hdr(skb);
 
 	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
 	skb->nh.raw = prevhdr - x->props.header_len;
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index ace0bbf4f25..358b60d9d91 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -32,7 +32,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 	int hdr_len;
 
 	skb_push(skb, x->props.header_len);
-	iph = skb->nh.ipv6h;
+	iph = ipv6_hdr(skb);
 
 	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
 	skb->nh.raw = prevhdr - x->props.header_len;
@@ -57,7 +57,7 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 		memmove(skb->h.raw, skb_network_header(skb), ihl);
 		skb->nh.raw = skb->h.raw;
 	}
-	skb->nh.ipv6h->payload_len = htons(skb->len + ihl -
+	ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
 					   sizeof(struct ipv6hdr));
 	skb->h.raw = skb->data;
 	return 0;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 498f17b5c42..28f36b363d1 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -18,7 +18,7 @@
 
 static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
 {
-	struct ipv6hdr *outer_iph = skb->nh.ipv6h;
+	struct ipv6hdr *outer_iph = ipv6_hdr(skb);
 	struct ipv6hdr *inner_iph = skb->h.ipv6h;
 
 	if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
@@ -27,7 +27,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
 
 static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb)
 {
-	if (INET_ECN_is_ce(ipv6_get_dsfield(skb->nh.ipv6h)))
+	if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6_hdr(skb))))
 			IP_ECN_set_ce(skb->h.ipiph);
 }
 
@@ -51,10 +51,10 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	int dsfield;
 
 	skb_push(skb, x->props.header_len);
-	iph = skb->nh.ipv6h;
+	iph = ipv6_hdr(skb);
 
 	skb_reset_network_header(skb);
-	top_iph = skb->nh.ipv6h;
+	top_iph = ipv6_hdr(skb);
 	skb->nh.raw = &top_iph->nexthdr;
 	skb->h.ipv6h = top_iph + 1;
 
@@ -102,7 +102,7 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	nh = skb_network_header(skb);
 	if (nh[IP6CB(skb)->nhoff] == IPPROTO_IPV6) {
 		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-			ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h);
+			ipv6_copy_dscp(ipv6_hdr(skb), skb->h.ipv6h);
 		if (!(x->props.flags & XFRM_STATE_NOECN))
 			ipip6_ecn_decapsulate(skb);
 	} else {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index cb5a723d4cb..b93bfb87f49 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -271,7 +271,7 @@ static inline void
 _decode_session6(struct sk_buff *skb, struct flowi *fl)
 {
 	u16 offset = skb->h.raw - skb->nh.raw;
-	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct ipv6_opt_hdr *exthdr;
 	const unsigned char *nh = skb_network_header(skb);
 	u8 nexthdr = nh[IP6CB(skb)->nhoff];
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 93c42232aa3..538499a8997 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -257,7 +257,7 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 
 static int xfrm6_tunnel_rcv(struct sk_buff *skb)
 {
-	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct ipv6hdr *iph = ipv6_hdr(skb);
 	__be32 spi;
 
 	spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index de647bd5489..a97704a3f95 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -56,13 +56,13 @@ static unsigned int target6(struct sk_buff **pskb,
 			    const void *targinfo)
 {
 	const struct xt_DSCP_info *dinfo = targinfo;
-	u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT;
+	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(*pskb)) >> XT_DSCP_SHIFT;
 
 	if (dscp != dinfo->dscp) {
 		if (!skb_make_writable(pskb, sizeof(struct ipv6hdr)))
 			return NF_DROP;
 
-		ipv6_change_dsfield((*pskb)->nh.ipv6h, (__u8)(~XT_DSCP_MASK),
+		ipv6_change_dsfield(ipv6_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
 				    dinfo->dscp << XT_DSCP_SHIFT);
 	}
 	return XT_CONTINUE;
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 9e948ce2760..15fe8f64951 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -171,7 +171,7 @@ xt_tcpmss_target6(struct sk_buff **pskb,
 		  const struct xt_target *target,
 		  const void *targinfo)
 {
-	struct ipv6hdr *ipv6h = (*pskb)->nh.ipv6h;
+	struct ipv6hdr *ipv6h = ipv6_hdr(*pskb);
 	u8 nexthdr;
 	int tcphoff;
 	int ret;
@@ -187,7 +187,7 @@ xt_tcpmss_target6(struct sk_buff **pskb,
 	if (ret < 0)
 		return NF_DROP;
 	if (ret > 0) {
-		ipv6h = (*pskb)->nh.ipv6h;
+		ipv6h = ipv6_hdr(*pskb);
 		ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
 	}
 	return XT_CONTINUE;
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 9ec294cd243..000e9c2f8d1 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -49,7 +49,7 @@ static int match6(const struct sk_buff *skb,
 		  int *hotdrop)
 {
 	const struct xt_dscp_info *info = matchinfo;
-	u_int8_t dscp = ipv6_get_dsfield(skb->nh.ipv6h) >> XT_DSCP_SHIFT;
+	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	return (dscp == info->dscp) ^ !!info->invert;
 }
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 47af19ab03c..eb932913693 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -392,10 +392,10 @@ hashlimit_init_dst(struct xt_hashlimit_htable *hinfo, struct dsthash_dst *dst,
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 	case AF_INET6:
 		if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
-			memcpy(&dst->addr.ip6.dst, &skb->nh.ipv6h->daddr,
+			memcpy(&dst->addr.ip6.dst, &ipv6_hdr(skb)->daddr,
 			       sizeof(dst->addr.ip6.dst));
 		if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
-			memcpy(&dst->addr.ip6.src, &skb->nh.ipv6h->saddr,
+			memcpy(&dst->addr.ip6.src, &ipv6_hdr(skb)->saddr,
 			       sizeof(dst->addr.ip6.src));
 
 		if (!(hinfo->cfg.mode &
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 65fdb216699..77288c5ada7 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -47,7 +47,8 @@ match6(const struct sk_buff *skb,
        int *hotdrop)
 {
 	const struct xt_length_info *info = matchinfo;
-	u_int16_t pktlen = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
+	const u_int16_t pktlen = (ntohs(ipv6_hdr(skb)->payload_len) +
+				  sizeof(struct ipv6hdr));
 
 	return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
 }
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index cb8cf5bfa05..6f373b020eb 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -143,7 +143,7 @@ static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
 	u8 tunnelid = 0;
 	u8 *xprt;
 #if RSVP_DST_LEN == 4
-	struct ipv6hdr *nhptr = skb->nh.ipv6h;
+	struct ipv6hdr *nhptr = ipv6_hdr(skb);
 #else
 	struct iphdr *nhptr = ip_hdr(skb);
 #endif
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 45b5734dd72..2c857af79a1 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -220,7 +220,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 					& ~INET_ECN_MASK;
 				break;
 			case __constant_htons(ETH_P_IPV6):
-				skb->tc_index = ipv6_get_dsfield(skb->nh.ipv6h)
+				skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
 					& ~INET_ECN_MASK;
 				break;
 			default:
@@ -296,7 +296,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 					    p->value[index]);
 			break;
 		case __constant_htons(ETH_P_IPV6):
-			ipv6_change_dsfield(skb->nh.ipv6h, p->mask[index],
+			ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
 					    p->value[index]);
 			break;
 		default:
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 02081bc9e0d..e3695407afc 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -152,7 +152,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 	}
 	case __constant_htons(ETH_P_IPV6):
 	{
-		struct ipv6hdr *iph = skb->nh.ipv6h;
+		struct ipv6hdr *iph = ipv6_hdr(skb);
 		h = iph->daddr.s6_addr32[3];
 		h2 = iph->saddr.s6_addr32[3]^iph->nexthdr;
 		if (iph->nexthdr == IPPROTO_TCP ||
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 742f9ff42fb..001be2de0b3 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -393,10 +393,10 @@ static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb,
 	sh = (struct sctphdr *) skb->h.raw;
 	if (is_saddr) {
 		*port  = sh->source;
-		from = &skb->nh.ipv6h->saddr;
+		from = &ipv6_hdr(skb)->saddr;
 	} else {
 		*port = sh->dest;
-		from = &skb->nh.ipv6h->daddr;
+		from = &ipv6_hdr(skb)->daddr;
 	}
 	ipv6_addr_copy(&addr->v6.sin6_addr, from);
 }
@@ -698,7 +698,7 @@ static int sctp_v6_skb_iif(const struct sk_buff *skb)
 /* Was this packet marked by Explicit Congestion Notification? */
 static int sctp_v6_is_ce(const struct sk_buff *skb)
 {
-	return *((__u32 *)(skb->nh.ipv6h)) & htonl(1<<20);
+	return *((__u32 *)(ipv6_hdr(skb))) & htonl(1 << 20);
 }
 
 /* Dump the v6 addr to the seq file. */
@@ -777,7 +777,7 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
 		}
 
 		/* Otherwise, just copy the v6 address. */
-		ipv6_addr_copy(&sin6->sin6_addr, &skb->nh.ipv6h->saddr);
+		ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
 		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) {
 			struct sctp_ulpevent *ev = sctp_skb2event(skb);
 			sin6->sin6_scope_id = ev->iif;
-- 
cgit v1.2.3


From badff6d01a8589a1c828b0bf118903ca38627f4e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 13 Mar 2007 13:06:52 -0300
Subject: [SK_BUFF]: Introduce skb_reset_transport_header(skb)

For the common, open coded 'skb->h.raw = skb->data' operation, so that we can
later turn skb->h.raw into a offset, reducing the size of struct sk_buff in
64bit land while possibly keeping it as a pointer on 32bit.

This one touches just the most simple cases:

skb->h.raw = skb->data;
skb->h.raw = {skb_push|[__]skb_pull}()

The next ones will handle the slightly more "complex" cases.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/cxgb3/iwch_cm.c   | 6 +++---
 drivers/net/appletalk/cops.c            | 2 +-
 drivers/net/appletalk/ltpc.c            | 4 ++--
 drivers/net/cxgb3/sge.c                 | 2 +-
 include/linux/dccp.h                    | 6 +++---
 include/linux/skbuff.h                  | 5 +++++
 net/appletalk/aarp.c                    | 6 +++---
 net/appletalk/ddp.c                     | 4 ++--
 net/ax25/af_ax25.c                      | 4 ++--
 net/ax25/ax25_in.c                      | 8 ++++----
 net/bluetooth/af_bluetooth.c            | 2 +-
 net/bluetooth/hci_core.c                | 9 +++++----
 net/bluetooth/hci_sock.c                | 2 +-
 net/core/dev.c                          | 2 +-
 net/core/netpoll.c                      | 2 +-
 net/decnet/dn_nsp_in.c                  | 2 +-
 net/decnet/dn_nsp_out.c                 | 2 +-
 net/decnet/dn_route.c                   | 4 ++--
 net/ipv4/af_inet.c                      | 6 ++++--
 net/ipv4/ah4.c                          | 3 ++-
 net/ipv4/ip_input.c                     | 2 +-
 net/ipv4/ip_output.c                    | 2 +-
 net/ipv4/ipmr.c                         | 2 +-
 net/ipv4/udp.c                          | 3 ++-
 net/ipv4/xfrm4_mode_transport.c         | 2 +-
 net/ipv6/ip6_input.c                    | 2 +-
 net/ipv6/ip6_output.c                   | 8 ++++----
 net/ipv6/ipv6_sockglue.c                | 4 ++--
 net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +-
 net/ipv6/reassembly.c                   | 2 +-
 net/ipv6/xfrm6_mode_transport.c         | 2 +-
 net/ipx/af_ipx.c                        | 2 +-
 net/ipx/ipx_route.c                     | 2 +-
 net/irda/af_irda.c                      | 4 ++--
 net/irda/irlap_frame.c                  | 2 +-
 net/iucv/af_iucv.c                      | 2 +-
 net/key/af_key.c                        | 2 +-
 net/llc/llc_sap.c                       | 2 +-
 net/netlink/af_netlink.c                | 2 +-
 net/netrom/af_netrom.c                  | 6 +++---
 net/netrom/nr_in.c                      | 2 +-
 net/netrom/nr_loopback.c                | 2 +-
 net/rose/af_rose.c                      | 2 +-
 net/rose/rose_loopback.c                | 2 +-
 net/rose/rose_route.c                   | 2 +-
 net/unix/af_unix.c                      | 2 +-
 net/x25/af_x25.c                        | 3 +--
 net/x25/x25_dev.c                       | 2 +-
 net/x25/x25_in.c                        | 2 +-
 49 files changed, 82 insertions(+), 73 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 2d2de9b8b72..66ad4d40ba1 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -507,7 +507,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
 	 */
 	skb_get(skb);
 	set_arp_failure_handler(skb, arp_failure_discard);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	len = skb->len;
 	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
 	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
@@ -559,7 +559,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
 	skb_get(skb);
 	skb->priority = CPL_PRIORITY_DATA;
 	set_arp_failure_handler(skb, arp_failure_discard);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
 	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
 	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
@@ -610,7 +610,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
 	 */
 	skb_get(skb);
 	set_arp_failure_handler(skb, arp_failure_discard);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	len = skb->len;
 	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
 	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c
index 28cb79cee91..da6ffa8cd81 100644
--- a/drivers/net/appletalk/cops.c
+++ b/drivers/net/appletalk/cops.c
@@ -855,7 +855,7 @@ static void cops_rx(struct net_device *dev)
 
         skb_reset_mac_header(skb);    /* Point to entire packet. */
         skb_pull(skb,3);
-        skb->h.raw      = skb->data;    /* Point to data (Skip header). */
+        skb_reset_transport_header(skb);    /* Point to data (Skip header). */
 
         /* Update the counters. */
         lp->stats.rx_packets++;
diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index 12682439f8b..dc3bce992dc 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -776,7 +776,7 @@ static int sendup_buffer (struct net_device *dev)
 	/* copy ddp(s,e)hdr + contents */
 	memcpy(skb->data,(void*)ltdmabuf,len);
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	stats->rx_packets++;
 	stats->rx_bytes+=skb->len;
@@ -923,7 +923,7 @@ static int ltpc_xmit(struct sk_buff *skb, struct net_device *dev)
 	cbuf.laptype = skb->data[2];
 	skb_pull(skb,3);	/* skip past LLAP header */
 	cbuf.length = skb->len;	/* this is host order */
-	skb->h.raw=skb->data;
+	skb_reset_transport_header(skb);
 
 	if(debug & DEBUG_UPPER) {
 		printk("command ");
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 892e5dcafa0..a891f6f8152 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -1622,7 +1622,7 @@ static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
 	rq->offload_pkts++;
 	skb_reset_mac_header(skb);
 	skb_reset_network_header(skb);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	if (rq->polling) {
 		rx_gather[gather_idx++] = skb;
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 1cb054bd93f..1f4df61735f 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -265,9 +265,9 @@ static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb)
 
 static inline struct dccp_hdr *dccp_zeroed_hdr(struct sk_buff *skb, int headlen)
 {
-	skb->h.raw = skb_push(skb, headlen);
-	memset(skb->h.raw, 0, headlen);
-	return dccp_hdr(skb);
+	skb_push(skb, headlen);
+	skb_reset_transport_header(skb);
+	return memset(skb->h.raw, 0, headlen);
 }
 
 static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 31806a7ce40..7c1f1756e48 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -957,6 +957,11 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+static inline void skb_reset_transport_header(struct sk_buff *skb)
+{
+	skb->h.raw = skb->data;
+}
+
 static inline unsigned char *skb_network_header(const struct sk_buff *skb)
 {
 	return skb->nh.raw;
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index de495c57aa9..5ef6a238bdb 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -119,7 +119,7 @@ static void __aarp_send_query(struct aarp_entry *a)
 	/* Set up the buffer */
 	skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
 	skb_reset_network_header(skb);
-	skb->h.raw	 = skb->data;
+	skb_reset_transport_header(skb);
 	skb_put(skb, sizeof(*eah));
 	skb->protocol    = htons(ETH_P_ATALK);
 	skb->dev	 = dev;
@@ -166,7 +166,7 @@ static void aarp_send_reply(struct net_device *dev, struct atalk_addr *us,
 	/* Set up the buffer */
 	skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
 	skb_reset_network_header(skb);
-	skb->h.raw	 = skb->data;
+	skb_reset_transport_header(skb);
 	skb_put(skb, sizeof(*eah));
 	skb->protocol    = htons(ETH_P_ATALK);
 	skb->dev	 = dev;
@@ -217,7 +217,7 @@ static void aarp_send_probe(struct net_device *dev, struct atalk_addr *us)
 	/* Set up the buffer */
 	skb_reserve(skb, dev->hard_header_len + aarp_dl->header_length);
 	skb_reset_network_header(skb);
-	skb->h.raw	 = skb->data;
+	skb_reset_transport_header(skb);
 	skb_put(skb, sizeof(*eah));
 	skb->protocol    = htons(ETH_P_ATALK);
 	skb->dev	 = dev;
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 934f25993ce..137341b4d83 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1275,7 +1275,7 @@ static int handle_ip_over_ddp(struct sk_buff *skb)
 	skb->protocol = htons(ETH_P_IP);
 	skb_pull(skb, 13);
 	skb->dev   = dev;
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	stats = dev->priv;
 	stats->rx_packets++;
@@ -1522,7 +1522,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		/* Non routable, so force a drop if we slip up later */
 		ddp->deh_len_hops = htons(skb->len + (DDP_MAXHOPS << 10));
 	}
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	return atalk_rcv(skb, dev, pt, orig_dev);
 freeit:
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index b1a4d60ce9a..14db01a4ff6 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1631,8 +1631,8 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (!ax25_sk(sk)->pidincl)
 		skb_pull(skb, 1);		/* Remove PID */
 
-	skb->h.raw = skb->data;
-	copied     = skb->len;
+	skb_reset_transport_header(skb);
+	copied = skb->len;
 
 	if (copied > size) {
 		copied = size;
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 3b2aac67026..724ad5ce75d 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -62,7 +62,7 @@ static int ax25_rx_fragment(ax25_cb *ax25, struct sk_buff *skb)
 
 					skbn->dev   = ax25->ax25_dev->dev;
 					skb_reset_network_header(skbn);
-					skbn->h.raw = skbn->data;
+					skb_reset_transport_header(skbn);
 
 					/* Copy data from the fragments */
 					while ((skbo = skb_dequeue(&ax25->frag_queue)) != NULL) {
@@ -196,7 +196,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
 	 *	Process the AX.25/LAPB frame.
 	 */
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) {
 		kfree_skb(skb);
@@ -246,7 +246,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
 		switch (skb->data[1]) {
 		case AX25_P_IP:
 			skb_pull(skb,2);		/* drop PID/CTRL */
-			skb->h.raw    = skb->data;
+			skb_reset_transport_header(skb);
 			skb_reset_network_header(skb);
 			skb->dev      = dev;
 			skb->pkt_type = PACKET_HOST;
@@ -256,7 +256,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
 
 		case AX25_P_ARP:
 			skb_pull(skb,2);
-			skb->h.raw    = skb->data;
+			skb_reset_transport_header(skb);
 			skb_reset_network_header(skb);
 			skb->dev      = dev;
 			skb->pkt_type = PACKET_HOST;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index c7228cfc621..d942b946ba0 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -221,7 +221,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 		copied = len;
 	}
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 
 	skb_free_datagram(sk, skb);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 64fea0903fd..c11ceb6b3f7 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1074,11 +1074,11 @@ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags)
 	struct hci_acl_hdr *hdr;
 	int len = skb->len;
 
-	hdr = (struct hci_acl_hdr *) skb_push(skb, HCI_ACL_HDR_SIZE);
+	skb_push(skb, HCI_ACL_HDR_SIZE);
+	skb_reset_transport_header(skb);
+	hdr = (struct hci_acl_hdr *)skb->h.raw;
 	hdr->handle = cpu_to_le16(hci_handle_pack(handle, flags));
 	hdr->dlen   = cpu_to_le16(len);
-
-	skb->h.raw = (void *) hdr;
 }
 
 int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
@@ -1143,7 +1143,8 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
 	hdr.handle = cpu_to_le16(conn->handle);
 	hdr.dlen   = skb->len;
 
-	skb->h.raw = skb_push(skb, HCI_SCO_HDR_SIZE);
+	skb_push(skb, HCI_SCO_HDR_SIZE);
+	skb_reset_transport_header(skb);
 	memcpy(skb->h.raw, &hdr, HCI_SCO_HDR_SIZE);
 
 	skb->dev = (void *) hdev;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 71f5cfbbebb..832b5f44be5 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -375,7 +375,7 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 		copied = len;
 	}
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 
 	hci_sock_cmsg(sk, msg, skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 3af0bdc8649..99f15728d9c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1773,7 +1773,7 @@ int netif_receive_skb(struct sk_buff *skb)
 	__get_cpu_var(netdev_rx_stat).total++;
 
 	skb_reset_network_header(skb);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	skb->mac_len = skb->nh.raw - skb->mac.raw;
 
 	pt_prev = NULL;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 496b06244a8..8b22723d643 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -362,7 +362,7 @@ static void arp_reply(struct sk_buff *skb)
 		return;
 
 	skb_reset_network_header(skb);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	arp = arp_hdr(skb);
 
 	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 9d20904f6f5..d9498a165ac 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -725,7 +725,7 @@ static int dn_nsp_rx_packet(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, 2))
 		goto free_out;
 
-	skb->h.raw    = skb->data;
+	skb_reset_transport_header(skb);
 	cb->nsp_flags = *ptr++;
 
 	if (decnet_debug_level & 2)
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 2d2cda82c7d..84b8c5b45fe 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -79,7 +79,7 @@ static void dn_nsp_send(struct sk_buff *skb)
 	struct dst_entry *dst;
 	struct flowi fl;
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	scp->stamp = jiffies;
 
 	dst = sk_dst_check(sk, 0);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 32a7db36c9e..bb73bf16630 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -504,7 +504,7 @@ static int dn_route_rx_long(struct sk_buff *skb)
 		goto drop_it;
 
 	skb_pull(skb, 20);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	/* Destination info */
 	ptr += 2;
@@ -542,7 +542,7 @@ static int dn_route_rx_short(struct sk_buff *skb)
 		goto drop_it;
 
 	skb_pull(skb, 5);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	cb->dst = *(__le16 *)ptr;
 	ptr += 2;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e7720c72a6e..f011390f19c 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1120,7 +1120,8 @@ static int inet_gso_send_check(struct sk_buff *skb)
 	if (unlikely(!pskb_may_pull(skb, ihl)))
 		goto out;
 
-	skb->h.raw = __skb_pull(skb, ihl);
+	__skb_pull(skb, ihl);
+	skb_reset_transport_header(skb);
 	iph = ip_hdr(skb);
 	proto = iph->protocol & (MAX_INET_PROTOS - 1);
 	err = -EPROTONOSUPPORT;
@@ -1163,7 +1164,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
 	if (unlikely(!pskb_may_pull(skb, ihl)))
 		goto out;
 
-	skb->h.raw = __skb_pull(skb, ihl);
+	__skb_pull(skb, ihl);
+	skb_reset_transport_header(skb);
 	iph = ip_hdr(skb);
 	id = ntohs(iph->id);
 	proto = iph->protocol & (MAX_INET_PROTOS - 1);
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 00fd31da252..ebcc797e1c1 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -182,7 +182,8 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 	}
 	((struct iphdr*)work_buf)->protocol = ah->nexthdr;
 	skb->nh.raw += ah_hlen;
-	skb->h.raw = memcpy(skb_network_header(skb), work_buf, ihl);
+	memcpy(skb_network_header(skb), work_buf, ihl);
+	skb->h.raw = skb->nh.raw;
 	__skb_pull(skb, ah_hlen + ihl);
 
 	return 0;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 237880a8043..324e7e0fdb2 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -201,7 +201,7 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
 	__skb_pull(skb, ip_hdrlen(skb));
 
 	/* Point into the IP datagram, just past the header. */
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 
 	rcu_read_lock();
 	{
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1abc48899f2..63c05be0764 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -500,7 +500,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 			 * before previous one went down. */
 			if (frag) {
 				frag->ip_summed = CHECKSUM_NONE;
-				frag->h.raw = frag->data;
+				skb_reset_transport_header(frag);
 				__skb_push(frag, hlen);
 				skb_reset_network_header(frag);
 				memcpy(skb_network_header(frag), iph, hlen);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index e0021499093..03869d91f6f 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -563,7 +563,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 		 */
 		skb_push(skb, sizeof(struct iphdr));
 		skb_reset_network_header(skb);
-		skb->h.raw = skb->data;
+		skb_reset_transport_header(skb);
 		msg = (struct igmpmsg *)skb_network_header(skb);
 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b4cad50c18e..13739cd8206 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1002,7 +1002,8 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
 	 * transport header to point to ESP.  Keep UDP on the stack
 	 * for later.
 	 */
-	skb->h.raw = __skb_pull(skb, len);
+	__skb_pull(skb, len);
+	skb_reset_transport_header(skb);
 
 	/* modify the protocol (it's ESP!) */
 	iph->protocol = IPPROTO_ESP;
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index 124f24bc4db..2c46cbb3bbb 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -52,7 +52,7 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 		skb->nh.raw = skb->h.raw;
 	}
 	ip_hdr(skb)->tot_len = htons(skb->len + ihl);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	return 0;
 }
 
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 9c3c787a21c..2dd32a2ca05 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -101,7 +101,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	if (hdr->version != 6)
 		goto err;
 
-	skb->h.raw = (u8 *)(hdr + 1);
+	skb->h.raw = skb->nh.raw + sizeof(*hdr);
 	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
 
 	pkt_len = ntohs(hdr->payload_len);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1900c622686..0f4434eff66 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -654,7 +654,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 			 * before previous one went down. */
 			if (frag) {
 				frag->ip_summed = CHECKSUM_NONE;
-				frag->h.raw = frag->data;
+				skb_reset_transport_header(frag);
 				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
 				__skb_push(frag, hlen);
 				skb_reset_network_header(frag);
@@ -747,8 +747,8 @@ slow_path:
 		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
 		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 		skb_reset_network_header(frag);
-		fh = (struct frag_hdr*)(frag->data + hlen);
-		frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
+		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
+		frag->h.raw = frag->nh.raw + hlen + sizeof(struct frag_hdr);
 
 		/*
 		 *	Charge the memory for the fragment to any owner
@@ -991,7 +991,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 		skb_reset_network_header(skb);
 
 		/* initialize protocol header pointer */
-		skb->h.raw = skb->data + fragheaderlen;
+		skb->h.raw = skb->nh.raw + fragheaderlen;
 
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index d16e0fd2cd8..da930fa089c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -108,7 +108,7 @@ static int ipv6_gso_send_check(struct sk_buff *skb)
 	rcu_read_lock();
 	ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
 	if (likely(ops && ops->gso_send_check)) {
-		skb->h.raw = skb->data;
+		skb_reset_transport_header(skb);
 		err = ops->gso_send_check(skb);
 	}
 	rcu_read_unlock();
@@ -144,7 +144,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int features)
 	rcu_read_lock();
 	ops = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
 	if (likely(ops && ops->gso_segment)) {
-		skb->h.raw = skb->data;
+		skb_reset_transport_header(skb);
 		segs = ops->gso_segment(skb, features);
 	}
 	rcu_read_unlock();
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d3fbb1f1caf..75138cf1fa6 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -634,7 +634,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 	head->nh.raw += sizeof(struct frag_hdr);
 
 	skb_shinfo(head)->frag_list = head->next;
-	head->h.raw = head->data;
+	skb_reset_transport_header(head);
 	skb_push(head, head->data - skb_network_header(head));
 	atomic_sub(head->truesize, &nf_ct_frag6_mem);
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 156a37fe3ff..2594f0fb290 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -653,7 +653,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 	head->nh.raw += sizeof(struct frag_hdr);
 
 	skb_shinfo(head)->frag_list = head->next;
-	head->h.raw = head->data;
+	skb_reset_transport_header(head);
 	skb_push(head, head->data - skb_network_header(head));
 	atomic_sub(head->truesize, &ip6_frag_mem);
 
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 358b60d9d91..cae6cacd58c 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -59,7 +59,7 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 	}
 	ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
 					   sizeof(struct ipv6hdr));
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	return 0;
 }
 
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 9aa7b961b3e..392f8bc9269 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -577,7 +577,7 @@ static struct sk_buff *ipxitf_adjust_skbuff(struct ipx_interface *intrfc,
 	if (skb2) {
 		skb_reserve(skb2, out_offset);
 		skb_reset_network_header(skb2);
-		skb2->h.raw = skb2->data;
+		skb_reset_transport_header(skb2);
 		skb_put(skb2, skb->len);
 		memcpy(ipx_hdr(skb2), ipx_hdr(skb), skb->len);
 		memcpy(skb2->cb, skb->cb, sizeof(skb->cb));
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index e8277f544e7..e16c1142352 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -204,7 +204,7 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
 
 	/* Fill in IPX header */
 	skb_reset_network_header(skb);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	skb_put(skb, sizeof(struct ipxhdr));
 	ipx = ipx_hdr(skb);
 	ipx->ipx_pktsize = htons(len + sizeof(struct ipxhdr));
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 0eb7d596d47..c3cd2ba123e 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1363,8 +1363,8 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
 	if (!skb)
 		return err;
 
-	skb->h.raw = skb->data;
-	copied     = skb->len;
+	skb_reset_transport_header(skb);
+	copied = skb->len;
 
 	if (copied > size) {
 		IRDA_DEBUG(2, "%s(), Received truncated frame (%zd < %zd)!\n",
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 7c815de4a3b..803ac418748 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -95,7 +95,7 @@ void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb)
 	skb->dev = self->netdev;
 	skb_reset_mac_header(skb);
 	skb_reset_network_header(skb);
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	skb->protocol = htons(ETH_P_IRDA);
 	skb->priority = TC_PRIO_BESTEFFORT;
 
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index a485496059c..55632883d17 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -953,7 +953,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
 			return;
 		}
 
-		skb->h.raw = skb->data;
+		skb_reset_transport_header(skb);
 		skb_reset_network_header(skb);
 		skb->len = msg->length;
 	}
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 345019345f0..3cd228aacfe 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3667,7 +3667,7 @@ static int pfkey_recvmsg(struct kiocb *kiocb,
 		copied = len;
 	}
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 	if (err)
 		goto out_free;
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index e76bbbfb64b..2525165e2e8 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -39,7 +39,7 @@ struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev)
 		skb_reset_mac_header(skb);
 		skb_reserve(skb, 50);
 		skb_reset_network_header(skb);
-		skb->h.raw = skb->data;
+		skb_reset_transport_header(skb);
 		skb->protocol = htons(ETH_P_802_2);
 		skb->dev      = dev;
 		if (sk != NULL)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 350ed1c0e70..50dc5edb775 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1215,7 +1215,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 		copied = len;
 	}
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 
 	if (msg->msg_name) {
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index a54e7ef2568..8d0f30a015d 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -878,7 +878,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
 	if (frametype == NR_PROTOEXT &&
 	    circuit_index == NR_PROTO_IP && circuit_id == NR_PROTO_IP) {
 		skb_pull(skb, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
-		skb->h.raw = skb->data;
+		skb_reset_transport_header(skb);
 
 		return nr_rx_ip(skb, dev);
 	}
@@ -904,7 +904,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	if (sk != NULL) {
-		skb->h.raw = skb->data;
+		skb_reset_transport_header(skb);
 
 		if (frametype == NR_CONNACK && skb->len == 22)
 			nr_sk(sk)->bpqext = 1;
@@ -1149,7 +1149,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
 		return er;
 	}
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	copied     = skb->len;
 
 	if (copied > size) {
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 5560acbaaa9..e6dc749e14b 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -51,7 +51,7 @@ static int nr_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
 		if ((skbn = alloc_skb(nr->fraglen, GFP_ATOMIC)) == NULL)
 			return 1;
 
-		skbn->h.raw = skbn->data;
+		skb_reset_transport_header(skbn);
 
 		while ((skbo = skb_dequeue(&nr->frag_queue)) != NULL) {
 			memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index e856ae1b360..99fdab16ded 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -35,7 +35,7 @@ int nr_loopback_queue(struct sk_buff *skb)
 
 	if ((skbn = alloc_skb(skb->len, GFP_ATOMIC)) != NULL) {
 		memcpy(skb_put(skbn, skb->len), skb->data, skb->len);
-		skbn->h.raw = skbn->data;
+		skb_reset_transport_header(skbn);
 
 		skb_queue_tail(&loopback_queue, skbn);
 
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index f64be9369ef..6d8684a11ac 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1234,7 +1234,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
 		*asmptr = qbit;
 	}
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	copied     = skb->len;
 
 	if (copied > size) {
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 3e41bd93ab9..cd01642f049 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -77,7 +77,7 @@ static void rose_loopback_timer(unsigned long param)
 		dest      = (rose_address *)(skb->data + 4);
 		lci_o     = 0xFFF - lci_i;
 
-		skb->h.raw = skb->data;
+		skb_reset_transport_header(skb);
 
 		sk = rose_find_socket(lci_o, &rose_loopback_neigh);
 		if (sk) {
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index a1233e1b1ab..1f9aefd95a9 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -906,7 +906,7 @@ int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)
 			}
 		}
 		else {
-			skb->h.raw = skb->data;
+			skb_reset_transport_header(skb);
 			res = rose_process_rx_frame(sk, skb);
 			goto out;
 		}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 606971645b3..aec8cf165e1 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1319,7 +1319,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		unix_attach_fds(siocb->scm, skb);
 	unix_get_secdata(siocb->scm, skb);
 
-	skb->h.raw = skb->data;
+	skb_reset_transport_header(skb);
 	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
 	if (err)
 		goto out_free;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index a1988431562..fc713059ccd 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1210,8 +1210,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
 		}
 	}
 
-	skb->h.raw = skb->data;
-
+	skb_reset_transport_header(skb);
 	copied = skb->len;
 
 	if (copied > size) {
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index 94fd12f3a90..848a6b6f90a 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -48,7 +48,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
 	if ((sk = x25_find_socket(lci, nb)) != NULL) {
 		int queued = 1;
 
-		skb->h.raw = skb->data;
+		skb_reset_transport_header(skb);
 		bh_lock_sock(sk);
 		if (!sock_owned_by_user(sk)) {
 			queued = x25_process_rx_frame(sk, skb);
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index c5239fcdefa..b2bbe552a89 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -53,7 +53,7 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
 
 		skb_queue_tail(&x25->fragment_queue, skb);
 
-		skbn->h.raw = skbn->data;
+		skb_reset_transport_header(skbn);
 
 		skbo = skb_dequeue(&x25->fragment_queue);
 		memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
-- 
cgit v1.2.3


From ea2ae17d6443abddc79480dc9f7af8feacabddc4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 25 Apr 2007 17:55:53 -0700
Subject: [SK_BUFF]: Introduce skb_transport_offset()

For the quite common 'skb->h.raw - skb->data' sequence.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/atl1/atl1_main.c       | 10 +++++-----
 drivers/net/cassini.c              |  6 ++----
 drivers/net/cxgb3/sge.c            |  7 ++++---
 drivers/net/e1000/e1000_main.c     | 10 +++++-----
 drivers/net/ixgb/ixgb_main.c       |  8 ++++----
 drivers/net/myri10ge/myri10ge.c    |  5 +++--
 drivers/net/netxen/netxen_nic_hw.c |  2 +-
 drivers/net/sk98lin/skge.c         |  4 ++--
 drivers/net/skge.c                 |  2 +-
 drivers/net/sky2.c                 |  2 +-
 drivers/net/sungem.c               |  6 ++----
 drivers/net/sunhme.c               |  6 ++----
 include/linux/skbuff.h             |  5 +++++
 include/net/udplite.h              |  6 +++---
 net/core/dev.c                     |  2 +-
 net/core/skbuff.c                  |  2 +-
 net/ipv4/esp4.c                    |  2 +-
 net/ipv4/udp.c                     |  2 +-
 net/ipv6/esp6.c                    |  9 +++------
 net/ipv6/exthdrs.c                 | 12 +++++++-----
 net/ipv6/ip6_input.c               |  2 +-
 net/ipv6/ipcomp6.c                 |  4 +---
 net/ipv6/mip6.c                    |  5 +++--
 net/ipv6/raw.c                     |  4 ++--
 net/ipv6/reassembly.c              |  3 ++-
 net/sctp/input.c                   |  2 +-
 26 files changed, 64 insertions(+), 64 deletions(-)

diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index d2be79a30f8..c26f8ce320e 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -1326,8 +1326,8 @@ static int atl1_tx_csum(struct atl1_adapter *adapter, struct sk_buff *skb,
 	u8 css, cso;
 
 	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
-		cso = skb->h.raw - skb->data;
-		css = (skb->h.raw + skb->csum_offset) - skb->data;
+		cso = skb_transport_offset(skb);
+		css = cso + skb->csum;
 		if (unlikely(cso & 0x1)) {
 			printk(KERN_DEBUG "%s: payload offset != even number\n",
 				atl1_driver_name);
@@ -1369,8 +1369,8 @@ static void atl1_tx_map(struct atl1_adapter *adapter,
 
 	if (tcp_seg) {
 		/* TSO/GSO */
-		proto_hdr_len =
-		    ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+		proto_hdr_len = (skb_transport_offset(skb) +
+				 (skb->h.th->doff << 2));
 		buffer_info->length = proto_hdr_len;
 		page = virt_to_page(skb->data);
 		offset = (unsigned long)skb->data & ~PAGE_MASK;
@@ -1562,7 +1562,7 @@ static int atl1_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	mss = skb_shinfo(skb)->gso_size;
 	if (mss) {
 		if (skb->protocol == htons(ETH_P_IP)) {
-			proto_hdr_len = ((skb->h.raw - skb->data) +
+			proto_hdr_len = (skb_transport_offset(skb) +
 					 (skb->h.th->doff << 2));
 			if (unlikely(proto_hdr_len > len)) {
 				dev_kfree_skb_any(skb);
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index 68e37a655fe..bd3ab6493e3 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -2821,10 +2821,8 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring,
 
 	ctrl = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		u64 csum_start_off, csum_stuff_off;
-
-		csum_start_off = (u64) (skb->h.raw - skb->data);
-		csum_stuff_off = csum_start_off + skb->csum_offset;
+		const u64 csum_start_off = skb_transport_offset(skb);
+		const u64 csum_stuff_off = csum_start_off + skb->csum_offset;
 
 		ctrl =  TX_DESC_CSUM_EN |
 			CAS_BASE(TX_DESC_CSUM_START, csum_start_off) |
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index a891f6f8152..d38b1bcd138 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -1319,9 +1319,10 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
 	/* Only TX_DATA builds SGLs */
 
 	from = (struct work_request_hdr *)skb->data;
-	memcpy(&d->flit[1], &from[1], skb->h.raw - skb->data - sizeof(*from));
+	memcpy(&d->flit[1], &from[1],
+	       skb_transport_offset(skb) - sizeof(*from));
 
-	flits = (skb->h.raw - skb->data) / 8;
+	flits = skb_transport_offset(skb) / 8;
 	sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
 	sgl_flits = make_sgl(skb, sgp, skb->h.raw, skb->tail - skb->h.raw,
 			     adap->pdev);
@@ -1349,7 +1350,7 @@ static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
 	if (skb->len <= WR_LEN && cnt == 0)
 		return 1;	/* packet fits as immediate data */
 
-	flits = (skb->h.raw - skb->data) / 8;	/* headers */
+	flits = skb_transport_offset(skb) / 8;	/* headers */
 	if (skb->tail != skb->h.raw)
 		cnt++;
 	return flits_to_desc(flits + sgl_len(cnt));
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index a3d9986b417..78cf417cf23 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2887,7 +2887,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 				return err;
 		}
 
-		hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+		hdr_len = (skb_transport_offset(skb) + (skb->h.th->doff << 2));
 		mss = skb_shinfo(skb)->gso_size;
 		if (skb->protocol == htons(ETH_P_IP)) {
 			struct iphdr *iph = ip_hdr(skb);
@@ -2897,7 +2897,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 							      iph->daddr, 0,
 							      IPPROTO_TCP, 0);
 			cmd_length = E1000_TXD_CMD_IP;
-			ipcse = skb->h.raw - skb->data - 1;
+			ipcse = skb_transport_offset(skb) - 1;
 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
 			ipv6_hdr(skb)->payload_len = 0;
 			skb->h.th->check =
@@ -2908,7 +2908,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 		}
 		ipcss = skb_network_offset(skb);
 		ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data;
-		tucss = skb->h.raw - skb->data;
+		tucss = skb_transport_offset(skb);
 		tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
 		tucse = 0;
 
@@ -2950,7 +2950,7 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 	uint8_t css;
 
 	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
-		css = skb->h.raw - skb->data;
+		css = skb_transport_offset(skb);
 
 		i = tx_ring->next_to_use;
 		buffer_info = &tx_ring->buffer_info[i];
@@ -3292,7 +3292,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 		/* TSO Workaround for 82571/2/3 Controllers -- if skb->data
 		* points to just header, pull a few bytes of payload from
 		* frags into skb->data */
-		hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+		hdr_len = (skb_transport_offset(skb) + (skb->h.th->doff << 2));
 		if (skb->data_len && (hdr_len == (skb->len - skb->data_len))) {
 			switch (adapter->hw.mac_type) {
 				unsigned int pull_size;
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index bba4dcaf92e..ceea6e45792 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1190,7 +1190,7 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
 				return err;
 		}
 
-		hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+		hdr_len = (skb_transport_offset(skb) + (skb->h.th->doff << 2));
 		mss = skb_shinfo(skb)->gso_size;
 		iph = ip_hdr(skb);
 		iph->tot_len = 0;
@@ -1199,8 +1199,8 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
 						      0, IPPROTO_TCP, 0);
 		ipcss = skb_network_offset(skb);
 		ipcso = (void *)&(iph->check) - (void *)skb->data;
-		ipcse = skb->h.raw - skb->data - 1;
-		tucss = skb->h.raw - skb->data;
+		ipcse = skb_transport_offset(skb) - 1;
+		tucss = skb_transport_offset(skb);
 		tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
 		tucse = 0;
 
@@ -1245,7 +1245,7 @@ ixgb_tx_csum(struct ixgb_adapter *adapter, struct sk_buff *skb)
 
 	if(likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		struct ixgb_buffer *buffer_info;
-		css = skb->h.raw - skb->data;
+		css = skb_transport_offset(skb);
 		cso = css + skb->csum_offset;
 
 		i = adapter->tx_ring.next_to_use;
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index 7c04179c7b1..e04228c7b14 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -2029,7 +2029,7 @@ again:
 	odd_flag = 0;
 	flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST);
 	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
-		cksum_offset = (skb->h.raw - skb->data);
+		cksum_offset = skb_transport_offset(skb);
 		pseudo_hdr_offset = cksum_offset + skb->csum_offset;
 		/* If the headers are excessively large, then we must
 		 * fall back to a software checksum */
@@ -2054,7 +2054,8 @@ again:
 		 * send loop that we are still in the
 		 * header portion of the TSO packet.
 		 * TSO header must be at most 134 bytes long */
-		cum_len = -((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
+		cum_len = -(skb_transport_offset(skb) +
+			    (skb->h.th->doff << 2));
 
 		/* for TSO, pseudo_hdr_offset holds mss.
 		 * The firmware figures out where to put
diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c
index 28d68c3550e..09ca2192cbf 100644
--- a/drivers/net/netxen/netxen_nic_hw.c
+++ b/drivers/net/netxen/netxen_nic_hw.c
@@ -387,7 +387,7 @@ void netxen_tso_check(struct netxen_adapter *adapter,
 		}
 	}
 	adapter->stats.xmitcsummed++;
-	desc->tcp_hdr_offset = skb->h.raw - skb->data;
+	desc->tcp_hdr_offset = skb_transport_offset(skb);
 	desc->ip_hdr_offset = skb_network_offset(skb);
 }
 
diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c
index 9ac1fe659dc..e4ab7a8acc1 100644
--- a/drivers/net/sk98lin/skge.c
+++ b/drivers/net/sk98lin/skge.c
@@ -1562,7 +1562,7 @@ struct sk_buff	*pMessage)	/* pointer to send-message              */
 	pTxd->pMBuf     = pMessage;
 
 	if (pMessage->ip_summed == CHECKSUM_PARTIAL) {
-		u16 hdrlen = pMessage->h.raw - pMessage->data;
+		u16 hdrlen = skb_transport_offset(pMessage);
 		u16 offset = hdrlen + pMessage->csum_offset;
 
 		if ((pMessage->h.ipiph->protocol == IPPROTO_UDP ) &&
@@ -1681,7 +1681,7 @@ struct sk_buff	*pMessage)	/* pointer to send-message              */
 	** Does the HW need to evaluate checksum for TCP or UDP packets? 
 	*/
 	if (pMessage->ip_summed == CHECKSUM_PARTIAL) {
-		u16 hdrlen = pMessage->h.raw - pMessage->data;
+		u16 hdrlen = skb_transport_offset(pMessage);
 		u16 offset = hdrlen + pMessage->csum_offset;
 
 		Control = BMU_STFWD;
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index d476a3cc2e9..ca7a0e03984 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -2654,7 +2654,7 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 	td->dma_hi = map >> 32;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		int offset = skb->h.raw - skb->data;
+		const int offset = skb_transport_offset(skb);
 
 		/* This seems backwards, but it is what the sk98lin
 		 * does.  Looks like hardware is wrong?
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index a37bb205f3d..a35f2f2784a 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1421,7 +1421,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 
 	/* Handle TCP checksum offload */
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		unsigned offset = skb->h.raw - skb->data;
+		const unsigned offset = skb_transport_offset(skb);
 		u32 tcpsum;
 
 		tcpsum = offset << 16;		/* sum start */
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index db2e1a6b723..4bb89dec565 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -1028,10 +1028,8 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	ctrl = 0;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		u64 csum_start_off, csum_stuff_off;
-
-		csum_start_off = (u64) (skb->h.raw - skb->data);
-		csum_stuff_off = csum_start_off + skb->csum_offset;
+		const u64 csum_start_off = skb_transport_offset(skb);
+		const u64 csum_stuff_off = csum_start_off + skb->csum_offset;
 
 		ctrl = (TXDCTRL_CENAB |
 			(csum_start_off << 15) |
diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index aca592bc032..4b69c1deb9f 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -2269,10 +2269,8 @@ static int happy_meal_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	tx_flags = TXFLAG_OWN;
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		u32 csum_start_off, csum_stuff_off;
-
-		csum_start_off = (u32) (skb->h.raw - skb->data);
-		csum_stuff_off = csum_start_off + skb->csum_offset;
+		const u32 csum_start_off = skb_transport_offset(skb);
+		const u32 csum_stuff_off = csum_start_off + skb->csum_offset;
 
 		tx_flags = (TXFLAG_OWN | TXFLAG_CSENABLE |
 			    ((csum_start_off << 14) & TXFLAG_CSBUFBEGIN) |
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7c1f1756e48..64c3c1687e4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -962,6 +962,11 @@ static inline void skb_reset_transport_header(struct sk_buff *skb)
 	skb->h.raw = skb->data;
 }
 
+static inline int skb_transport_offset(const struct sk_buff *skb)
+{
+	return skb->h.raw - skb->data;
+}
+
 static inline unsigned char *skb_network_header(const struct sk_buff *skb)
 {
 	return skb->nh.raw;
diff --git a/include/net/udplite.h b/include/net/udplite.h
index d99df75fe54..76503203665 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -101,14 +101,14 @@ static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh)
 
 static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb)
 {
-	int off, len, cscov = udplite_sender_cscov(udp_sk(sk), skb->h.uh);
+	int cscov = udplite_sender_cscov(udp_sk(sk), skb->h.uh);
 	__wsum csum = 0;
 
 	skb->ip_summed = CHECKSUM_NONE;     /* no HW support for checksumming */
 
 	skb_queue_walk(&sk->sk_write_queue, skb) {
-		off = skb->h.raw - skb->data;
-		len = skb->len - off;
+		const int off = skb_transport_offset(skb);
+		const int len = skb->len - off;
 
 		csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 99f15728d9c..f7f7e5687e4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1156,7 +1156,7 @@ EXPORT_SYMBOL(netif_device_attach);
 int skb_checksum_help(struct sk_buff *skb)
 {
 	__wsum csum;
-	int ret = 0, offset = skb->h.raw - skb->data;
+	int ret = 0, offset = skb_transport_offset(skb);
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		goto out_set_summed;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1493c95f633..b242020c02f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1348,7 +1348,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 	long csstart;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		csstart = skb->h.raw - skb->data;
+		csstart = skb_transport_offset(skb);
 	else
 		csstart = skb_headlen(skb);
 
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index ed3deed6644..95767456280 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -27,7 +27,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	int nfrags;
 
 	/* Strip IP+ESP header. */
-	__skb_pull(skb, skb->h.raw - skb->data);
+	__skb_pull(skb, skb_transport_offset(skb));
 	/* Now skb is pure payload to encrypt */
 
 	err = -ENOMEM;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 13739cd8206..13875e8419a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -435,7 +435,7 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
 		 * fragments on the socket so that all csums of sk_buffs
 		 * should be together
 		 */
-		offset = skb->h.raw - skb->data;
+		offset = skb_transport_offset(skb);
 		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
 
 		skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 7aff380e74e..35905867ded 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -42,21 +42,18 @@
 static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
-	int hdr_len;
 	struct ipv6hdr *top_iph;
 	struct ipv6_esp_hdr *esph;
 	struct crypto_blkcipher *tfm;
 	struct blkcipher_desc desc;
-	struct esp_data *esp;
 	struct sk_buff *trailer;
 	int blksize;
 	int clen;
 	int alen;
 	int nfrags;
-
-	esp = x->data;
-	hdr_len = skb->h.raw - skb->data +
-		  sizeof(*esph) + esp->conf.ivlen;
+	struct esp_data *esp = x->data;
+	int hdr_len = (skb_transport_offset(skb) +
+		       sizeof(*esph) + esp->conf.ivlen);
 
 	/* Strip IP+ESP header. */
 	__skb_pull(skb, hdr_len);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index dab069b0b3f..1bda0299890 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -146,7 +146,7 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 	int off = skb->h.raw - skb->nh.raw;
 	int len = ((skb->h.raw[1]+1)<<3);
 
-	if ((skb->h.raw + len) - skb->data > skb_headlen(skb))
+	if (skb_transport_offset(skb) + len > skb_headlen(skb))
 		goto bad;
 
 	off += 2;
@@ -288,8 +288,9 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 #endif
 	struct dst_entry *dst;
 
-	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
-	    !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
+	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
+	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
+				 ((skb->h.raw[1] + 1) << 3)))) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		kfree_skb(skb);
@@ -387,8 +388,9 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 
 	in6_dev_put(idev);
 
-	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
-	    !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
+	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
+	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
+				 ((skb->h.raw[1] + 1) << 3)))) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		kfree_skb(skb);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 2dd32a2ca05..44275411d1a 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -160,7 +160,7 @@ static inline int ip6_input_finish(struct sk_buff *skb)
 	rcu_read_lock();
 resubmit:
 	idev = ip6_dst_idev(skb->dst);
-	if (!pskb_pull(skb, skb->h.raw - skb->data))
+	if (!pskb_pull(skb, skb_transport_offset(skb)))
 		goto discard;
 	nhoff = IP6CB(skb)->nhoff;
 	nexthdr = skb_network_header(skb)[nhoff];
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index e2404a62968..4a6501695e9 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -124,15 +124,13 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
 	struct ipv6hdr *top_iph;
-	int hdr_len;
 	struct ipv6_comp_hdr *ipch;
 	struct ipcomp_data *ipcd = x->data;
 	int plen, dlen;
 	u8 *start, *scratch;
 	struct crypto_comp *tfm;
 	int cpu;
-
-	hdr_len = skb->h.raw - skb->data;
+	int hdr_len = skb_transport_offset(skb);
 
 	/* check whether datagram len is larger than threshold */
 	if ((skb->len - hdr_len) < ipcd->threshold) {
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 7b5f9d82e80..85202891644 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -90,8 +90,9 @@ int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
 {
 	struct ip6_mh *mh;
 
-	if (!pskb_may_pull(skb, (skb->h.raw - skb->data) + 8) ||
-	    !pskb_may_pull(skb, (skb->h.raw - skb->data) + ((skb->h.raw[1] + 1) << 3)))
+	if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) ||
+	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
+				 ((skb->h.raw[1] + 1) << 3))))
 		return -1;
 
 	mh = (struct ip6_mh *)skb->h.raw;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 0e2b56ce0a5..bb049f1c267 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -513,7 +513,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 			if (csum_skb)
 				continue;
 
-			len = skb->len - (skb->h.raw - skb->data);
+			len = skb->len - skb_transport_offset(skb);
 			if (offset >= len) {
 				offset -= len;
 				continue;
@@ -525,7 +525,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
 		skb = csum_skb;
 	}
 
-	offset += skb->h.raw - skb->data;
+	offset += skb_transport_offset(skb);
 	if (skb_copy_bits(skb, offset, &csum, 2))
 		BUG();
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 2594f0fb290..ef29a7bb97c 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -718,7 +718,8 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
 		return -1;
 	}
-	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+sizeof(struct frag_hdr))) {
+	if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
+				 sizeof(struct frag_hdr)))) {
 		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
 		return -1;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 595fe32b3d4..9311b5ddf5c 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -141,7 +141,7 @@ int sctp_rcv(struct sk_buff *skb)
 	sh = (struct sctphdr *) skb->h.raw;
 
 	/* Pull up the IP and SCTP headers. */
-	__skb_pull(skb, skb->h.raw - skb->data);
+	__skb_pull(skb, skb_transport_offset(skb));
 	if (skb->len < sizeof(struct sctphdr))
 		goto discard_it;
 	if ((skb->ip_summed != CHECKSUM_UNNECESSARY) &&
-- 
cgit v1.2.3


From 967b05f64e27d04a4c8879addd0e1c52137e2c9e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 13 Mar 2007 13:51:52 -0300
Subject: [SK_BUFF]: Introduce skb_set_transport_header

For the cases where the transport header is being set to a offset from
skb->data.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h                  |  6 ++++++
 net/ax25/af_ax25.c                      | 20 ++++++++------------
 net/ax25/ax25_in.c                      |  2 +-
 net/ipv4/esp4.c                         |  3 ++-
 net/ipv4/ip_output.c                    | 19 ++++++++-----------
 net/ipv4/tcp_input.c                    |  2 +-
 net/ipv6/ah6.c                          |  2 +-
 net/ipv6/esp6.c                         |  4 ++--
 net/ipv6/netfilter/nf_conntrack_reasm.c |  2 +-
 net/ipv6/xfrm6_mode_beet.c              |  2 +-
 net/ipv6/xfrm6_mode_ro.c                |  2 +-
 net/ipv6/xfrm6_mode_transport.c         |  2 +-
 12 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 64c3c1687e4..684292efa82 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -962,6 +962,12 @@ static inline void skb_reset_transport_header(struct sk_buff *skb)
 	skb->h.raw = skb->data;
 }
 
+static inline void skb_set_transport_header(struct sk_buff *skb,
+					    const int offset)
+{
+	skb->h.raw = skb->data + offset;
+}
+
 static inline int skb_transport_offset(const struct sk_buff *skb)
 {
 	return skb->h.raw - skb->data;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 14db01a4ff6..75d4d695ede 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1425,7 +1425,6 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 	struct sockaddr_ax25 sax;
 	struct sk_buff *skb;
 	ax25_digi dtmp, *dp;
-	unsigned char *asmptr;
 	ax25_cb *ax25;
 	size_t size;
 	int lv, err, addr_len = msg->msg_namelen;
@@ -1551,10 +1550,8 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 	skb_reset_network_header(skb);
 
 	/* Add the PID if one is not supplied by the user in the skb */
-	if (!ax25->pidincl) {
-		asmptr  = skb_push(skb, 1);
-		*asmptr = sk->sk_protocol;
-	}
+	if (!ax25->pidincl)
+		*skb_push(skb, 1) = sk->sk_protocol;
 
 	SOCK_DEBUG(sk, "AX.25: Transmitting buffer\n");
 
@@ -1573,7 +1570,7 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 		goto out;
 	}
 
-	asmptr = skb_push(skb, 1 + ax25_addr_size(dp));
+	skb_push(skb, 1 + ax25_addr_size(dp));
 
 	SOCK_DEBUG(sk, "Building AX.25 Header (dp=%p).\n", dp);
 
@@ -1581,17 +1578,16 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 		SOCK_DEBUG(sk, "Num digipeaters=%d\n", dp->ndigi);
 
 	/* Build an AX.25 header */
-	asmptr += (lv = ax25_addr_build(asmptr, &ax25->source_addr,
-					&sax.sax25_call, dp,
-					AX25_COMMAND, AX25_MODULUS));
+	lv = ax25_addr_build(skb->data, &ax25->source_addr, &sax.sax25_call,
+			     dp, AX25_COMMAND, AX25_MODULUS);
 
 	SOCK_DEBUG(sk, "Built header (%d bytes)\n",lv);
 
-	skb->h.raw = asmptr;
+	skb_set_transport_header(skb, lv);
 
-	SOCK_DEBUG(sk, "base=%p pos=%p\n", skb->data, asmptr);
+	SOCK_DEBUG(sk, "base=%p pos=%p\n", skb->data, skb->h.raw);
 
-	*asmptr = AX25_UI;
+	*skb->h.raw = AX25_UI;
 
 	/* Datagram frames go straight out of the door as UI */
 	ax25_queue_xmit(skb, ax25->ax25_dev->dev);
diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 724ad5ce75d..31c59387a6f 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -233,7 +233,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	/* UI frame - bypass LAPB processing */
 	if ((*skb->data & ~0x10) == AX25_UI && dp.lastrepeat + 1 == dp.ndigi) {
-		skb->h.raw = skb->data + 2;		/* skip control and pid */
+		skb_set_transport_header(skb, 2); /* skip control and pid */
 
 		ax25_send_to_raw(&dest, skb, skb->data[1]);
 
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 95767456280..82543eebfa5 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -261,7 +261,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	iph->protocol = nexthdr[1];
 	pskb_trim(skb, skb->len - alen - padlen - 2);
-	skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - ihl;
+	__skb_pull(skb, sizeof(*esph) + esp->conf.ivlen);
+	skb_set_transport_header(skb, -ihl);
 
 	return 0;
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 63c05be0764..6d92358fc51 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -593,7 +593,7 @@ slow_path:
 		skb_reserve(skb2, ll_rs);
 		skb_put(skb2, len + hlen);
 		skb_reset_network_header(skb2);
-		skb2->h.raw = skb2->data + hlen;
+		skb2->h.raw = skb2->nh.raw + hlen;
 
 		/*
 		 *	Charge the memory for the fragment to any owner
@@ -724,7 +724,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
 		skb_reset_network_header(skb);
 
 		/* initialize protocol header pointer */
-		skb->h.raw = skb->data + fragheaderlen;
+		skb->h.raw = skb->nh.raw + fragheaderlen;
 
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
@@ -1099,8 +1099,6 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 		}
 		if (len <= 0) {
 			struct sk_buff *skb_prev;
-			char *data;
-			struct iphdr *iph;
 			int alloclen;
 
 			skb_prev = skb;
@@ -1123,16 +1121,15 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 			/*
 			 *	Find where to start putting bytes.
 			 */
-			data = skb_put(skb, fragheaderlen + fraggap);
+			skb_put(skb, fragheaderlen + fraggap);
 			skb_reset_network_header(skb);
-			iph = ip_hdr(skb);
-			data += fragheaderlen;
-			skb->h.raw = data;
+			skb->h.raw = skb->nh.raw + fragheaderlen;
 
 			if (fraggap) {
-				skb->csum = skb_copy_and_csum_bits(
-					skb_prev, maxfraglen,
-					data, fraggap, 0);
+				skb->csum = skb_copy_and_csum_bits(skb_prev,
+								   maxfraglen,
+								   skb->h.raw,
+								   fraggap, 0);
 				skb_prev->csum = csum_sub(skb_prev->csum,
 							  skb->csum);
 				pskb_trim_unique(skb_prev, maxfraglen);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5da823a3225..2776a8b0133 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3636,7 +3636,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
 		skb_set_network_header(nskb,
 				       skb_network_header(skb) - skb->head);
-		nskb->h.raw = nskb->data + (skb->h.raw - skb->head);
+		skb_set_transport_header(nskb, skb->h.raw - skb->head);
 
 		skb_reserve(nskb, header);
 		memcpy(nskb->head, skb->head, header);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index b682d2368c2..e5ee981d3e1 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -247,7 +247,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 	memcpy(tmp_base, top_iph, sizeof(tmp_base));
 
 	tmp_ext = NULL;
-	extlen = skb->h.raw - (unsigned char *)(top_iph + 1);
+	extlen = skb_transport_offset(skb) + sizeof(struct ipv6hdr);
 	if (extlen) {
 		extlen += sizeof(*tmp_ext);
 		tmp_ext = kmalloc(extlen, GFP_ATOMIC);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 35905867ded..ad522b7b577 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -228,8 +228,8 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 		ret = nexthdr[1];
 	}
 
-	skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - hdr_len;
-
+	__skb_pull(skb, sizeof(*esph) + esp->conf.ivlen);
+	skb_set_transport_header(skb, -hdr_len);
 out:
 	return ret;
 }
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 75138cf1fa6..015950522c8 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -785,7 +785,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 		goto ret_orig;
 	}
 
-	clone->h.raw = clone->data + fhoff;
+	skb_set_transport_header(clone, fhoff);
 	hdr = ipv6_hdr(clone);
 	fhdr = (struct frag_hdr *)clone->h.raw;
 
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index b5a48c255f0..abac09409de 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -42,7 +42,7 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
 	skb->nh.raw = prevhdr - x->props.header_len;
-	skb->h.raw = skb->data + hdr_len;
+	skb_set_transport_header(skb, hdr_len);
 	memmove(skb->data, iph, hdr_len);
 
 	skb_reset_network_header(skb);
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index d01958d921a..da48ecf3fe9 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -54,7 +54,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
 	skb->nh.raw = prevhdr - x->props.header_len;
-	skb->h.raw = skb->data + hdr_len;
+	skb_set_transport_header(skb, hdr_len);
 	memmove(skb->data, iph, hdr_len);
 	return 0;
 }
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index cae6cacd58c..0134d74ef08 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -36,7 +36,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
 	skb->nh.raw = prevhdr - x->props.header_len;
-	skb->h.raw = skb->data + hdr_len;
+	skb_set_transport_header(skb, hdr_len);
 	memmove(skb->data, iph, hdr_len);
 	return 0;
 }
-- 
cgit v1.2.3


From 2c0fd387b00a6758550b5ca1aae4408374483fe7 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 13 Mar 2007 13:59:32 -0300
Subject: [SCTP]: Introduce sctp_hdr()

For consistency with all the other skb->h.raw accessors.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h |  9 +++++++++
 net/sctp/input.c     | 14 +++++---------
 net/sctp/ipv6.c      |  4 ++--
 net/sctp/protocol.c  | 10 ++++------
 4 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index d4f86560bff..d76767dfe59 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -63,6 +63,15 @@ typedef struct sctphdr {
 	__be32 checksum;
 } __attribute__((packed)) sctp_sctphdr_t;
 
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb)
+{
+	return (struct sctphdr *)skb->h.raw;
+}
+#endif
+
 /* Section 3.2.  Chunk Field Descriptions. */
 typedef struct sctp_chunkhdr {
 	__u8 type;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 9311b5ddf5c..3a322c584c7 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -79,14 +79,10 @@ static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb);
 /* Calculate the SCTP checksum of an SCTP packet.  */
 static inline int sctp_rcv_checksum(struct sk_buff *skb)
 {
-	struct sctphdr *sh;
-	__u32 cmp, val;
 	struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
-	sh = (struct sctphdr *) skb->h.raw;
-	cmp = ntohl(sh->checksum);
-
-	val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
+	struct sctphdr *sh = sctp_hdr(skb);
+	__u32 cmp = ntohl(sh->checksum);
+	__u32 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
 
 	for (; list; list = list->next)
 		val = sctp_update_cksum((__u8 *)list->data, skb_headlen(list),
@@ -138,7 +134,7 @@ int sctp_rcv(struct sk_buff *skb)
 	if (skb_linearize(skb))
 		goto discard_it;
 
-	sh = (struct sctphdr *) skb->h.raw;
+	sh = sctp_hdr(skb);
 
 	/* Pull up the IP and SCTP headers. */
 	__skb_pull(skb, skb_transport_offset(skb));
@@ -905,7 +901,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct sk_buff *skb,
 	struct sctp_association *asoc;
 	union sctp_addr addr;
 	union sctp_addr *paddr = &addr;
-	struct sctphdr *sh = (struct sctphdr *) skb->h.raw;
+	struct sctphdr *sh = sctp_hdr(skb);
 	sctp_chunkhdr_t *ch;
 	union sctp_params params;
 	sctp_init_chunk_t *init;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 001be2de0b3..0992bc5bb52 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -390,7 +390,7 @@ static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb,
 	addr->v6.sin6_flowinfo = 0; /* FIXME */
 	addr->v6.sin6_scope_id = ((struct inet6_skb_parm *)skb->cb)->iif;
 
-	sh = (struct sctphdr *) skb->h.raw;
+	sh = sctp_hdr(skb);
 	if (is_saddr) {
 		*port  = sh->source;
 		from = &ipv6_hdr(skb)->saddr;
@@ -765,7 +765,7 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
 	if (msgname) {
 		sctp_inet6_msgname(msgname, addr_len);
 		sin6 = (struct sockaddr_in6 *)msgname;
-		sh = (struct sctphdr *)skb->h.raw;
+		sh = sctp_hdr(skb);
 		sin6->sin6_port = sh->source;
 
 		/* Map ipv4 address into v4-mapped-on-v6 address. */
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 08f92ba4ebd..7c28c9b959e 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -235,7 +235,7 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
 	port = &addr->v4.sin_port;
 	addr->v4.sin_family = AF_INET;
 
-	sh = (struct sctphdr *) skb->h.raw;
+	sh = sctp_hdr(skb);
 	if (is_saddr) {
 		*port  = sh->source;
 		from = &ip_hdr(skb)->saddr;
@@ -731,13 +731,11 @@ static void sctp_inet_event_msgname(struct sctp_ulpevent *event, char *msgname,
 /* Initialize and copy out a msgname from an inbound skb. */
 static void sctp_inet_skb_msgname(struct sk_buff *skb, char *msgname, int *len)
 {
-	struct sctphdr *sh;
-	struct sockaddr_in *sin;
-
 	if (msgname) {
+		struct sctphdr *sh = sctp_hdr(skb);
+		struct sockaddr_in *sin = (struct sockaddr_in *)msgname;
+
 		sctp_inet_msgname(msgname, len);
-		sin = (struct sockaddr_in *)msgname;
-		sh = (struct sctphdr *)skb->h.raw;
 		sin->sin_port = sh->source;
 		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 	}
-- 
cgit v1.2.3


From cc70ab261c9f997589546100ddec5da6bfd89c4e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 13 Mar 2007 14:03:22 -0300
Subject: [ICMP6]: Introduce icmp6_hdr()

For consistency with all the other skb->h.raw accessors.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/icmpv6.h | 9 +++++++++
 net/ipv6/datagram.c    | 2 +-
 net/ipv6/icmp.c        | 8 ++++----
 net/ipv6/mcast.c       | 6 +++---
 net/ipv6/ndisc.c       | 2 +-
 5 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
index 68d3526c3a0..0b5ba5eb7ed 100644
--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -75,6 +75,15 @@ struct icmp6hdr {
 #define icmp6_router_pref	icmp6_dataun.u_nd_ra.router_pref
 };
 
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb)
+{
+	return (struct icmp6hdr *)skb->h.raw;
+}
+#endif
+
 #define ICMPV6_ROUTER_PREF_LOW		0x3
 #define ICMPV6_ROUTER_PREF_MEDIUM	0x0
 #define ICMPV6_ROUTER_PREF_HIGH		0x1
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index f429290c2c3..feba6b197fe 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -209,7 +209,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 		     __be16 port, u32 info, u8 *payload)
 {
 	struct ipv6_pinfo *np  = inet6_sk(sk);
-	struct icmp6hdr *icmph = (struct icmp6hdr *)skb->h.raw;
+	struct icmp6hdr *icmph = icmp6_hdr(skb);
 	struct sock_exterr_skb *serr;
 
 	if (!np->recverr)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 3a01effda69..d3edc3cf1ce 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -222,7 +222,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct
 	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
 		goto out;
 
-	icmp6h = (struct icmp6hdr*) skb->h.raw;
+	icmp6h = icmp6_hdr(skb);
 	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
 	icmp6h->icmp6_cksum = 0;
 
@@ -476,7 +476,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 	struct inet6_dev *idev;
 	struct ipv6_pinfo *np;
 	struct in6_addr *saddr = NULL;
-	struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
+	struct icmp6hdr *icmph = icmp6_hdr(skb);
 	struct icmp6hdr tmp_hdr;
 	struct flowi fl;
 	struct icmpv6_msg msg;
@@ -651,7 +651,7 @@ static int icmpv6_rcv(struct sk_buff **pskb)
 	if (!pskb_pull(skb, sizeof(struct icmp6hdr)))
 		goto discard_it;
 
-	hdr = (struct icmp6hdr *) skb->h.raw;
+	hdr = icmp6_hdr(skb);
 
 	type = hdr->icmp6_type;
 
@@ -677,7 +677,7 @@ static int icmpv6_rcv(struct sk_buff **pskb)
 		 */
 		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 			goto discard_it;
-		hdr = (struct icmp6hdr *) skb->h.raw;
+		hdr = icmp6_hdr(skb);
 		orig_hdr = (struct ipv6hdr *) (hdr + 1);
 		rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
 				   ntohl(hdr->icmp6_mtu));
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index b2b37ba48b9..1f2a3be9308 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -988,7 +988,7 @@ int ipv6_is_mld(struct sk_buff *skb, int nexthdr)
 	if (!pskb_may_pull(skb, sizeof(struct icmp6hdr)))
 		return 0;
 
-	pic = (struct icmp6hdr *)skb->h.raw;
+	pic = icmp6_hdr(skb);
 
 	switch (pic->icmp6_type) {
 	case ICMPV6_MGM_QUERY:
@@ -1179,7 +1179,7 @@ int igmp6_event_query(struct sk_buff *skb)
 	if (idev == NULL)
 		return 0;
 
-	hdr = (struct icmp6hdr *) skb->h.raw;
+	hdr = icmp6_hdr(skb);
 	group = (struct in6_addr *) (hdr + 1);
 	group_type = ipv6_addr_type(group);
 
@@ -1300,7 +1300,7 @@ int igmp6_event_report(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
 		return -EINVAL;
 
-	hdr = (struct icmp6hdr*) skb->h.raw;
+	hdr = icmp6_hdr(skb);
 
 	/* Drop reports with not link local source */
 	addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 00feb4c4d98..8b946f56287 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1366,7 +1366,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
 		return;
 	}
 
-	icmph = (struct icmp6hdr *) skb->h.raw;
+	icmph = icmp6_hdr(skb);
 	target = (struct in6_addr *) (icmph + 1);
 	dest = target + 1;
 
-- 
cgit v1.2.3


From d9edf9e2be0f7661558984c32bd53867a7037fd3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 13 Mar 2007 14:19:23 -0300
Subject: [SK_BUFF]: Introduce igmp_hdr() & friends, remove skb->h.igmph

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h   | 21 +++++++++++++++++++++
 include/linux/skbuff.h |  1 -
 net/ipv4/igmp.c        | 22 +++++++++++-----------
 net/ipv4/ipmr.c        |  2 +-
 4 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index a113fe68d8a..ca285527b87 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -80,6 +80,27 @@ struct igmpv3_query {
 	__be32 srcs[0];
 };
 
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb)
+{
+	return (struct igmphdr *)skb->h.raw;
+}
+
+static inline struct igmpv3_report *
+			igmpv3_report_hdr(const struct sk_buff *skb)
+{
+	return (struct igmpv3_report *)skb->h.raw;
+}
+
+static inline struct igmpv3_query *
+			igmpv3_query_hdr(const struct sk_buff *skb)
+{
+	return (struct igmpv3_query *)skb->h.raw;
+}
+#endif
+
 #define IGMP_HOST_MEMBERSHIP_QUERY	0x11	/* From RFC1112 */
 #define IGMP_HOST_MEMBERSHIP_REPORT	0x12	/* Ditto */
 #define IGMP_DVMRP			0x13	/* DVMRP routing */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 684292efa82..0a4a7ac034f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -240,7 +240,6 @@ struct sk_buff {
 		struct tcphdr	*th;
 		struct udphdr	*uh;
 		struct icmphdr	*icmph;
-		struct igmphdr	*igmph;
 		struct iphdr	*ipiph;
 		struct ipv6hdr	*ipv6h;
 		unsigned char	*raw;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index f511d03e243..292516bb1ec 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -333,8 +333,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	((u8*)&pip[1])[2] = 0;
 	((u8*)&pip[1])[3] = 0;
 
-	pig =(struct igmpv3_report *)skb_put(skb, sizeof(*pig));
-	skb->h.igmph = (struct igmphdr *)pig;
+	skb->h.raw = skb_put(skb, sizeof(*pig));
+	pig = igmpv3_report_hdr(skb);
 	pig->type = IGMPV3_HOST_MEMBERSHIP_REPORT;
 	pig->resv1 = 0;
 	pig->csum = 0;
@@ -346,13 +346,13 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 static int igmpv3_sendpack(struct sk_buff *skb)
 {
 	struct iphdr *pip = ip_hdr(skb);
-	struct igmphdr *pig = skb->h.igmph;
+	struct igmphdr *pig = igmp_hdr(skb);
 	const int iplen = skb->tail - skb->nh.raw;
 	const int igmplen = skb->tail - skb->h.raw;
 
 	pip->tot_len = htons(iplen);
 	ip_send_check(pip);
-	pig->csum = ip_compute_csum(skb->h.igmph, igmplen);
+	pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen);
 
 	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev,
 		       dst_output);
@@ -379,7 +379,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
 	pgr->grec_auxwords = 0;
 	pgr->grec_nsrcs = 0;
 	pgr->grec_mca = pmc->multiaddr;
-	pih = (struct igmpv3_report *)skb->h.igmph;
+	pih = igmpv3_report_hdr(skb);
 	pih->ngrec = htons(ntohs(pih->ngrec)+1);
 	*ppgr = pgr;
 	return skb;
@@ -412,7 +412,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
 	if (!*psf_list)
 		goto empty_source;
 
-	pih = skb ? (struct igmpv3_report *)skb->h.igmph : NULL;
+	pih = skb ? igmpv3_report_hdr(skb) : NULL;
 
 	/* EX and TO_EX get a fresh packet, if needed */
 	if (truncate) {
@@ -829,8 +829,8 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group)
 static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 	int len)
 {
-	struct igmphdr 		*ih = skb->h.igmph;
-	struct igmpv3_query *ih3 = (struct igmpv3_query *)ih;
+	struct igmphdr 		*ih = igmp_hdr(skb);
+	struct igmpv3_query *ih3 = igmpv3_query_hdr(skb);
 	struct ip_mc_list	*im;
 	__be32			group = ih->group;
 	int			max_delay;
@@ -863,12 +863,12 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 		if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)))
 			return;
 
-		ih3 = (struct igmpv3_query *) skb->h.raw;
+		ih3 = igmpv3_query_hdr(skb);
 		if (ih3->nsrcs) {
 			if (!pskb_may_pull(skb, sizeof(struct igmpv3_query)
 					   + ntohs(ih3->nsrcs)*sizeof(__be32)))
 				return;
-			ih3 = (struct igmpv3_query *) skb->h.raw;
+			ih3 = igmpv3_query_hdr(skb);
 		}
 
 		max_delay = IGMPV3_MRC(ih3->code)*(HZ/IGMP_TIMER_SCALE);
@@ -945,7 +945,7 @@ int igmp_rcv(struct sk_buff *skb)
 			goto drop;
 	}
 
-	ih = skb->h.igmph;
+	ih = igmp_hdr(skb);
 	switch (ih->type) {
 	case IGMP_HOST_MEMBERSHIP_QUERY:
 		igmp_heard_query(in_dev, skb, len);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 03869d91f6f..05bc27002de 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1430,7 +1430,7 @@ int pim_rcv_v1(struct sk_buff * skb)
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 		goto drop;
 
-	pim = (struct igmphdr*)skb->h.raw;
+	pim = igmp_hdr(skb);
 
 	if (!mroute_do_pim ||
 	    skb->len < sizeof(*pim) + sizeof(*encap) ||
-- 
cgit v1.2.3


From 4bedb45203eab92a87b4c863fe2d0cded633427f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 13 Mar 2007 14:28:48 -0300
Subject: [SK_BUFF]: Introduce udp_hdr(), remove skb->h.uh

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gianfar.c     |  4 ++--
 drivers/net/ioc3-eth.c    |  2 +-
 drivers/net/mv643xx_eth.c |  2 +-
 include/linux/skbuff.h    |  1 -
 include/linux/udp.h       |  9 +++++++++
 include/net/udplite.h     |  2 +-
 net/core/netpoll.c        |  4 +++-
 net/core/pktgen.c         |  4 ++--
 net/ipv4/udp.c            | 12 ++++++------
 net/ipv6/udp.c            | 10 +++++-----
 net/rxrpc/connection.c    |  4 ++--
 net/rxrpc/transport.c     |  4 ++--
 12 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index c9abc96a091..b9f44602c5e 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -944,9 +944,9 @@ static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb)
 	/* And provide the already calculated phcs */
 	if (ip_hdr(skb)->protocol == IPPROTO_UDP) {
 		flags |= TXFCB_UDP;
-		fcb->phcs = skb->h.uh->check;
+		fcb->phcs = udp_hdr(skb)->check;
 	} else
-		fcb->phcs = skb->h.th->check;
+		fcb->phcs = udp_hdr(skb)->check;
 
 	/* l3os is the distance between the start of the
 	 * frame (skb->data) and the start of the IP hdr.
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index d375e786b4b..ba012e10d79 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -1422,7 +1422,7 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		csoff = ETH_HLEN + (ih->ihl << 2);
 		if (proto == IPPROTO_UDP) {
 			csoff += offsetof(struct udphdr, check);
-			skb->h.uh->check = csum;
+			udp_hdr(skb)->check = csum;
 		}
 		if (proto == IPPROTO_TCP) {
 			csoff += offsetof(struct tcphdr, check);
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index 6b39a268ec2..43723839e93 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -1166,7 +1166,7 @@ static void eth_tx_submit_descs_for_skb(struct mv643xx_private *mp,
 		switch (ip_hdr(skb)->protocol) {
 		case IPPROTO_UDP:
 			cmd_sts |= ETH_UDP_FRAME;
-			desc->l4i_chk = skb->h.uh->check;
+			desc->l4i_chk = udp_hdr(skb)->check;
 			break;
 		case IPPROTO_TCP:
 			desc->l4i_chk = skb->h.th->check;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0a4a7ac034f..cb1ac48cc80 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -238,7 +238,6 @@ struct sk_buff {
 
 	union {
 		struct tcphdr	*th;
-		struct udphdr	*uh;
 		struct icmphdr	*icmph;
 		struct iphdr	*ipiph;
 		struct ipv6hdr	*ipv6h;
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 7e08c07efe0..1f58503af9a 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -26,6 +26,15 @@ struct udphdr {
 	__sum16	check;
 };
 
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
+{
+	return (struct udphdr *)skb->h.raw;
+}
+#endif
+
 /* UDP socket options */
 #define UDP_CORK	1	/* Never send partially complete segments */
 #define UDP_ENCAP	100	/* Set the socket to accept encapsulated packets */
diff --git a/include/net/udplite.h b/include/net/udplite.h
index 76503203665..635b0eafca9 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -101,7 +101,7 @@ static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh)
 
 static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb)
 {
-	int cscov = udplite_sender_cscov(udp_sk(sk), skb->h.uh);
+	int cscov = udplite_sender_cscov(udp_sk(sk), udp_hdr(skb));
 	__wsum csum = 0;
 
 	skb->ip_summed = CHECKSUM_NONE;     /* no HW support for checksumming */
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 8b22723d643..57a82445c46 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -296,7 +296,9 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	memcpy(skb->data, msg, len);
 	skb->len += len;
 
-	skb->h.uh = udph = (struct udphdr *) skb_push(skb, sizeof(*udph));
+	skb_push(skb, sizeof(*udph));
+	skb_reset_transport_header(skb);
+	udph = udp_hdr(skb);
 	udph->source = htons(np->local_port);
 	udph->dest = htons(np->remote_port);
 	udph->len = htons(udp_len);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index ee82364c8f3..160d4f01c46 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2392,7 +2392,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
 	skb->nh.raw = (unsigned char *)iph;
-	skb->h.uh = udph;
+	skb->h.raw = (unsigned char *)udph;
 
 	if (pkt_dev->nfrags <= 0)
 		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
@@ -2737,7 +2737,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
 	skb->nh.raw = (unsigned char *)iph;
-	skb->h.uh = udph;
+	skb->h.raw = (unsigned char *)udph;
 
 	if (pkt_dev->nfrags <= 0)
 		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 13875e8419a..926404c5e58 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -420,7 +420,7 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
 				 __be32 src, __be32 dst, int len      )
 {
 	unsigned int offset;
-	struct udphdr *uh = skb->h.uh;
+	struct udphdr *uh = udp_hdr(skb);
 	__wsum csum = 0;
 
 	if (skb_queue_len(&sk->sk_write_queue) == 1) {
@@ -470,7 +470,7 @@ static int udp_push_pending_frames(struct sock *sk)
 	/*
 	 * Create a UDP header
 	 */
-	uh = skb->h.uh;
+	uh = udp_hdr(skb);
 	uh->source = fl->fl_ip_sport;
 	uh->dest = fl->fl_ip_dport;
 	uh->len = htons(up->len);
@@ -866,7 +866,7 @@ try_again:
 	if (sin)
 	{
 		sin->sin_family = AF_INET;
-		sin->sin_port = skb->h.uh->source;
+		sin->sin_port = udp_hdr(skb)->source;
 		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
 		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
 	}
@@ -949,7 +949,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
 		return 1;
 
 	/* Now we can get the pointers */
-	uh = skb->h.uh;
+	uh = udp_hdr(skb);
 	udpdata = (__u8 *)uh + sizeof(struct udphdr);
 	udpdata32 = (__be32 *)udpdata;
 
@@ -1207,7 +1207,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 		   int proto)
 {
 	struct sock *sk;
-	struct udphdr *uh = skb->h.uh;
+	struct udphdr *uh = udp_hdr(skb);
 	unsigned short ulen;
 	struct rtable *rt = (struct rtable*)skb->dst;
 	__be32 saddr = ip_hdr(skb)->saddr;
@@ -1227,7 +1227,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 		/* UDP validates ulen. */
 		if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
 			goto short_packet;
-		uh = skb->h.uh;
+		uh = udp_hdr(skb);
 	}
 
 	if (udp4_csum_init(skb, uh, proto))
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 55affe39b2e..1e3dfb20b1c 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -172,7 +172,7 @@ try_again:
 
 		sin6 = (struct sockaddr_in6 *) msg->msg_name;
 		sin6->sin6_family = AF_INET6;
-		sin6->sin6_port = skb->h.uh->source;
+		sin6->sin6_port = udp_hdr(skb)->source;
 		sin6->sin6_flowinfo = 0;
 		sin6->sin6_scope_id = 0;
 
@@ -346,7 +346,7 @@ static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr,
 			   struct in6_addr *daddr, struct hlist_head udptable[])
 {
 	struct sock *sk, *sk2;
-	const struct udphdr *uh = skb->h.uh;
+	const struct udphdr *uh = udp_hdr(skb);
 	int dif;
 
 	read_lock(&udp_hash_lock);
@@ -420,7 +420,7 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
 
 	saddr = &ipv6_hdr(skb)->saddr;
 	daddr = &ipv6_hdr(skb)->daddr;
-	uh = skb->h.uh;
+	uh = udp_hdr(skb);
 
 	ulen = ntohs(uh->len);
 	if (ulen > skb->len)
@@ -441,7 +441,7 @@ int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
 				goto short_packet;
 			saddr = &ipv6_hdr(skb)->saddr;
 			daddr = &ipv6_hdr(skb)->daddr;
-			uh = skb->h.uh;
+			uh = udp_hdr(skb);
 		}
 	}
 
@@ -534,7 +534,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
 	/*
 	 * Create a UDP header
 	 */
-	uh = skb->h.uh;
+	uh = udp_hdr(skb);
 	uh->source = fl->fl_ip_sport;
 	uh->dest = fl->fl_ip_dport;
 	uh->len = htons(up->len);
diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c
index e601fa87bb7..665a9995244 100644
--- a/net/rxrpc/connection.c
+++ b/net/rxrpc/connection.c
@@ -229,10 +229,10 @@ int rxrpc_connection_lookup(struct rxrpc_peer *peer,
 	_enter("%p{{%hu}},%u,%hu",
 	       peer,
 	       peer->trans->port,
-	       ntohs(pkt->h.uh->source),
+	       ntohs(udp_hdr(pkt)->source),
 	       ntohs(msg->hdr.serviceId));
 
-	x_port		= pkt->h.uh->source;
+	x_port		= udp_hdr(pkt)->source;
 	x_epoch		= msg->hdr.epoch;
 	x_clflag	= msg->hdr.flags & RXRPC_CLIENT_INITIATED;
 	x_connid	= htonl(ntohl(msg->hdr.cid) & RXRPC_CIDMASK);
diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c
index cac078b7406..62398fd01f8 100644
--- a/net/rxrpc/transport.c
+++ b/net/rxrpc/transport.c
@@ -479,7 +479,7 @@ void rxrpc_trans_receive_packet(struct rxrpc_transport *trans)
 		}
 
 		addr = ip_hdr(pkt)->saddr;
-		port = pkt->h.uh->source;
+		port = udp_hdr(pkt)->source;
 
 		_net("Rx Received UDP packet from %08x:%04hu",
 		     ntohl(addr), ntohs(port));
@@ -625,7 +625,7 @@ int rxrpc_trans_immediate_abort(struct rxrpc_transport *trans,
 
 	memset(&sin,0,sizeof(sin));
 	sin.sin_family		= AF_INET;
-	sin.sin_port		= msg->pkt->h.uh->source;
+	sin.sin_port		= udp_hdr(msg->pkt)->source;
 	sin.sin_addr.s_addr	= ip_hdr(msg->pkt)->saddr;
 
 	msghdr.msg_name		= &sin;
-- 
cgit v1.2.3


From 88c7664f13bd1a36acb8566b93892a4c58759ac6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 13 Mar 2007 14:43:18 -0300
Subject: [SK_BUFF]: Introduce icmp_hdr(), remove skb->h.icmph

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/icmp.h   |  9 +++++++++
 include/linux/skbuff.h |  1 -
 net/dccp/ipv4.c        |  4 ++--
 net/ipv4/ah4.c         |  4 ++--
 net/ipv4/esp4.c        |  4 ++--
 net/ipv4/icmp.c        | 14 +++++++-------
 net/ipv4/ip_gre.c      | 12 ++++++------
 net/ipv4/ip_sockglue.c |  6 +++---
 net/ipv4/ipcomp.c      |  4 ++--
 net/ipv4/ipip.c        | 12 ++++++------
 net/ipv4/raw.c         |  6 +++---
 net/ipv4/tcp_ipv4.c    |  4 ++--
 net/ipv4/udp.c         |  4 ++--
 net/ipv6/sit.c         | 12 ++++++------
 net/sctp/input.c       |  4 ++--
 15 files changed, 54 insertions(+), 46 deletions(-)

diff --git a/include/linux/icmp.h b/include/linux/icmp.h
index 24da4fbc1a2..cd3017a1578 100644
--- a/include/linux/icmp.h
+++ b/include/linux/icmp.h
@@ -82,6 +82,15 @@ struct icmphdr {
   } un;
 };
 
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+
+static inline struct icmphdr *icmp_hdr(const struct sk_buff *skb)
+{
+	return (struct icmphdr *)skb->h.raw;
+}
+#endif
+
 /*
  *	constants for (set|get)sockopt
  */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cb1ac48cc80..e580416de78 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -238,7 +238,6 @@ struct sk_buff {
 
 	union {
 		struct tcphdr	*th;
-		struct icmphdr	*icmph;
 		struct iphdr	*ipiph;
 		struct ipv6hdr	*ipv6h;
 		unsigned char	*raw;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index b85437dae0e..718f2fa923a 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -207,8 +207,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 							(iph->ihl << 2));
 	struct dccp_sock *dp;
 	struct inet_sock *inet;
-	const int type = skb->h.icmph->type;
-	const int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct sock *sk;
 	__u64 seq;
 	int err;
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index ebcc797e1c1..e1bb9e0aa5f 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -198,8 +198,8 @@ static void ah4_err(struct sk_buff *skb, u32 info)
 	struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2));
 	struct xfrm_state *x;
 
-	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
-	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
+	if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 		return;
 
 	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 82543eebfa5..de019f9fbfe 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -304,8 +304,8 @@ static void esp4_err(struct sk_buff *skb, u32 info)
 	struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2));
 	struct xfrm_state *x;
 
-	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
-	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
+	if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 		return;
 
 	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4d70c21c50a..8372f8b8f0c 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -355,7 +355,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 			   ipc, rt, MSG_DONTWAIT) < 0)
 		ip_flush_pending_frames(icmp_socket->sk);
 	else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
-		struct icmphdr *icmph = skb->h.icmph;
+		struct icmphdr *icmph = icmp_hdr(skb);
 		__wsum csum = 0;
 		struct sk_buff *skb1;
 
@@ -613,7 +613,7 @@ static void icmp_unreach(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 		goto out_err;
 
-	icmph = skb->h.icmph;
+	icmph = icmp_hdr(skb);
 	iph   = (struct iphdr *)skb->data;
 
 	if (iph->ihl < 5) /* Mangled header, drop. */
@@ -743,7 +743,7 @@ static void icmp_redirect(struct sk_buff *skb)
 
 	iph = (struct iphdr *)skb->data;
 
-	switch (skb->h.icmph->code & 7) {
+	switch (icmp_hdr(skb)->code & 7) {
 	case ICMP_REDIR_NET:
 	case ICMP_REDIR_NETTOS:
 		/*
@@ -752,7 +752,7 @@ static void icmp_redirect(struct sk_buff *skb)
 	case ICMP_REDIR_HOST:
 	case ICMP_REDIR_HOSTTOS:
 		ip_rt_redirect(ip_hdr(skb)->saddr, iph->daddr,
-			       skb->h.icmph->un.gateway,
+			       icmp_hdr(skb)->un.gateway,
 			       iph->saddr, skb->dev);
 		break;
 	}
@@ -780,7 +780,7 @@ static void icmp_echo(struct sk_buff *skb)
 	if (!sysctl_icmp_echo_ignore_all) {
 		struct icmp_bxm icmp_param;
 
-		icmp_param.data.icmph	   = *skb->h.icmph;
+		icmp_param.data.icmph	   = *icmp_hdr(skb);
 		icmp_param.data.icmph.type = ICMP_ECHOREPLY;
 		icmp_param.skb		   = skb;
 		icmp_param.offset	   = 0;
@@ -816,7 +816,7 @@ static void icmp_timestamp(struct sk_buff *skb)
 	icmp_param.data.times[2] = icmp_param.data.times[1];
 	if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4))
 		BUG();
-	icmp_param.data.icmph	   = *skb->h.icmph;
+	icmp_param.data.icmph	   = *icmp_hdr(skb);
 	icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY;
 	icmp_param.data.icmph.code = 0;
 	icmp_param.skb		   = skb;
@@ -943,7 +943,7 @@ int icmp_rcv(struct sk_buff *skb)
 	if (!pskb_pull(skb, sizeof(struct icmphdr)))
 		goto error;
 
-	icmph = skb->h.icmph;
+	icmph = icmp_hdr(skb);
 
 	/*
 	 *	18 is the highest 'known' ICMP type. Anything else is a mystery
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 969fe31723a..39216e6a59e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -320,8 +320,8 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 	struct iphdr *iph = (struct iphdr*)skb->data;
 	__be16	     *p = (__be16*)(skb->data+(iph->ihl<<2));
 	int grehlen = (iph->ihl<<2) + 4;
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct ip_tunnel *t;
 	__be16 flags;
 
@@ -388,8 +388,8 @@ out:
 	struct iphdr *iph = (struct iphdr*)dp;
 	struct iphdr *eiph;
 	__be16	     *p = (__be16*)(dp+(iph->ihl<<2));
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	int rel_type = 0;
 	int rel_code = 0;
 	__be32 rel_info = 0;
@@ -422,7 +422,7 @@ out:
 	default:
 		return;
 	case ICMP_PARAMETERPROB:
-		n = ntohl(skb->h.icmph->un.gateway) >> 24;
+		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
 		if (n < (iph->ihl<<2))
 			return;
 
@@ -442,7 +442,7 @@ out:
 			return;
 		case ICMP_FRAG_NEEDED:
 			/* And it is the only really necessary thing :-) */
-			n = ntohs(skb->h.icmph->un.frag.mtu);
+			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
 			if (n < grehlen+68)
 				return;
 			n -= grehlen;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index fabc250e16d..ccdc59df015 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -269,12 +269,12 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 	serr = SKB_EXT_ERR(skb);
 	serr->ee.ee_errno = err;
 	serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
-	serr->ee.ee_type = skb->h.icmph->type;
-	serr->ee.ee_code = skb->h.icmph->code;
+	serr->ee.ee_type = icmp_hdr(skb)->type;
+	serr->ee.ee_code = icmp_hdr(skb)->code;
 	serr->ee.ee_pad = 0;
 	serr->ee.ee_info = info;
 	serr->ee.ee_data = 0;
-	serr->addr_offset = (u8 *)&(((struct iphdr *)(skb->h.icmph + 1))->daddr) -
+	serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
 				   skb_network_header(skb);
 	serr->port = port;
 
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 8eb46064c52..1f13cc507a4 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -174,8 +174,8 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
 	struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
 	struct xfrm_state *x;
 
-	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
-	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
+	if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 		return;
 
 	spi = htonl(ntohs(ipch->cpi));
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index b7f6ff4705b..b32b5011480 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -280,8 +280,8 @@ static int ipip_err(struct sk_buff *skb, u32 info)
    ICMP in the real Internet is absolutely infeasible.
  */
 	struct iphdr *iph = (struct iphdr*)skb->data;
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct ip_tunnel *t;
 	int err;
 
@@ -336,8 +336,8 @@ out:
 	struct iphdr *iph = (struct iphdr*)dp;
 	int hlen = iph->ihl<<2;
 	struct iphdr *eiph;
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	int rel_type = 0;
 	int rel_code = 0;
 	__be32 rel_info = 0;
@@ -354,7 +354,7 @@ out:
 	default:
 		return 0;
 	case ICMP_PARAMETERPROB:
-		n = ntohl(skb->h.icmph->un.gateway) >> 24;
+		n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
 		if (n < hlen)
 			return 0;
 
@@ -373,7 +373,7 @@ out:
 			return 0;
 		case ICMP_FRAG_NEEDED:
 			/* And it is the only really necessary thing :-) */
-			n = ntohs(skb->h.icmph->un.frag.mtu);
+			n = ntohs(icmp_hdr(skb)->un.frag.mtu);
 			if (n < hlen+68)
 				return 0;
 			n -= hlen;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ac57afa7c31..bf101dc1a97 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -132,7 +132,7 @@ static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct icmphdr)))
 		return 1;
 
-	type = skb->h.icmph->type;
+	type = icmp_hdr(skb)->type;
 	if (type < 32) {
 		__u32 data = raw_sk(sk)->filter.data;
 
@@ -184,8 +184,8 @@ out:
 void raw_err (struct sock *sk, struct sk_buff *skb, u32 info)
 {
 	struct inet_sock *inet = inet_sk(sk);
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	int err = 0;
 	int harderr = 0;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3a86d6b887a..51424df9078 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -354,8 +354,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
 	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
 	struct tcp_sock *tp;
 	struct inet_sock *inet;
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct sock *sk;
 	__u32 seq;
 	int err;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 926404c5e58..71b0b60ba53 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -330,8 +330,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
 	struct inet_sock *inet;
 	struct iphdr *iph = (struct iphdr*)skb->data;
 	struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct sock *sk;
 	int harderr;
 	int err;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 18ec86f177d..1e8827b90aa 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -224,8 +224,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
    ICMP in the real Internet is absolutely infeasible.
  */
 	struct iphdr *iph = (struct iphdr*)skb->data;
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct ip_tunnel *t;
 	int err;
 
@@ -280,8 +280,8 @@ out:
 	struct iphdr *iph = (struct iphdr*)dp;
 	int hlen = iph->ihl<<2;
 	struct ipv6hdr *iph6;
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	int rel_type = 0;
 	int rel_code = 0;
 	int rel_info = 0;
@@ -296,14 +296,14 @@ out:
 	default:
 		return;
 	case ICMP_PARAMETERPROB:
-		if (skb->h.icmph->un.gateway < hlen)
+		if (icmp_hdr(skb)->un.gateway < hlen)
 			return;
 
 		/* So... This guy found something strange INSIDE encapsulated
 		   packet. Well, he is fool, but what can we do ?
 		 */
 		rel_type = ICMPV6_PARAMPROB;
-		rel_info = skb->h.icmph->un.gateway - hlen;
+		rel_info = icmp_hdr(skb)->un.gateway - hlen;
 		break;
 
 	case ICMP_DEST_UNREACH:
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 3a322c584c7..40d0df80183 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -507,8 +507,8 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
 {
 	struct iphdr *iph = (struct iphdr *)skb->data;
 	struct sctphdr *sh = (struct sctphdr *)(skb->data + (iph->ihl <<2));
-	int type = skb->h.icmph->type;
-	int code = skb->h.icmph->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct sock *sk;
 	struct sctp_association *asoc = NULL;
 	struct sctp_transport *transport;
-- 
cgit v1.2.3


From ab6a5bb6b28a970104a34f0f6959b73cf61bdc72 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 18 Mar 2007 17:43:48 -0700
Subject: [TCP]: Introduce tcp_hdrlen() and tcp_optlen()

The ip_hdrlen() buddy, created to reduce the number of skb->h.th-> uses and to
avoid the longer, open coded equivalent.

Ditched a no-op in bnx2 in the process.

I wonder if we should have a BUG_ON(skb->h.th->doff < 5) in tcp_optlen()...

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/atl1/atl1_main.c         |  7 +++----
 drivers/net/bnx2.c                   |  7 +++----
 drivers/net/e1000/e1000_main.c       |  4 ++--
 drivers/net/ehea/ehea_main.c         |  2 +-
 drivers/net/ixgb/ixgb_main.c         |  2 +-
 drivers/net/myri10ge/myri10ge.c      |  3 +--
 drivers/net/netxen/netxen_nic_hw.c   |  3 +--
 drivers/net/netxen/netxen_nic_main.c |  2 +-
 drivers/net/sky2.c                   |  2 +-
 drivers/net/tg3.c                    |  4 ++--
 drivers/s390/net/qeth_eddp.c         |  8 ++++----
 include/linux/tcp.h                  | 10 ++++++++++
 net/ipv4/tcp_ipv4.c                  |  2 +-
 net/ipv6/tcp_ipv6.c                  |  2 +-
 14 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index c26f8ce320e..8d5994751e2 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -1307,7 +1307,7 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb,
 
 			tso->tsopl |= (iph->ihl &
 				CSUM_PARAM_IPHL_MASK) << CSUM_PARAM_IPHL_SHIFT;
-			tso->tsopl |= ((skb->h.th->doff << 2) &
+			tso->tsopl |= (tcp_hdrlen(skb) &
 				TSO_PARAM_TCPHDRLEN_MASK) << TSO_PARAM_TCPHDRLEN_SHIFT;
 			tso->tsopl |= (skb_shinfo(skb)->gso_size &
 				TSO_PARAM_MSS_MASK) << TSO_PARAM_MSS_SHIFT;
@@ -1369,8 +1369,7 @@ static void atl1_tx_map(struct atl1_adapter *adapter,
 
 	if (tcp_seg) {
 		/* TSO/GSO */
-		proto_hdr_len = (skb_transport_offset(skb) +
-				 (skb->h.th->doff << 2));
+		proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 		buffer_info->length = proto_hdr_len;
 		page = virt_to_page(skb->data);
 		offset = (unsigned long)skb->data & ~PAGE_MASK;
@@ -1563,7 +1562,7 @@ static int atl1_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	if (mss) {
 		if (skb->protocol == htons(ETH_P_IP)) {
 			proto_hdr_len = (skb_transport_offset(skb) +
-					 (skb->h.th->doff << 2));
+					 tcp_hdrlen(skb));
 			if (unlikely(proto_hdr_len > len)) {
 				dev_kfree_skb_any(skb);
 				return NETDEV_TX_OK;
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index eb0c4f1d448..73512fb1645 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -4521,13 +4521,12 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			return NETDEV_TX_OK;
 		}
 
-		tcp_opt_len = ((skb->h.th->doff - 5) * 4);
 		vlan_tag_flags |= TX_BD_FLAGS_SW_LSO;
 
 		tcp_opt_len = 0;
-		if (skb->h.th->doff > 5) {
-			tcp_opt_len = (skb->h.th->doff - 5) << 2;
-		}
+		if (skb->h.th->doff > 5)
+			tcp_opt_len = tcp_optlen(skb);
+
 		ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
 		iph = ip_hdr(skb);
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 78cf417cf23..4572fbba50f 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2887,7 +2887,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 				return err;
 		}
 
-		hdr_len = (skb_transport_offset(skb) + (skb->h.th->doff << 2));
+		hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 		mss = skb_shinfo(skb)->gso_size;
 		if (skb->protocol == htons(ETH_P_IP)) {
 			struct iphdr *iph = ip_hdr(skb);
@@ -3292,7 +3292,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 		/* TSO Workaround for 82571/2/3 Controllers -- if skb->data
 		* points to just header, pull a few bytes of payload from
 		* frags into skb->data */
-		hdr_len = (skb_transport_offset(skb) + (skb->h.th->doff << 2));
+		hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 		if (skb->data_len && (hdr_len == (skb->len - skb->data_len))) {
 			switch (adapter->hw.mac_type) {
 				unsigned int pull_size;
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 0dc701e611e..63732d2305b 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -1300,7 +1300,7 @@ static void write_swqe2_TSO(struct sk_buff *skb,
 	/* copy only eth/ip/tcp headers to immediate data and
 	 * the rest of skb->data to sg1entry
 	 */
-	headersize = ETH_HLEN + ip_hdrlen(skb) + (skb->h.th->doff * 4);
+	headersize = ETH_HLEN + ip_hdrlen(skb) + tcp_hdrlen(skb);
 
 	skb_data_size = skb->len - skb->data_len;
 
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index ceea6e45792..96550d68162 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1190,7 +1190,7 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
 				return err;
 		}
 
-		hdr_len = (skb_transport_offset(skb) + (skb->h.th->doff << 2));
+		hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 		mss = skb_shinfo(skb)->gso_size;
 		iph = ip_hdr(skb);
 		iph->tot_len = 0;
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index e04228c7b14..e4b69a0485b 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -2054,8 +2054,7 @@ again:
 		 * send loop that we are still in the
 		 * header portion of the TSO packet.
 		 * TSO header must be at most 134 bytes long */
-		cum_len = -(skb_transport_offset(skb) +
-			    (skb->h.th->doff << 2));
+		cum_len = -(skb_transport_offset(skb) + tcp_hdrlen(skb));
 
 		/* for TSO, pseudo_hdr_offset holds mss.
 		 * The firmware figures out where to put
diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c
index 09ca2192cbf..0fba8f19076 100644
--- a/drivers/net/netxen/netxen_nic_hw.c
+++ b/drivers/net/netxen/netxen_nic_hw.c
@@ -374,8 +374,7 @@ void netxen_tso_check(struct netxen_adapter *adapter,
 {
 	if (desc->mss) {
 		desc->total_hdr_length = (sizeof(struct ethhdr) +
-					  ip_hdrlen(skb) +
-					  skb->h.th->doff * 4);
+					  ip_hdrlen(skb) + tcp_hdrlen(skb));
 		netxen_set_cmd_desc_opcode(desc, TX_TCP_LSO);
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index b548a30e5c8..b488e94bc4c 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -779,7 +779,7 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 		if (skb_shinfo(skb)->gso_size > 0) {
 
 			no_of_desc++;
-			if ((ip_hdrlen(skb) + skb->h.th->doff * 4 +
+			if ((ip_hdrlen(skb) + tcp_hdrlen(skb) +
 			     sizeof(struct ethhdr)) >
 			    (sizeof(struct cmd_desc_type0) - 2)) {
 				no_of_desc++;
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index a35f2f2784a..fd291fc9316 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1392,7 +1392,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 	/* Check for TCP Segmentation Offload */
 	mss = skb_shinfo(skb)->gso_size;
 	if (mss != 0) {
-		mss += ((skb->h.th->doff - 5) * 4);	/* TCP options */
+		mss += tcp_optlen(skb); /* TCP options */
 		mss += ip_hdrlen(skb) + sizeof(struct tcphdr);
 		mss += ETH_HLEN;
 
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 76a31afe20d..7ca30d76bf6 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3911,7 +3911,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		else {
 			struct iphdr *iph = ip_hdr(skb);
 
-			tcp_opt_len = ((skb->h.th->doff - 5) * 4);
+			tcp_opt_len = tcp_optlen(skb);
 			ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
 			iph->check = 0;
@@ -4065,7 +4065,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 			goto out_unlock;
 		}
 
-		tcp_opt_len = ((skb->h.th->doff - 5) * 4);
+		tcp_opt_len = tcp_optlen(skb);
 		ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
 
 		hdr_len = ip_tcp_len + tcp_opt_len;
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index 90da58b4e53..273f1745a00 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -477,13 +477,13 @@ qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
 						  skb_network_header(skb),
 						  ip_hdrlen(skb),
 						  skb->h.raw,
-						  skb->h.th->doff * 4);
+						  tcp_hdrlen(skb));
 	else
 		eddp = qeth_eddp_create_eddp_data(qhdr,
 						  skb_network_header(skb),
 						  sizeof(struct ipv6hdr),
 						  skb->h.raw,
-						  skb->h.th->doff * 4);
+						  tcp_hdrlen(skb));
 
 	if (eddp == NULL) {
 		QETH_DBF_TEXT(trace, 2, "eddpfcnm");
@@ -596,11 +596,11 @@ qeth_eddp_create_context_tcp(struct qeth_card *card, struct sk_buff *skb,
 		ctx = qeth_eddp_create_context_generic(card, skb,
 						       (sizeof(struct qeth_hdr) +
 						        ip_hdrlen(skb) +
-							skb->h.th->doff * 4));
+							tcp_hdrlen(skb)));
 	else if (skb->protocol == htons(ETH_P_IPV6))
 		ctx = qeth_eddp_create_context_generic(card, skb,
 			sizeof(struct qeth_hdr) + sizeof(struct ipv6hdr) +
-			skb->h.th->doff*4);
+			tcp_hdrlen(skb));
 	else
 		QETH_DBF_TEXT(trace, 2, "cetcpinv");
 
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 18a468dd505..244ae0dacf4 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -178,6 +178,16 @@ struct tcp_md5sig {
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
 
+static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
+{
+	return skb->h.th->doff * 4;
+}
+
+static inline unsigned int tcp_optlen(const struct sk_buff *skb)
+{
+	return (skb->h.th->doff - 5) * 4;
+}
+
 /* This defines a selective acknowledgement block. */
 struct tcp_sack_block_wire {
 	__be32	start_seq;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 51424df9078..c146a02f849 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1564,7 +1564,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 		return 0;
 	}
 
-	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
+	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
 		goto csum_err;
 
 	if (sk->sk_state == TCP_LISTEN) {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 85b3e89110f..c573353f21c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1609,7 +1609,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 		return 0;
 	}
 
-	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
+	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
 		goto csum_err;
 
 	if (sk->sk_state == TCP_LISTEN) {
-- 
cgit v1.2.3


From aa8223c7bb0b05183e1737881ed21827aa5b9e73 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 10 Apr 2007 21:04:22 -0700
Subject: [SK_BUFF]: Introduce tcp_hdr(), remove skb->h.th

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/atl1/atl1_main.c   |  7 ++++---
 drivers/net/bnx2.c             |  8 ++++----
 drivers/net/chelsio/sge.c      |  2 +-
 drivers/net/cxgb3/sge.c        |  2 +-
 drivers/net/e1000/e1000_main.c | 11 ++++++-----
 drivers/net/ioc3-eth.c         |  2 +-
 drivers/net/ixgb/ixgb_main.c   |  7 ++++---
 drivers/net/mv643xx_eth.c      |  2 +-
 drivers/net/tg3.c              | 15 +++++++--------
 drivers/s390/net/qeth_eddp.c   |  2 +-
 drivers/s390/net/qeth_tso.h    |  4 ++--
 include/linux/skbuff.h         |  1 -
 include/linux/tcp.h            |  9 +++++++--
 include/net/tcp.h              |  2 +-
 include/net/tcp_ecn.h          |  6 +++---
 net/ipv4/ip_output.c           |  4 ++--
 net/ipv4/syncookies.c          | 36 ++++++++++++++++++------------------
 net/ipv4/tcp.c                 | 22 +++++++++++-----------
 net/ipv4/tcp_input.c           | 28 +++++++++++++++-------------
 net/ipv4/tcp_ipv4.c            | 32 ++++++++++++++++----------------
 net/ipv4/tcp_minisocks.c       |  9 +++++----
 net/ipv4/tcp_output.c          | 13 ++++++++-----
 net/ipv6/tcp_ipv6.c            | 32 ++++++++++++++++----------------
 23 files changed, 134 insertions(+), 122 deletions(-)

diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index 8d5994751e2..d60c2217332 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -1298,9 +1298,10 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb,
 
 			iph->tot_len = 0;
 			iph->check = 0;
-			skb->h.th->check = ~csum_tcpudp_magic(iph->saddr,
-							      iph->daddr, 0,
-							      IPPROTO_TCP, 0);
+			tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+								 iph->daddr, 0,
+								 IPPROTO_TCP,
+								 0);
 			ipofst = skb_network_offset(skb);
 			if (ipofst != ENET_HEADER_SIZE) /* 802.3 frame */
 				tso->tsopl |= 1 << TSO_PARAM_ETHTYPE_SHIFT;
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 73512fb1645..7e7b5f34403 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -4524,7 +4524,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		vlan_tag_flags |= TX_BD_FLAGS_SW_LSO;
 
 		tcp_opt_len = 0;
-		if (skb->h.th->doff > 5)
+		if (tcp_hdr(skb)->doff > 5)
 			tcp_opt_len = tcp_optlen(skb);
 
 		ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr);
@@ -4532,9 +4532,9 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		iph = ip_hdr(skb);
 		iph->check = 0;
 		iph->tot_len = htons(mss + ip_tcp_len + tcp_opt_len);
-		skb->h.th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-						      0, IPPROTO_TCP, 0);
-
+		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+							 iph->daddr, 0,
+							 IPPROTO_TCP, 0);
 		if (tcp_opt_len || (iph->ihl > 5)) {
 			vlan_tag_flags |= ((iph->ihl - 5) +
 					   (tcp_opt_len >> 2)) << 8;
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index a4204dff363..43e92f9f0bc 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1872,7 +1872,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		hdr->opcode = CPL_TX_PKT_LSO;
 		hdr->ip_csum_dis = hdr->l4_csum_dis = 0;
 		hdr->ip_hdr_words = ip_hdr(skb)->ihl;
-		hdr->tcp_hdr_words = skb->h.th->doff;
+		hdr->tcp_hdr_words = tcp_hdr(skb)->doff;
 		hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type,
 							  skb_shinfo(skb)->gso_size));
 		hdr->len = htonl(skb->len - sizeof(*hdr));
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index d38b1bcd138..a70fe9145a2 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -901,7 +901,7 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
 		    CPL_ETH_II : CPL_ETH_II_VLAN;
 		tso_info |= V_LSO_ETH_TYPE(eth_type) |
 		    V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
-		    V_LSO_TCPHDR_WORDS(skb->h.th->doff);
+		    V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
 		hdr->lso_info = htonl(tso_info);
 		flits = 3;
 	} else {
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 4572fbba50f..e86deb2ef82 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2893,14 +2893,15 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 			struct iphdr *iph = ip_hdr(skb);
 			iph->tot_len = 0;
 			iph->check = 0;
-			skb->h.th->check = ~csum_tcpudp_magic(iph->saddr,
-							      iph->daddr, 0,
-							      IPPROTO_TCP, 0);
+			tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+								 iph->daddr, 0,
+								 IPPROTO_TCP,
+								 0);
 			cmd_length = E1000_TXD_CMD_IP;
 			ipcse = skb_transport_offset(skb) - 1;
 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
 			ipv6_hdr(skb)->payload_len = 0;
-			skb->h.th->check =
+			tcp_hdr(skb)->check =
 				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 						 &ipv6_hdr(skb)->daddr,
 						 0, IPPROTO_TCP, 0);
@@ -2909,7 +2910,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 		ipcss = skb_network_offset(skb);
 		ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data;
 		tucss = skb_transport_offset(skb);
-		tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
+		tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data;
 		tucse = 0;
 
 		cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE |
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index ba012e10d79..bc62e770a25 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -1426,7 +1426,7 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 		if (proto == IPPROTO_TCP) {
 			csoff += offsetof(struct tcphdr, check);
-			skb->h.th->check = csum;
+			tcp_hdr(skb)->check = csum;
 		}
 
 		w0 = ETXD_DOCHECKSUM | (csoff << ETXD_CHKOFF_SHIFT);
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index 96550d68162..e729ced52dc 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1195,13 +1195,14 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
 		iph = ip_hdr(skb);
 		iph->tot_len = 0;
 		iph->check = 0;
-		skb->h.th->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-						      0, IPPROTO_TCP, 0);
+		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+							 iph->daddr, 0,
+							 IPPROTO_TCP, 0);
 		ipcss = skb_network_offset(skb);
 		ipcso = (void *)&(iph->check) - (void *)skb->data;
 		ipcse = skb_transport_offset(skb) - 1;
 		tucss = skb_transport_offset(skb);
-		tucso = (void *)&(skb->h.th->check) - (void *)skb->data;
+		tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data;
 		tucse = 0;
 
 		i = adapter->tx_ring.next_to_use;
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index 43723839e93..ab15ecd4b3d 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -1169,7 +1169,7 @@ static void eth_tx_submit_descs_for_skb(struct mv643xx_private *mp,
 			desc->l4i_chk = udp_hdr(skb)->check;
 			break;
 		case IPPROTO_TCP:
-			desc->l4i_chk = skb->h.th->check;
+			desc->l4i_chk = tcp_hdr(skb)->check;
 			break;
 		default:
 			BUG();
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 7ca30d76bf6..414365c3198 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3922,7 +3922,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		base_flags |= (TXD_FLAG_CPU_PRE_DMA |
 			       TXD_FLAG_CPU_POST_DMA);
 
-		skb->h.th->check = 0;
+		tcp_hdr(skb)->check = 0;
 
 	}
 	else if (skb->ip_summed == CHECKSUM_PARTIAL)
@@ -4080,14 +4080,13 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 		iph->check = 0;
 		iph->tot_len = htons(mss + hdr_len);
 		if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) {
-			skb->h.th->check = 0;
+			tcp_hdr(skb)->check = 0;
 			base_flags &= ~TXD_FLAG_TCPUDP_CSUM;
-		}
-		else {
-			skb->h.th->check = ~csum_tcpudp_magic(iph->saddr,
-							      iph->daddr, 0,
-							      IPPROTO_TCP, 0);
-		}
+		} else
+			tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+								 iph->daddr, 0,
+								 IPPROTO_TCP,
+								 0);
 
 		if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO) ||
 		    (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705)) {
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index 273f1745a00..b8e84674e17 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -416,7 +416,7 @@ __qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
                        eddp->skb_offset += VLAN_HLEN;
 #endif /* CONFIG_QETH_VLAN */
        }
-	tcph = eddp->skb->h.th;
+	tcph = tcp_hdr(eddp->skb);
 	while (eddp->skb_offset < eddp->skb->len) {
 		data_len = min((int)skb_shinfo(eddp->skb)->gso_size,
 			       (int)(eddp->skb->len - eddp->skb_offset));
diff --git a/drivers/s390/net/qeth_tso.h b/drivers/s390/net/qeth_tso.h
index 4040bdd8c32..c20e923cf9a 100644
--- a/drivers/s390/net/qeth_tso.h
+++ b/drivers/s390/net/qeth_tso.h
@@ -41,7 +41,7 @@ qeth_tso_fill_header(struct qeth_card *card, struct sk_buff *skb)
 
 	hdr  = (struct qeth_hdr_tso *) skb->data;
 	iph  = ip_hdr(skb);
-	tcph = skb->h.th;
+	tcph = tcp_hdr(skb);
 	/*fix header to TSO values ...*/
 	hdr->hdr.hdr.l3.id = QETH_HEADER_TYPE_TSO;
 	/*set values which are fix for the first approach ...*/
@@ -65,7 +65,7 @@ qeth_tso_set_tcpip_header(struct qeth_card *card, struct sk_buff *skb)
 {
 	struct iphdr *iph    = ip_hdr(skb);
 	struct ipv6hdr *ip6h = ipv6_hdr(skb);
-	struct tcphdr *tcph  = skb->h.th;
+	struct tcphdr *tcph  = tcp_hdr(skb);
 
 	tcph->check = 0;
 	if (skb->protocol == ETH_P_IPV6) {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e580416de78..8f158d66d2a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -237,7 +237,6 @@ struct sk_buff {
 	/* 4 byte hole on 64 bit*/
 
 	union {
-		struct tcphdr	*th;
 		struct iphdr	*ipiph;
 		struct ipv6hdr	*ipv6h;
 		unsigned char	*raw;
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 244ae0dacf4..911d937fb4c 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -178,14 +178,19 @@ struct tcp_md5sig {
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
 
+static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
+{
+	return (struct tcphdr *)skb->h.raw;
+}
+
 static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
 {
-	return skb->h.th->doff * 4;
+	return tcp_hdr(skb)->doff * 4;
 }
 
 static inline unsigned int tcp_optlen(const struct sk_buff *skb)
 {
-	return (skb->h.th->doff - 5) * 4;
+	return (tcp_hdr(skb)->doff - 5) * 4;
 }
 
 /* This defines a selective acknowledgement block. */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6dacc352dcf..af9273204cf 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -984,7 +984,7 @@ static inline void tcp_openreq_init(struct request_sock *req,
 	ireq->wscale_ok = rx_opt->wscale_ok;
 	ireq->acked = 0;
 	ireq->ecn_ok = 0;
-	ireq->rmt_port = skb->h.th->source;
+	ireq->rmt_port = tcp_hdr(skb)->source;
 }
 
 extern void tcp_enter_memory_pressure(void);
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
index 4629d77173f..b5f7c6ac088 100644
--- a/include/net/tcp_ecn.h
+++ b/include/net/tcp_ecn.h
@@ -54,7 +54,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp,
 			INET_ECN_xmit(sk);
 			if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) {
 				tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
-				skb->h.th->cwr = 1;
+				tcp_hdr(skb)->cwr = 1;
 				skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 			}
 		} else {
@@ -62,7 +62,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp,
 			INET_ECN_dontxmit(sk);
 		}
 		if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
-			skb->h.th->ece = 1;
+			tcp_hdr(skb)->ece = 1;
 	}
 }
 
@@ -70,7 +70,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp,
 
 static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb)
 {
-	if (skb->h.th->cwr)
+	if (tcp_hdr(skb)->cwr)
 		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
 }
 
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6d92358fc51..602268661eb 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1352,8 +1352,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 						.tos = RT_TOS(ip_hdr(skb)->tos) } },
 				    /* Not quite clean, but right. */
 				    .uli_u = { .ports =
-					       { .sport = skb->h.th->dest,
-						 .dport = skb->h.th->source } },
+					       { .sport = tcp_hdr(skb)->dest,
+						 .dport = tcp_hdr(skb)->source } },
 				    .proto = sk->sk_protocol };
 		security_skb_classify_flow(skb, &fl);
 		if (ip_route_output_key(&rt, &fl))
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 26160717849..2da1be0589a 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -125,10 +125,11 @@ static __u16 const msstab[] = {
 __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	const struct iphdr *iph = ip_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
 	int mssind;
 	const __u16 mss = *mssp;
 
-
 	tp->last_synq_overflow = jiffies;
 
 	/* XXX sort msstab[] by probability?  Binary search? */
@@ -138,9 +139,8 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
 
 	NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);
 
-	return secure_tcp_syn_cookie(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
-				     skb->h.th->source, skb->h.th->dest,
-				     ntohl(skb->h.th->seq),
+	return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
+				     th->source, th->dest, ntohl(th->seq),
 				     jiffies / (HZ * 60), mssind);
 }
 
@@ -157,14 +157,13 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
  */
 static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
 {
-	__u32 seq;
-	__u32 mssind;
-
-	seq = ntohl(skb->h.th->seq)-1;
-	mssind = check_tcp_syn_cookie(cookie,
-				      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
-				      skb->h.th->source, skb->h.th->dest,
-				      seq, jiffies / (HZ * 60), COUNTER_TRIES);
+	const struct iphdr *iph = ip_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
+	__u32 seq = ntohl(th->seq) - 1;
+	__u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
+					    th->source, th->dest, seq,
+					    jiffies / (HZ * 60),
+					    COUNTER_TRIES);
 
 	return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
 }
@@ -191,14 +190,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	struct inet_request_sock *ireq;
 	struct tcp_request_sock *treq;
 	struct tcp_sock *tp = tcp_sk(sk);
-	__u32 cookie = ntohl(skb->h.th->ack_seq) - 1;
+	const struct tcphdr *th = tcp_hdr(skb);
+	__u32 cookie = ntohl(th->ack_seq) - 1;
 	struct sock *ret = sk;
 	struct request_sock *req;
 	int mss;
 	struct rtable *rt;
 	__u8 rcv_wscale;
 
-	if (!sysctl_tcp_syncookies || !skb->h.th->ack)
+	if (!sysctl_tcp_syncookies || !th->ack)
 		goto out;
 
 	if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
@@ -220,10 +220,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	}
 	ireq = inet_rsk(req);
 	treq = tcp_rsk(req);
-	treq->rcv_isn		= ntohl(skb->h.th->seq) - 1;
+	treq->rcv_isn		= ntohl(th->seq) - 1;
 	treq->snt_isn		= cookie;
 	req->mss		= mss;
-	ireq->rmt_port		= skb->h.th->source;
+	ireq->rmt_port		= th->source;
 	ireq->loc_addr		= ip_hdr(skb)->daddr;
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
 	ireq->opt		= NULL;
@@ -261,8 +261,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 						.tos = RT_CONN_FLAGS(sk) } },
 				    .proto = IPPROTO_TCP,
 				    .uli_u = { .ports =
-					       { .sport = skb->h.th->dest,
-						 .dport = skb->h.th->source } } };
+					       { .sport = th->dest,
+						 .dport = th->source } } };
 		security_req_classify_flow(req, &fl);
 		if (ip_route_output_key(&rt, &fl)) {
 			reqsk_free(req);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 689f9330f1b..f832f3c33ab 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -425,7 +425,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 			/* Subtract 1, if FIN is in queue. */
 			if (answ && !skb_queue_empty(&sk->sk_receive_queue))
 				answ -=
-		       ((struct sk_buff *)sk->sk_receive_queue.prev)->h.th->fin;
+		       tcp_hdr((struct sk_buff *)sk->sk_receive_queue.prev)->fin;
 		} else
 			answ = tp->urg_seq - tp->copied_seq;
 		release_sock(sk);
@@ -1016,9 +1016,9 @@ static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
 
 	skb_queue_walk(&sk->sk_receive_queue, skb) {
 		offset = seq - TCP_SKB_CB(skb)->seq;
-		if (skb->h.th->syn)
+		if (tcp_hdr(skb)->syn)
 			offset--;
-		if (offset < skb->len || skb->h.th->fin) {
+		if (offset < skb->len || tcp_hdr(skb)->fin) {
 			*off = offset;
 			return skb;
 		}
@@ -1070,7 +1070,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 			if (offset != skb->len)
 				break;
 		}
-		if (skb->h.th->fin) {
+		if (tcp_hdr(skb)->fin) {
 			sk_eat_skb(sk, skb, 0);
 			++seq;
 			break;
@@ -1174,11 +1174,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 				break;
 			}
 			offset = *seq - TCP_SKB_CB(skb)->seq;
-			if (skb->h.th->syn)
+			if (tcp_hdr(skb)->syn)
 				offset--;
 			if (offset < skb->len)
 				goto found_ok_skb;
-			if (skb->h.th->fin)
+			if (tcp_hdr(skb)->fin)
 				goto found_fin_ok;
 			BUG_TRAP(flags & MSG_PEEK);
 			skb = skb->next;
@@ -1394,7 +1394,7 @@ skip_copy:
 		if (used + offset < skb->len)
 			continue;
 
-		if (skb->h.th->fin)
+		if (tcp_hdr(skb)->fin)
 			goto found_fin_ok;
 		if (!(flags & MSG_PEEK)) {
 			sk_eat_skb(sk, skb, copied_early);
@@ -1563,7 +1563,7 @@ void tcp_close(struct sock *sk, long timeout)
 	 */
 	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
-			  skb->h.th->fin;
+			  tcp_hdr(skb)->fin;
 		data_was_unread += len;
 		__kfree_skb(skb);
 	}
@@ -2170,7 +2170,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 	if (!pskb_may_pull(skb, sizeof(*th)))
 		goto out;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 	thlen = th->doff * 4;
 	if (thlen < sizeof(*th))
 		goto out;
@@ -2210,7 +2210,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 	delta = htonl(oldlen + (thlen + len));
 
 	skb = segs;
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 	seq = ntohl(th->seq);
 
 	do {
@@ -2224,7 +2224,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 
 		seq += len;
 		skb = skb->next;
-		th = skb->h.th;
+		th = tcp_hdr(skb);
 
 		th->seq = htonl(seq);
 		th->cwr = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2776a8b0133..c1ce3623738 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -148,7 +148,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
 		     * to handle super-low mtu links fairly.
 		     */
 		    (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
-		     !(tcp_flag_word(skb->h.th)&TCP_REMNANT))) {
+		     !(tcp_flag_word(tcp_hdr(skb)) & TCP_REMNANT))) {
 			/* Subtract also invariant (if peer is RFC compliant),
 			 * tcp header plus fixed timestamp option length.
 			 * Resulting "len" is MSS free of SACK jitter.
@@ -2559,9 +2559,9 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 				 struct sk_buff *skb, u32 ack, u32 ack_seq)
 {
 	int flag = 0;
-	u32 nwin = ntohs(skb->h.th->window);
+	u32 nwin = ntohs(tcp_hdr(skb)->window);
 
-	if (likely(!skb->h.th->syn))
+	if (likely(!tcp_hdr(skb)->syn))
 		nwin <<= tp->rx_opt.snd_wscale;
 
 	if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
@@ -2766,7 +2766,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		if (TCP_SKB_CB(skb)->sacked)
 			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
 
-		if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th))
+		if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))
 			flag |= FLAG_ECE;
 
 		tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
@@ -2833,7 +2833,7 @@ uninteresting_ack:
 void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab)
 {
 	unsigned char *ptr;
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	int length=(th->doff*4)-sizeof(struct tcphdr);
 
 	ptr = (unsigned char *)(th + 1);
@@ -2995,7 +2995,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
 static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	u32 seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 
@@ -3357,8 +3357,8 @@ static void tcp_ofo_queue(struct sock *sk)
 		__skb_unlink(skb, &tp->out_of_order_queue);
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		if (skb->h.th->fin)
-			tcp_fin(skb, sk, skb->h.th);
+		if (tcp_hdr(skb)->fin)
+			tcp_fin(skb, sk, tcp_hdr(skb));
 	}
 }
 
@@ -3366,7 +3366,7 @@ static int tcp_prune_queue(struct sock *sk);
 
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int eaten = -1;
 
@@ -3605,7 +3605,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		 * - bloated or contains data before "start" or
 		 *   overlaps to the next one.
 		 */
-		if (!skb->h.th->syn && !skb->h.th->fin &&
+		if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
 		    (tcp_win_from_space(skb->truesize) > skb->len ||
 		     before(TCP_SKB_CB(skb)->seq, start) ||
 		     (skb->next != tail &&
@@ -3616,7 +3616,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 		start = TCP_SKB_CB(skb)->end_seq;
 		skb = skb->next;
 	}
-	if (skb == tail || skb->h.th->syn || skb->h.th->fin)
+	if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
 		return;
 
 	while (before(start, end)) {
@@ -3665,7 +3665,9 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 				__kfree_skb(skb);
 				NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
 				skb = next;
-				if (skb == tail || skb->h.th->syn || skb->h.th->fin)
+				if (skb == tail ||
+				    tcp_hdr(skb)->syn ||
+				    tcp_hdr(skb)->fin)
 					return;
 			}
 		}
@@ -4072,7 +4074,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
 		tcp_rcv_space_adjust(sk);
 
 		if ((tp->ucopy.len == 0) ||
-		    (tcp_flag_word(skb->h.th) & TCP_FLAG_PSH) ||
+		    (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
 		    (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
 			tp->ucopy.wakeup = 1;
 			sk->sk_data_ready(sk, 0);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c146a02f849..617a5e4ca01 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -127,8 +127,8 @@ static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
 {
 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
 					  ip_hdr(skb)->saddr,
-					  skb->h.th->dest,
-					  skb->h.th->source);
+					  tcp_hdr(skb)->dest,
+					  tcp_hdr(skb)->source);
 }
 
 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -499,7 +499,7 @@ out:
 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 {
 	struct inet_sock *inet = inet_sk(sk);
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		th->check = ~tcp_v4_check(len, inet->saddr,
@@ -522,7 +522,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
 		return -EINVAL;
 
 	iph = ip_hdr(skb);
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 
 	th->check = 0;
 	th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
@@ -546,7 +546,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
 
 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	struct {
 		struct tcphdr th;
 #ifdef CONFIG_TCP_MD5SIG
@@ -622,7 +622,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
 			    struct sk_buff *skb, u32 seq, u32 ack,
 			    u32 win, u32 ts)
 {
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	struct {
 		struct tcphdr th;
 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
@@ -745,7 +745,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
 	skb = tcp_make_synack(sk, dst, req);
 
 	if (skb) {
-		struct tcphdr *th = skb->h.th;
+		struct tcphdr *th = tcp_hdr(skb);
 
 		th->check = tcp_v4_check(skb->len,
 					 ireq->loc_addr,
@@ -781,7 +781,7 @@ static void syn_flood_warning(struct sk_buff *skb)
 		warntime = jiffies;
 		printk(KERN_INFO
 		       "possible SYN flooding on port %d. Sending cookies.\n",
-		       ntohs(skb->h.th->dest));
+		       ntohs(tcp_hdr(skb)->dest));
 	}
 }
 #endif
@@ -1134,7 +1134,7 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
 	__u8 *hash_location = NULL;
 	struct tcp_md5sig_key *hash_expected;
 	const struct iphdr *iph = ip_hdr(skb);
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	int length = (th->doff << 2) - sizeof(struct tcphdr);
 	int genhash;
 	unsigned char *ptr;
@@ -1327,7 +1327,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq->rmt_addr = saddr;
 	ireq->opt = tcp_v4_save_options(sk, skb);
 	if (!want_cookie)
-		TCP_ECN_create_request(req, skb->h.th);
+		TCP_ECN_create_request(req, tcp_hdr(skb));
 
 	if (want_cookie) {
 #ifdef CONFIG_SYN_COOKIES
@@ -1375,7 +1375,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
 				       "request from %u.%u.%u.%u/%u\n",
 				       NIPQUAD(saddr),
-				       ntohs(skb->h.th->source));
+				       ntohs(tcp_hdr(skb)->source));
 			dst_release(dst);
 			goto drop_and_free;
 		}
@@ -1481,7 +1481,7 @@ exit:
 
 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	const struct iphdr *iph = ip_hdr(skb);
 	struct sock *nsk;
 	struct request_sock **prev;
@@ -1556,7 +1556,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
 		TCP_CHECK_TIMER(sk);
-		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) {
+		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
 			rsk = sk;
 			goto reset;
 		}
@@ -1582,7 +1582,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	TCP_CHECK_TIMER(sk);
-	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) {
+	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
 		rsk = sk;
 		goto reset;
 	}
@@ -1625,7 +1625,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
 		goto discard_it;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 
 	if (th->doff < sizeof(struct tcphdr) / 4)
 		goto bad_packet;
@@ -1640,7 +1640,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	     tcp_v4_checksum_init(skb)))
 		goto bad_packet;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 	iph = ip_hdr(skb);
 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 463d2b24d2d..a12b08fca5a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -453,7 +453,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 			newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
 			newtp->window_clamp = min(newtp->window_clamp, 65535U);
 		}
-		newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->rx_opt.snd_wscale;
+		newtp->snd_wnd = (ntohs(tcp_hdr(skb)->window) <<
+				  newtp->rx_opt.snd_wscale);
 		newtp->max_window = newtp->snd_wnd;
 
 		if (newtp->rx_opt.tstamp_ok) {
@@ -488,7 +489,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 			   struct request_sock *req,
 			   struct request_sock **prev)
 {
-	struct tcphdr *th = skb->h.th;
+	const struct tcphdr *th = tcp_hdr(skb);
 	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
 	int paws_reject = 0;
 	struct tcp_options_received tmp_opt;
@@ -710,8 +711,8 @@ int tcp_child_process(struct sock *parent, struct sock *child,
 	int state = child->sk_state;
 
 	if (!sock_owned_by_user(child)) {
-		ret = tcp_rcv_state_process(child, skb, skb->h.th, skb->len);
-
+		ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb),
+					    skb->len);
 		/* Wakeup parent, send SIGIO */
 		if (state == TCP_SYN_RECV && child->sk_state != state)
 			parent->sk_data_ready(parent, 0);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f19f5fb361b..29c53fbb220 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -465,11 +465,12 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
 #endif
 
-	th = (struct tcphdr *) skb_push(skb, tcp_header_size);
-	skb->h.th = th;
+	skb_push(skb, tcp_header_size);
+	skb_reset_transport_header(skb);
 	skb_set_owner_w(skb, sk);
 
 	/* Build TCP header and checksum it. */
+	th = tcp_hdr(skb);
 	th->source		= inet->sport;
 	th->dest		= inet->dport;
 	th->seq			= htonl(tcb->seq);
@@ -524,7 +525,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		tp->af_specific->calc_md5_hash(md5_hash_location,
 					       md5,
 					       sk, NULL, NULL,
-					       skb->h.th,
+					       tcp_hdr(skb),
 					       sk->sk_protocol,
 					       skb->len);
 	}
@@ -2128,8 +2129,10 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	if (md5)
 		tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
 #endif
-	skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);
+	skb_push(skb, tcp_header_size);
+	skb_reset_transport_header(skb);
 
+	th = tcp_hdr(skb);
 	memset(th, 0, sizeof(struct tcphdr));
 	th->syn = 1;
 	th->ack = 1;
@@ -2183,7 +2186,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 		tp->af_specific->calc_md5_hash(md5_hash_location,
 					       md5,
 					       NULL, dst, req,
-					       skb->h.th, sk->sk_protocol,
+					       tcp_hdr(skb), sk->sk_protocol,
 					       skb->len);
 	}
 #endif
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c573353f21c..4a55da079f5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -117,8 +117,8 @@ static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
 {
 	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
 					    ipv6_hdr(skb)->saddr.s6_addr32,
-					    skb->h.th->dest,
-					    skb->h.th->source);
+					    tcp_hdr(skb)->dest,
+					    tcp_hdr(skb)->source);
 }
 
 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -509,7 +509,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 
 	skb = tcp_make_synack(sk, dst, req);
 	if (skb) {
-		struct tcphdr *th = skb->h.th;
+		struct tcphdr *th = tcp_hdr(skb);
 
 		th->check = tcp_v6_check(th, skb->len,
 					 &treq->loc_addr, &treq->rmt_addr,
@@ -838,7 +838,7 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
 	__u8 *hash_location = NULL;
 	struct tcp_md5sig_key *hash_expected;
 	struct ipv6hdr *ip6h = ipv6_hdr(skb);
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 	int length = (th->doff << 2) - sizeof (*th);
 	int genhash;
 	u8 *ptr;
@@ -946,7 +946,7 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
 static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct tcphdr *th = skb->h.th;
+	struct tcphdr *th = tcp_hdr(skb);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
@@ -967,7 +967,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
 		return -EINVAL;
 
 	ipv6h = ipv6_hdr(skb);
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 
 	th->check = 0;
 	th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
@@ -979,7 +979,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
 
 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = skb->h.th, *t1;
+	struct tcphdr *th = tcp_hdr(skb), *t1;
 	struct sk_buff *buff;
 	struct flowi fl;
 	int tot_len = sizeof(*th);
@@ -1079,7 +1079,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
 			    struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
 {
-	struct tcphdr *th = skb->h.th, *t1;
+	struct tcphdr *th = tcp_hdr(skb), *t1;
 	struct sk_buff *buff;
 	struct flowi fl;
 	int tot_len = sizeof(struct tcphdr);
@@ -1195,7 +1195,7 @@ static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 {
 	struct request_sock *req, **prev;
-	const struct tcphdr *th = skb->h.th;
+	const struct tcphdr *th = tcp_hdr(skb);
 	struct sock *nsk;
 
 	/* Find possible connection requests. */
@@ -1275,7 +1275,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	treq = inet6_rsk(req);
 	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
-	TCP_ECN_create_request(req, skb->h.th);
+	TCP_ECN_create_request(req, tcp_hdr(skb));
 	treq->pktopts = NULL;
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
@@ -1528,14 +1528,14 @@ out:
 static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
 {
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		if (!tcp_v6_check(skb->h.th, skb->len, &ipv6_hdr(skb)->saddr,
+		if (!tcp_v6_check(tcp_hdr(skb), skb->len, &ipv6_hdr(skb)->saddr,
 				  &ipv6_hdr(skb)->daddr, skb->csum)) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 			return 0;
 		}
 	}
 
-	skb->csum = ~csum_unfold(tcp_v6_check(skb->h.th, skb->len,
+	skb->csum = ~csum_unfold(tcp_v6_check(tcp_hdr(skb), skb->len,
 					      &ipv6_hdr(skb)->saddr,
 					      &ipv6_hdr(skb)->daddr, 0));
 
@@ -1601,7 +1601,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
 		TCP_CHECK_TIMER(sk);
-		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
+		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
 			goto reset;
 		TCP_CHECK_TIMER(sk);
 		if (opt_skb)
@@ -1632,7 +1632,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 	}
 
 	TCP_CHECK_TIMER(sk);
-	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
+	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
 		goto reset;
 	TCP_CHECK_TIMER(sk);
 	if (opt_skb)
@@ -1698,7 +1698,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb)
 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
 		goto discard_it;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 
 	if (th->doff < sizeof(struct tcphdr)/4)
 		goto bad_packet;
@@ -1709,7 +1709,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb)
 	     tcp_v6_checksum_init(skb)))
 		goto bad_packet;
 
-	th = skb->h.th;
+	th = tcp_hdr(skb);
 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
 				    skb->len - th->doff*4);
-- 
cgit v1.2.3


From b0061ce49c83657563b64ffcf1ec137110230d93 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 25 Apr 2007 18:02:22 -0700
Subject: [SK_BUFF]: Introduce ipip_hdr(), remove skb->h.ipiph

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/sk98lin/skge.c   | 4 ++--
 drivers/net/skge.c           | 2 +-
 include/linux/ip.h           | 5 +++++
 include/linux/skbuff.h       | 1 -
 net/ipv4/xfrm4_mode_beet.c   | 6 +++---
 net/ipv4/xfrm4_mode_tunnel.c | 6 +++---
 net/ipv6/xfrm6_mode_tunnel.c | 2 +-
 7 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c
index e4ab7a8acc1..b987a5c3f42 100644
--- a/drivers/net/sk98lin/skge.c
+++ b/drivers/net/sk98lin/skge.c
@@ -1565,7 +1565,7 @@ struct sk_buff	*pMessage)	/* pointer to send-message              */
 		u16 hdrlen = skb_transport_offset(pMessage);
 		u16 offset = hdrlen + pMessage->csum_offset;
 
-		if ((pMessage->h.ipiph->protocol == IPPROTO_UDP ) &&
+		if ((ipip_hdr(pMessage)->protocol == IPPROTO_UDP) &&
 			(pAC->GIni.GIChipRev == 0) &&
 			(pAC->GIni.GIChipId == CHIP_ID_YUKON)) {
 			pTxd->TBControl = BMU_TCP_CHECK;
@@ -1691,7 +1691,7 @@ struct sk_buff	*pMessage)	/* pointer to send-message              */
 		** opcode for udp is not working in the hardware yet 
 		** (Revision 2.0)
 		*/
-		if ((pMessage->h.ipiph->protocol == IPPROTO_UDP ) &&
+		if ((ipip_hdr(pMessage)->protocol == IPPROTO_UDP) &&
 			(pAC->GIni.GIChipRev == 0) &&
 			(pAC->GIni.GIChipId == CHIP_ID_YUKON)) {
 			Control |= BMU_TCP_CHECK;
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index ca7a0e03984..99b61cfb7ce 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -2659,7 +2659,7 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 		/* This seems backwards, but it is what the sk98lin
 		 * does.  Looks like hardware is wrong?
 		 */
-		if (skb->h.ipiph->protocol == IPPROTO_UDP
+		if (ipip_hdr(skb)->protocol == IPPROTO_UDP
 	            && hw->chip_rev == 0 && hw->chip_id == CHIP_ID_YUKON)
 			control = BMU_TCP_CHECK;
 		else
diff --git a/include/linux/ip.h b/include/linux/ip.h
index f2f26db16f5..19578440b5f 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -111,6 +111,11 @@ static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
 {
 	return (struct iphdr *)skb_network_header(skb);
 }
+
+static inline struct iphdr *ipip_hdr(const struct sk_buff *skb)
+{
+	return (struct iphdr *)skb->h.raw;
+}
 #endif
 
 struct ip_auth_hdr {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8f158d66d2a..862a81cf7f7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -237,7 +237,6 @@ struct sk_buff {
 	/* 4 byte hole on 64 bit*/
 
 	union {
-		struct iphdr	*ipiph;
 		struct ipv6hdr	*ipv6h;
 		unsigned char	*raw;
 	} h;
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 9e5ba12c6c7..32fcfc0b5c8 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -83,7 +83,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 
 		if (!pskb_may_pull(skb, sizeof(*ph)))
 			goto out;
-		ph = (struct ip_beet_phdr *)(skb->h.ipiph + 1);
+		ph = (struct ip_beet_phdr *)(ipip_hdr(skb) + 1);
 
 		phlen = sizeof(*ph) + ph->padlen;
 		optlen = ph->hdrlen * 8 + (IPV4_BEET_PHMAXLEN - phlen);
@@ -97,9 +97,9 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 		ph_nexthdr = ph->nexthdr;
 	}
 
-	skb->nh.raw = skb->data + (phlen - sizeof(*iph));
+	skb_set_network_header(skb, phlen - sizeof(*iph));
 	memmove(skb_network_header(skb), iph, sizeof(*iph));
-	skb->h.raw = skb->data + (phlen + optlen);
+	skb_set_transport_header(skb, phlen + optlen);
 	skb->data = skb->h.raw;
 
 	iph = ip_hdr(skb);
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index edba75610a4..521e52f055c 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -17,7 +17,7 @@
 static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
 {
 	struct iphdr *outer_iph = ip_hdr(skb);
-	struct iphdr *inner_iph = skb->h.ipiph;
+	struct iphdr *inner_iph = ipip_hdr(skb);
 
 	if (INET_ECN_is_ce(outer_iph->tos))
 		IP_ECN_set_ce(inner_iph);
@@ -47,7 +47,7 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	int flags;
 
 	iph = ip_hdr(skb);
-	skb->h.ipiph = iph;
+	skb->h.raw = skb->nh.raw;
 
 	skb_push(skb, x->props.header_len);
 	skb_reset_network_header(skb);
@@ -116,7 +116,7 @@ static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	iph = ip_hdr(skb);
 	if (iph->protocol == IPPROTO_IPIP) {
 		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-			ipv4_copy_dscp(iph, skb->h.ipiph);
+			ipv4_copy_dscp(iph, ipip_hdr(skb));
 		if (!(x->props.flags & XFRM_STATE_NOECN))
 			ipip_ecn_decapsulate(skb);
 	}
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 28f36b363d1..9d3bd338e5d 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -28,7 +28,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
 static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb)
 {
 	if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6_hdr(skb))))
-			IP_ECN_set_ce(skb->h.ipiph);
+			IP_ECN_set_ce(ipip_hdr(skb));
 }
 
 /* Add encapsulation header.
-- 
cgit v1.2.3


From 39b89160df691045d1449cbaef43c02084c7543a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 10 Apr 2007 21:06:25 -0700
Subject: [SK_BUFF]: Introduce ipipv6_hdr(), remove skb->h.ipv6h

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h         | 5 +++++
 include/linux/skbuff.h       | 1 -
 net/ipv6/xfrm6_mode_beet.c   | 4 ++--
 net/ipv6/xfrm6_mode_tunnel.c | 8 ++++----
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index ec79c59b207..b768fcc0a4c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -228,6 +228,11 @@ static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb)
 	return (struct ipv6hdr *)skb_network_header(skb);
 }
 
+static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb)
+{
+	return (struct ipv6hdr *)skb->h.raw;
+}
+
 /* 
    This structure contains results of exthdrs parsing
    as offsets from skb->nh.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 862a81cf7f7..d3f186230ee 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -237,7 +237,6 @@ struct sk_buff {
 	/* 4 byte hole on 64 bit*/
 
 	union {
-		struct ipv6hdr	*ipv6h;
 		unsigned char	*raw;
 	} h;
 
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index abac09409de..0cc96ece003 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -47,8 +47,8 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb_reset_network_header(skb);
 	top_iph = ipv6_hdr(skb);
-	skb->nh.raw = &top_iph->nexthdr;
-	skb->h.ipv6h = top_iph + 1;
+	skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
+	skb->nh.raw += offsetof(struct ipv6hdr, nexthdr);
 
 	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
 	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 9d3bd338e5d..21d65df7479 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -19,7 +19,7 @@
 static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
 {
 	struct ipv6hdr *outer_iph = ipv6_hdr(skb);
-	struct ipv6hdr *inner_iph = skb->h.ipv6h;
+	struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
 
 	if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
 		IP6_ECN_set_ce(inner_iph);
@@ -55,8 +55,8 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb_reset_network_header(skb);
 	top_iph = ipv6_hdr(skb);
-	skb->nh.raw = &top_iph->nexthdr;
-	skb->h.ipv6h = top_iph + 1;
+	skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
+	skb->nh.raw += offsetof(struct ipv6hdr, nexthdr);
 
 	top_iph->version = 6;
 	if (xdst->route->ops->family == AF_INET6) {
@@ -102,7 +102,7 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	nh = skb_network_header(skb);
 	if (nh[IP6CB(skb)->nhoff] == IPPROTO_IPV6) {
 		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-			ipv6_copy_dscp(ipv6_hdr(skb), skb->h.ipv6h);
+			ipv6_copy_dscp(ipv6_hdr(skb), ipipv6_hdr(skb));
 		if (!(x->props.flags & XFRM_STATE_NOECN))
 			ipip6_ecn_decapsulate(skb);
 	} else {
-- 
cgit v1.2.3


From bd82393ca23324d103b21aae43160728da6e6c9c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 13 Mar 2007 17:10:43 -0300
Subject: [SK_BUFF]: More skb_reset_transport_header conversions

These are a bit more subtle, they are of this type:

-       skb->h.raw = payload;
        __skb_pull(skb, payload - skb->data);
+       skb_reset_transport_header(skb);

__skb_pull results in:

skb->data = skb->data + payload - skb->data;
skb->data = payload;

So after __skb_pull we have skb->data pointing to payload and we can
just call skb_reset_transport_header(skb), that will do:

skb->h.raw = payload;

The others are similar, allowing us to get rid of some more cases where a
pointer was being attributed to the layer headers.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 12 +++++++-----
 net/ipv6/datagram.c    |  4 ++--
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ccdc59df015..fcb35cd5ccf 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -278,10 +278,12 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 				   skb_network_header(skb);
 	serr->port = port;
 
-	skb->h.raw = payload;
-	if (!skb_pull(skb, payload - skb->data) ||
-	    sock_queue_err_skb(sk, skb))
-		kfree_skb(skb);
+	if (skb_pull(skb, payload - skb->data) != NULL) {
+		skb_reset_transport_header(skb);
+		if (sock_queue_err_skb(sk, skb) == 0)
+			return;
+	}
+	kfree_skb(skb);
 }
 
 void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
@@ -314,8 +316,8 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
 	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
 	serr->port = port;
 
-	skb->h.raw = skb->tail;
 	__skb_pull(skb, skb->tail - skb->data);
+	skb_reset_transport_header(skb);
 
 	if (sock_queue_err_skb(sk, skb))
 		kfree_skb(skb);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index feba6b197fe..f16f4f0c581 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -231,8 +231,8 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 				  skb_network_header(skb);
 	serr->port = port;
 
-	skb->h.raw = payload;
 	__skb_pull(skb, payload - skb->data);
+	skb_reset_transport_header(skb);
 
 	if (sock_queue_err_skb(sk, skb))
 		kfree_skb(skb);
@@ -268,8 +268,8 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
 	serr->port = fl->fl_ip_dport;
 
-	skb->h.raw = skb->tail;
 	__skb_pull(skb, skb->tail - skb->data);
+	skb_reset_transport_header(skb);
 
 	if (sock_queue_err_skb(sk, skb))
 		kfree_skb(skb);
-- 
cgit v1.2.3


From a27ef749e7be3b06fb58df53d94eb97a21f18707 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 13 Mar 2007 17:17:10 -0300
Subject: [SCTP]: Eliminate some pointer attributions to the skb layer headers

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/input.c | 8 ++++----
 net/sctp/ipv6.c  | 5 ++---
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/net/sctp/input.c b/net/sctp/input.c
index 40d0df80183..f38e91b3871 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -506,7 +506,7 @@ void sctp_err_finish(struct sock *sk, struct sctp_association *asoc)
 void sctp_v4_err(struct sk_buff *skb, __u32 info)
 {
 	struct iphdr *iph = (struct iphdr *)skb->data;
-	struct sctphdr *sh = (struct sctphdr *)(skb->data + (iph->ihl <<2));
+	const int ihlen = iph->ihl * 4;
 	const int type = icmp_hdr(skb)->type;
 	const int code = icmp_hdr(skb)->code;
 	struct sock *sk;
@@ -516,7 +516,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
 	char *saveip, *savesctp;
 	int err;
 
-	if (skb->len < ((iph->ihl << 2) + 8)) {
+	if (skb->len < ihlen + 8) {
 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
 		return;
 	}
@@ -525,8 +525,8 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
 	saveip = skb->nh.raw;
 	savesctp  = skb->h.raw;
 	skb_reset_network_header(skb);
-	skb->h.raw = (char *)sh;
-	sk = sctp_err_lookup(AF_INET, skb, sh, &asoc, &transport);
+	skb_set_transport_header(skb, ihlen);
+	sk = sctp_err_lookup(AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
 	/* Put back, the original pointers. */
 	skb->nh.raw = saveip;
 	skb->h.raw = savesctp;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0992bc5bb52..cd0af923878 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -122,7 +122,6 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			     int type, int code, int offset, __be32 info)
 {
 	struct inet6_dev *idev;
-	struct sctphdr *sh = (struct sctphdr *)(skb->data + offset);
 	struct sock *sk;
 	struct sctp_association *asoc;
 	struct sctp_transport *transport;
@@ -136,8 +135,8 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	saveip = skb->nh.raw;
 	savesctp  = skb->h.raw;
 	skb_reset_network_header(skb);
-	skb->h.raw = (char *)sh;
-	sk = sctp_err_lookup(AF_INET6, skb, sh, &asoc, &transport);
+	skb_set_transport_header(skb, offset);
+	sk = sctp_err_lookup(AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
 	/* Put back, the original pointers. */
 	skb->nh.raw = saveip;
 	skb->h.raw = savesctp;
-- 
cgit v1.2.3


From 9c70220b73908f64792422a2c39c593c4792f2c5 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 25 Apr 2007 18:04:18 -0700
Subject: [SK_BUFF]: Introduce skb_transport_header(skb)

For the places where we need a pointer to the transport header, it is
still legal to touch skb->h.raw directly if just adding to,
subtracting from or setting it to another layer header.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/appletalk/ltpc.c    |  7 +++++--
 drivers/net/cxgb3/sge.c         |  8 +++++---
 drivers/s390/net/qeth_eddp.c    |  4 ++--
 include/linux/atalk.h           |  4 ++--
 include/linux/dccp.h            | 19 ++++++++++++-------
 include/linux/icmp.h            |  2 +-
 include/linux/icmpv6.h          |  2 +-
 include/linux/igmp.h            |  6 +++---
 include/linux/ip.h              |  2 +-
 include/linux/ipv6.h            |  2 +-
 include/linux/sctp.h            |  2 +-
 include/linux/skbuff.h          |  5 +++++
 include/linux/tcp.h             |  2 +-
 include/linux/udp.h             |  2 +-
 include/net/ipx.h               |  2 +-
 include/net/pkt_cls.h           |  2 +-
 include/net/udp.h               |  4 ++--
 net/802/psnap.c                 |  2 +-
 net/ax25/af_ax25.c              |  5 +++--
 net/bluetooth/hci_core.c        |  4 ++--
 net/core/dev.c                  |  6 +++---
 net/econet/af_econet.c          |  2 +-
 net/ipv4/igmp.c                 |  2 +-
 net/ipv4/ip_gre.c               |  2 +-
 net/ipv4/ip_output.c            |  6 ++++--
 net/ipv4/ipconfig.c             |  4 ++--
 net/ipv4/ipmr.c                 |  8 +++++---
 net/ipv4/tcp.c                  | 12 +++++++-----
 net/ipv4/tcp_input.c            | 13 +++++++------
 net/ipv4/xfrm4_mode_beet.c      |  2 +-
 net/ipv4/xfrm4_mode_transport.c |  5 +++--
 net/ipv6/ah6.c                  |  2 +-
 net/ipv6/esp6.c                 |  2 +-
 net/ipv6/exthdrs.c              | 21 ++++++++++-----------
 net/ipv6/ipcomp6.c              |  2 +-
 net/ipv6/mcast.c                | 16 +++++++++-------
 net/ipv6/mip6.c                 |  8 ++++----
 net/ipv6/ndisc.c                | 17 +++++++++--------
 net/ipv6/raw.c                  |  2 +-
 net/ipv6/reassembly.c           |  2 +-
 net/ipv6/xfrm6_mode_transport.c |  5 +++--
 net/xfrm/xfrm_input.c           |  6 +++---
 42 files changed, 129 insertions(+), 102 deletions(-)

diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index dc3bce992dc..43c17c85c97 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -917,6 +917,7 @@ static int ltpc_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	int i;
 	struct lt_sendlap cbuf;
+	unsigned char *hdr;
 
 	cbuf.command = LT_SENDLAP;
 	cbuf.dnode = skb->data[0];
@@ -932,11 +933,13 @@ static int ltpc_xmit(struct sk_buff *skb, struct net_device *dev)
 		printk("\n");
 	}
 
-	do_write(dev,&cbuf,sizeof(cbuf),skb->h.raw,skb->len);
+	hdr = skb_transport_header(skb);
+	do_write(dev, &cbuf, sizeof(cbuf), hdr, skb->len);
 
 	if(debug & DEBUG_UPPER) {
 		printk("sent %d ddp bytes\n",skb->len);
-		for(i=0;i<skb->len;i++) printk("%02x ",skb->h.raw[i]);
+		for (i = 0; i < skb->len; i++)
+			printk("%02x ", hdr[i]);
 		printk("\n");
 	}
 
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index a70fe9145a2..610e4769efa 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -1324,12 +1324,14 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
 
 	flits = skb_transport_offset(skb) / 8;
 	sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
-	sgl_flits = make_sgl(skb, sgp, skb->h.raw, skb->tail - skb->h.raw,
+	sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
+			     skb->tail - skb_transport_header(skb),
 			     adap->pdev);
 	if (need_skb_unmap()) {
 		setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
 		skb->destructor = deferred_unmap_destructor;
-		((struct unmap_info *)skb->cb)->len = skb->tail - skb->h.raw;
+		((struct unmap_info *)skb->cb)->len = (skb->tail -
+						       skb_transport_header(skb));
 	}
 
 	write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
@@ -1351,7 +1353,7 @@ static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
 		return 1;	/* packet fits as immediate data */
 
 	flits = skb_transport_offset(skb) / 8;	/* headers */
-	if (skb->tail != skb->h.raw)
+	if (skb->tail != skb_transport_header(skb))
 		cnt++;
 	return flits_to_desc(flits + sgl_len(cnt));
 }
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index b8e84674e17..5890bb5ad23 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -476,13 +476,13 @@ qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
 		eddp = qeth_eddp_create_eddp_data(qhdr,
 						  skb_network_header(skb),
 						  ip_hdrlen(skb),
-						  skb->h.raw,
+						  skb_transport_header(skb),
 						  tcp_hdrlen(skb));
 	else
 		eddp = qeth_eddp_create_eddp_data(qhdr,
 						  skb_network_header(skb),
 						  sizeof(struct ipv6hdr),
-						  skb->h.raw,
+						  skb_transport_header(skb),
 						  tcp_hdrlen(skb));
 
 	if (eddp == NULL) {
diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index d12984ddaa9..ced8a1ed080 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -101,7 +101,7 @@ struct ddpehdr {
 
 static __inline__ struct ddpehdr *ddp_hdr(struct sk_buff *skb)
 {
-	return (struct ddpehdr *)skb->h.raw;
+	return (struct ddpehdr *)skb_transport_header(skb);
 }
 
 /* AppleTalk AARP headers */
@@ -129,7 +129,7 @@ struct elapaarp {
 
 static __inline__ struct elapaarp *aarp_hdr(struct sk_buff *skb)
 {
-	return (struct elapaarp *)skb->h.raw;
+	return (struct elapaarp *)skb_transport_header(skb);
 }
 
 /* Not specified - how long till we drop a resolved entry */
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 1f4df61735f..fdd4217f104 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -260,19 +260,20 @@ enum {
 
 static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb)
 {
-	return (struct dccp_hdr *)skb->h.raw;
+	return (struct dccp_hdr *)skb_transport_header(skb);
 }
 
 static inline struct dccp_hdr *dccp_zeroed_hdr(struct sk_buff *skb, int headlen)
 {
 	skb_push(skb, headlen);
 	skb_reset_transport_header(skb);
-	return memset(skb->h.raw, 0, headlen);
+	return memset(skb_transport_header(skb), 0, headlen);
 }
 
 static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb)
 {
-	return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr));
+	return (struct dccp_hdr_ext *)(skb_transport_header(skb) +
+				       sizeof(struct dccp_hdr));
 }
 
 static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh)
@@ -301,12 +302,14 @@ static inline __u64 dccp_hdr_seq(const struct sk_buff *skb)
 
 static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb)
 {
-	return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb));
+	return (struct dccp_hdr_request *)(skb_transport_header(skb) +
+					   dccp_basic_hdr_len(skb));
 }
 
 static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb)
 {
-	return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb));
+	return (struct dccp_hdr_ack_bits *)(skb_transport_header(skb) +
+					    dccp_basic_hdr_len(skb));
 }
 
 static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb)
@@ -317,12 +320,14 @@ static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb)
 
 static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb)
 {
-	return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb));
+	return (struct dccp_hdr_response *)(skb_transport_header(skb) +
+					    dccp_basic_hdr_len(skb));
 }
 
 static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb)
 {
-	return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb));
+	return (struct dccp_hdr_reset *)(skb_transport_header(skb) +
+					 dccp_basic_hdr_len(skb));
 }
 
 static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh)
diff --git a/include/linux/icmp.h b/include/linux/icmp.h
index cd3017a1578..474f2a51cf0 100644
--- a/include/linux/icmp.h
+++ b/include/linux/icmp.h
@@ -87,7 +87,7 @@ struct icmphdr {
 
 static inline struct icmphdr *icmp_hdr(const struct sk_buff *skb)
 {
-	return (struct icmphdr *)skb->h.raw;
+	return (struct icmphdr *)skb_transport_header(skb);
 }
 #endif
 
diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
index 0b5ba5eb7ed..7c5e9817e99 100644
--- a/include/linux/icmpv6.h
+++ b/include/linux/icmpv6.h
@@ -80,7 +80,7 @@ struct icmp6hdr {
 
 static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb)
 {
-	return (struct icmp6hdr *)skb->h.raw;
+	return (struct icmp6hdr *)skb_transport_header(skb);
 }
 #endif
 
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index ca285527b87..f510e7e382a 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -85,19 +85,19 @@ struct igmpv3_query {
 
 static inline struct igmphdr *igmp_hdr(const struct sk_buff *skb)
 {
-	return (struct igmphdr *)skb->h.raw;
+	return (struct igmphdr *)skb_transport_header(skb);
 }
 
 static inline struct igmpv3_report *
 			igmpv3_report_hdr(const struct sk_buff *skb)
 {
-	return (struct igmpv3_report *)skb->h.raw;
+	return (struct igmpv3_report *)skb_transport_header(skb);
 }
 
 static inline struct igmpv3_query *
 			igmpv3_query_hdr(const struct sk_buff *skb)
 {
-	return (struct igmpv3_query *)skb->h.raw;
+	return (struct igmpv3_query *)skb_transport_header(skb);
 }
 #endif
 
diff --git a/include/linux/ip.h b/include/linux/ip.h
index 19578440b5f..bd0a2a8631c 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -114,7 +114,7 @@ static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
 
 static inline struct iphdr *ipip_hdr(const struct sk_buff *skb)
 {
-	return (struct iphdr *)skb->h.raw;
+	return (struct iphdr *)skb_transport_header(skb);
 }
 #endif
 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index b768fcc0a4c..09ea01a8a99 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -230,7 +230,7 @@ static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb)
 
 static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb)
 {
-	return (struct ipv6hdr *)skb->h.raw;
+	return (struct ipv6hdr *)skb_transport_header(skb);
 }
 
 /* 
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index d76767dfe59..d70df61a029 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -68,7 +68,7 @@ typedef struct sctphdr {
 
 static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb)
 {
-	return (struct sctphdr *)skb->h.raw;
+	return (struct sctphdr *)skb_transport_header(skb);
 }
 #endif
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d3f186230ee..39a6da243b2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -951,6 +951,11 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
+{
+	return skb->h.raw;
+}
+
 static inline void skb_reset_transport_header(struct sk_buff *skb)
 {
 	skb->h.raw = skb->data;
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 911d937fb4c..c6b9f92e828 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -180,7 +180,7 @@ struct tcp_md5sig {
 
 static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
 {
-	return (struct tcphdr *)skb->h.raw;
+	return (struct tcphdr *)skb_transport_header(skb);
 }
 
 static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 1f58503af9a..6de445c31a6 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -31,7 +31,7 @@ struct udphdr {
 
 static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
 {
-	return (struct udphdr *)skb->h.raw;
+	return (struct udphdr *)skb_transport_header(skb);
 }
 #endif
 
diff --git a/include/net/ipx.h b/include/net/ipx.h
index c6b2ee61086..4cc0b4eca94 100644
--- a/include/net/ipx.h
+++ b/include/net/ipx.h
@@ -43,7 +43,7 @@ struct ipxhdr {
 
 static __inline__ struct ipxhdr *ipx_hdr(struct sk_buff *skb)
 {
-	return (struct ipxhdr *)skb->h.raw;
+	return (struct ipxhdr *)skb_transport_header(skb);
 }
 
 struct ipx_interface {
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 880eb7b5416..dcb3a91f136 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -328,7 +328,7 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
 		case TCF_LAYER_NETWORK:
 			return skb_network_header(skb);
 		case TCF_LAYER_TRANSPORT:
-			return skb->h.raw;
+			return skb_transport_header(skb);
 	}
 
 	return NULL;
diff --git a/include/net/udp.h b/include/net/udp.h
index 4a9699f7928..4906ed7113e 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -89,8 +89,8 @@ static inline int udp_lib_checksum_complete(struct sk_buff *skb)
  */
 static inline __wsum udp_csum_outgoing(struct sock *sk, struct sk_buff *skb)
 {
-	__wsum csum = csum_partial(skb->h.raw, sizeof(struct udphdr), 0);
-
+	__wsum csum = csum_partial(skb_transport_header(skb),
+				   sizeof(struct udphdr), 0);
 	skb_queue_walk(&sk->sk_write_queue, skb) {
 		csum = csum_add(csum, skb->csum);
 	}
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 6e7c2120b83..7cba1f42608 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -56,7 +56,7 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev,
 	};
 
 	rcu_read_lock();
-	proto = find_snap_client(skb->h.raw);
+	proto = find_snap_client(skb_transport_header(skb));
 	if (proto) {
 		/* Pass the frame on. */
 		skb->h.raw  += 5;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 75d4d695ede..5f28887822e 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1585,9 +1585,10 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	skb_set_transport_header(skb, lv);
 
-	SOCK_DEBUG(sk, "base=%p pos=%p\n", skb->data, skb->h.raw);
+	SOCK_DEBUG(sk, "base=%p pos=%p\n",
+		   skb->data, skb_transport_header(skb));
 
-	*skb->h.raw = AX25_UI;
+	*skb_transport_header(skb) = AX25_UI;
 
 	/* Datagram frames go straight out of the door as UI */
 	ax25_queue_xmit(skb, ax25->ax25_dev->dev);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index c11ceb6b3f7..c177e75d64a 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1076,7 +1076,7 @@ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags)
 
 	skb_push(skb, HCI_ACL_HDR_SIZE);
 	skb_reset_transport_header(skb);
-	hdr = (struct hci_acl_hdr *)skb->h.raw;
+	hdr = (struct hci_acl_hdr *)skb_transport_header(skb);
 	hdr->handle = cpu_to_le16(hci_handle_pack(handle, flags));
 	hdr->dlen   = cpu_to_le16(len);
 }
@@ -1145,7 +1145,7 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
 
 	skb_push(skb, HCI_SCO_HDR_SIZE);
 	skb_reset_transport_header(skb);
-	memcpy(skb->h.raw, &hdr, HCI_SCO_HDR_SIZE);
+	memcpy(skb_transport_header(skb), &hdr, HCI_SCO_HDR_SIZE);
 
 	skb->dev = (void *) hdev;
 	bt_cb(skb)->pkt_type = HCI_SCODATA_PKT;
diff --git a/net/core/dev.c b/net/core/dev.c
index f7f7e5687e4..30fcc7f9d4e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1175,12 +1175,12 @@ int skb_checksum_help(struct sk_buff *skb)
 	BUG_ON(offset > (int)skb->len);
 	csum = skb_checksum(skb, offset, skb->len-offset, 0);
 
-	offset = skb->tail - skb->h.raw;
+	offset = skb->tail - skb_transport_header(skb);
 	BUG_ON(offset <= 0);
 	BUG_ON(skb->csum_offset + 2 > offset);
 
-	*(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum);
-
+	*(__sum16 *)(skb_transport_header(skb) +
+		     skb->csum_offset) = csum_fold(csum);
 out_set_summed:
 	skb->ip_summed = CHECKSUM_NONE;
 out:
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index dcc2e4b6b2f..78993dadb53 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -943,7 +943,7 @@ static void aun_data_available(struct sock *sk, int slen)
 		printk(KERN_DEBUG "AUN: recvfrom() error %d\n", -err);
 	}
 
-	data = skb->h.raw + sizeof(struct udphdr);
+	data = skb_transport_header(skb) + sizeof(struct udphdr);
 	ah = (struct aunhdr *)data;
 	len = skb->len - sizeof(struct udphdr);
 	ip = ip_hdr(skb);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 292516bb1ec..8f0df7b4dfe 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -348,7 +348,7 @@ static int igmpv3_sendpack(struct sk_buff *skb)
 	struct iphdr *pip = ip_hdr(skb);
 	struct igmphdr *pig = igmp_hdr(skb);
 	const int iplen = skb->tail - skb->nh.raw;
-	const int igmplen = skb->tail - skb->h.raw;
+	const int igmplen = skb->tail - skb_transport_header(skb);
 
 	pip->tot_len = htons(iplen);
 	ip_send_check(pip);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 39216e6a59e..e6a9e452fd6 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -619,7 +619,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 		skb_reset_mac_header(skb);
 		__pskb_pull(skb, offset);
 		skb_reset_network_header(skb);
-		skb_postpull_rcsum(skb, skb->h.raw, offset);
+		skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
 		skb->pkt_type = PACKET_HOST;
 #ifdef CONFIG_NET_IPGRE_BROADCAST
 		if (MULTICAST(iph->daddr)) {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 602268661eb..11029b9d4cf 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1128,7 +1128,7 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 			if (fraggap) {
 				skb->csum = skb_copy_and_csum_bits(skb_prev,
 								   maxfraglen,
-								   skb->h.raw,
+						    skb_transport_header(skb),
 								   fraggap, 0);
 				skb_prev->csum = csum_sub(skb_prev->csum,
 							  skb->csum);
@@ -1374,7 +1374,9 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
 		       &ipc, rt, MSG_DONTWAIT);
 	if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
 		if (arg->csumoffset >= 0)
-			*((__sum16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
+			*((__sum16 *)skb_transport_header(skb) +
+			  arg->csumoffset) = csum_fold(csum_add(skb->csum,
+								arg->csum));
 		skb->ip_summed = CHECKSUM_NONE;
 		ip_push_pending_frames(sk);
 	}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 6b91c9f5d57..4e19ee0e010 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -432,7 +432,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 		goto drop;
 
 	/* Basic sanity checks can be done without the lock.  */
-	rarp = (struct arphdr *)skb->h.raw;
+	rarp = (struct arphdr *)skb_transport_header(skb);
 
 	/* If this test doesn't pass, it's not IP, or we should
 	 * ignore it anyway.
@@ -455,7 +455,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 		goto drop;
 
 	/* OK, it is all there and looks valid, process... */
-	rarp = (struct arphdr *)skb->h.raw;
+	rarp = (struct arphdr *)skb_transport_header(skb);
 	rarp_ptr = (unsigned char *) (rarp + 1);
 
 	/* One reply at a time, please. */
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 05bc27002de..8f45c95db45 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1437,7 +1437,8 @@ int pim_rcv_v1(struct sk_buff * skb)
 	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
 		goto drop;
 
-	encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
+	encap = (struct iphdr *)(skb_transport_header(skb) +
+				 sizeof(struct igmphdr));
 	/*
 	   Check that:
 	   a. packet is really destinted to a multicast group
@@ -1490,7 +1491,7 @@ static int pim_rcv(struct sk_buff * skb)
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 		goto drop;
 
-	pim = (struct pimreghdr*)skb->h.raw;
+	pim = (struct pimreghdr *)skb_transport_header(skb);
 	if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
 	    (pim->flags&PIM_NULL_REGISTER) ||
 	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
@@ -1498,7 +1499,8 @@ static int pim_rcv(struct sk_buff * skb)
 		goto drop;
 
 	/* check if the inner packet is destined to mcast group */
-	encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
+	encap = (struct iphdr *)(skb_transport_header(skb) +
+				 sizeof(struct pimreghdr));
 	if (!MULTICAST(encap->daddr) ||
 	    encap->tot_len == 0 ||
 	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f832f3c33ab..2b214cc3724 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2219,8 +2219,9 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 		th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
 				       (__force u32)delta));
 		if (skb->ip_summed != CHECKSUM_PARTIAL)
-			th->check = csum_fold(csum_partial(skb->h.raw, thlen,
-							   skb->csum));
+			th->check =
+			     csum_fold(csum_partial(skb_transport_header(skb),
+						    thlen, skb->csum));
 
 		seq += len;
 		skb = skb->next;
@@ -2230,12 +2231,13 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 		th->cwr = 0;
 	} while (skb->next);
 
-	delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
+	delta = htonl(oldlen + (skb->tail - skb_transport_header(skb)) +
+		      skb->data_len);
 	th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
 				(__force u32)delta));
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		th->check = csum_fold(csum_partial(skb->h.raw, thlen,
-						   skb->csum));
+		th->check = csum_fold(csum_partial(skb_transport_header(skb),
+						   thlen, skb->csum));
 
 out:
 	return segs;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c1ce3623738..9c3b4c7a50a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -140,7 +140,7 @@ static void tcp_measure_rcv_mss(struct sock *sk,
 		 *
 		 * "len" is invariant segment length, including TCP header.
 		 */
-		len += skb->data - skb->h.raw;
+		len += skb->data - skb_transport_header(skb);
 		if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) ||
 		    /* If PSH is not set, packet should be
 		     * full sized, provided peer TCP is not badly broken.
@@ -940,7 +940,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
+	unsigned char *ptr = (skb_transport_header(ack_skb) +
+			      TCP_SKB_CB(ack_skb)->sacked);
 	struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
 	struct sk_buff *cached_skb;
 	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
@@ -3634,10 +3635,10 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 			return;
 
 		skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
-		skb_set_network_header(nskb,
-				       skb_network_header(skb) - skb->head);
-		skb_set_transport_header(nskb, skb->h.raw - skb->head);
-
+		skb_set_network_header(nskb, (skb_network_header(skb) -
+					      skb->head));
+		skb_set_transport_header(nskb, (skb_transport_header(skb) -
+						skb->head));
 		skb_reserve(nskb, header);
 		memcpy(nskb->head, skb->head, header);
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 32fcfc0b5c8..591f0f1ef87 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -51,7 +51,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 
 		BUG_ON(optlen < 0);
 
-		ph = (struct ip_beet_phdr *)skb->h.raw;
+		ph = (struct ip_beet_phdr *)skb_transport_header(skb);
 		ph->padlen = 4 - (optlen & 4);
 		ph->hdrlen = optlen / 8;
 		ph->nexthdr = top_iph->protocol;
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index 2c46cbb3bbb..dc8834ea375 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -45,10 +45,11 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
  */
 static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	int ihl = skb->data - skb->h.raw;
+	int ihl = skb->data - skb_transport_header(skb);
 
 	if (skb->h.raw != skb->nh.raw) {
-		memmove(skb->h.raw, skb_network_header(skb), ihl);
+		memmove(skb_transport_header(skb),
+			skb_network_header(skb), ihl);
 		skb->nh.raw = skb->h.raw;
 	}
 	ip_hdr(skb)->tot_len = htons(skb->len + ihl);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index e5ee981d3e1..d2af4fe3725 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -268,7 +268,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 			goto error_free_iph;
 	}
 
-	ah = (struct ip_auth_hdr *)skb->h.raw;
+	ah = (struct ip_auth_hdr *)skb_transport_header(skb);
 	ah->nexthdr = nexthdr;
 
 	top_iph->priority    = 0;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index ad522b7b577..436eb9e6a6c 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -87,7 +87,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	pskb_put(skb, trailer, clen - skb->len);
 
 	top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len);
-	esph = (struct ipv6_esp_hdr *)skb->h.raw;
+	esph = (struct ipv6_esp_hdr *)skb_transport_header(skb);
 	top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph));
 	*(u8 *)(trailer->tail - 1) = *skb_network_header(skb);
 	*skb_network_header(skb) = IPPROTO_ESP;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 1bda0299890..f25ee773f52 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -144,7 +144,7 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 	struct tlvtype_proc *curr;
 	const unsigned char *nh = skb_network_header(skb);
 	int off = skb->h.raw - skb->nh.raw;
-	int len = ((skb->h.raw[1]+1)<<3);
+	int len = (skb_transport_header(skb)[1] + 1) << 3;
 
 	if (skb_transport_offset(skb) + len > skb_headlen(skb))
 		goto bad;
@@ -290,15 +290,14 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
-				 ((skb->h.raw[1] + 1) << 3)))) {
+				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		kfree_skb(skb);
 		return -1;
 	}
 
-	opt->lastopt = skb->h.raw - skb->nh.raw;
-	opt->dst1 = skb->h.raw - skb->nh.raw;
+	opt->lastopt = opt->dst1 = skb->h.raw - skb->nh.raw;
 #ifdef CONFIG_IPV6_MIP6
 	dstbuf = opt->dst1;
 #endif
@@ -307,7 +306,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 	if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
 		dst_release(dst);
 		skb = *skbp;
-		skb->h.raw += ((skb->h.raw[1]+1)<<3);
+		skb->h.raw += (skb_transport_header(skb)[1] + 1) << 3;
 		opt = IP6CB(skb);
 #ifdef CONFIG_IPV6_MIP6
 		opt->nhoff = dstbuf;
@@ -390,14 +389,14 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
-				 ((skb->h.raw[1] + 1) << 3)))) {
+				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 				 IPSTATS_MIB_INHDRERRORS);
 		kfree_skb(skb);
 		return -1;
 	}
 
-	hdr = (struct ipv6_rt_hdr *) skb->h.raw;
+	hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
 
 	switch (hdr->type) {
 #ifdef CONFIG_IPV6_MIP6
@@ -444,8 +443,7 @@ looped_back:
 			break;
 		}
 
-		opt->lastopt = skb->h.raw - skb->nh.raw;
-		opt->srcrt = skb->h.raw - skb->nh.raw;
+		opt->lastopt = opt->srcrt = skb->h.raw - skb->nh.raw;
 		skb->h.raw += (hdr->hdrlen + 1) << 3;
 		opt->dst0 = opt->dst1;
 		opt->dst1 = 0;
@@ -745,7 +743,8 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
 	 * hop-by-hop options.
 	 */
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) ||
-	    !pskb_may_pull(skb, sizeof(struct ipv6hdr) + ((skb->h.raw[1] + 1) << 3))) {
+	    !pskb_may_pull(skb, (sizeof(struct ipv6hdr) +
+				 ((skb_transport_header(skb)[1] + 1) << 3)))) {
 		kfree_skb(skb);
 		return -1;
 	}
@@ -753,7 +752,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
 	opt->hop = sizeof(struct ipv6hdr);
 	if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) {
 		skb = *skbp;
-		skb->h.raw += (skb->h.raw[1]+1)<<3;
+		skb->h.raw += (skb_transport_header(skb)[1] + 1) << 3;
 		opt = IP6CB(skb);
 		opt->nhoff = sizeof(struct ipv6hdr);
 		return 1;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 4a6501695e9..5555c98dea0 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -143,7 +143,7 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	/* compression */
 	plen = skb->len - hdr_len;
 	dlen = IPCOMP_SCRATCH_SIZE;
-	start = skb->h.raw;
+	start = skb_transport_header(skb);
 
 	cpu = get_cpu();
 	scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 1f2a3be9308..c6436f5e3e9 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1212,7 +1212,7 @@ int igmp6_event_query(struct sk_buff *skb)
 			in6_dev_put(idev);
 			return -EINVAL;
 		}
-		mlh2 = (struct mld2_query *) skb->h.raw;
+		mlh2 = (struct mld2_query *)skb_transport_header(skb);
 		max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000;
 		if (!max_delay)
 			max_delay = 1;
@@ -1235,7 +1235,7 @@ int igmp6_event_query(struct sk_buff *skb)
 				in6_dev_put(idev);
 				return -EINVAL;
 			}
-			mlh2 = (struct mld2_query *) skb->h.raw;
+			mlh2 = (struct mld2_query *)skb_transport_header(skb);
 			mark = 1;
 		}
 	} else {
@@ -1460,18 +1460,20 @@ static inline int mld_dev_queue_xmit(struct sk_buff *skb)
 static void mld_sendpack(struct sk_buff *skb)
 {
 	struct ipv6hdr *pip6 = ipv6_hdr(skb);
-	struct mld2_report *pmr = (struct mld2_report *)skb->h.raw;
+	struct mld2_report *pmr =
+			      (struct mld2_report *)skb_transport_header(skb);
 	int payload_len, mldlen;
 	struct inet6_dev *idev = in6_dev_get(skb->dev);
 	int err;
 
 	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
 	payload_len = skb->tail - skb_network_header(skb) - sizeof(*pip6);
-	mldlen = skb->tail - skb->h.raw;
+	mldlen = skb->tail - skb_transport_header(skb);
 	pip6->payload_len = htons(payload_len);
 
 	pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
-		IPPROTO_ICMPV6, csum_partial(skb->h.raw, mldlen, 0));
+		IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb),
+					     mldlen, 0));
 	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
 		mld_dev_queue_xmit);
 	if (!err) {
@@ -1505,7 +1507,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	pgr->grec_auxwords = 0;
 	pgr->grec_nsrcs = 0;
 	pgr->grec_mca = pmc->mca_addr;	/* structure copy */
-	pmr = (struct mld2_report *)skb->h.raw;
+	pmr = (struct mld2_report *)skb_transport_header(skb);
 	pmr->ngrec = htons(ntohs(pmr->ngrec)+1);
 	*ppgr = pgr;
 	return skb;
@@ -1538,7 +1540,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	if (!*psf_list)
 		goto empty_source;
 
-	pmr = skb ? (struct mld2_report *)skb->h.raw : NULL;
+	pmr = skb ? (struct mld2_report *)skb_transport_header(skb) : NULL;
 
 	/* EX and TO_EX get a fresh packet, if needed */
 	if (truncate) {
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 85202891644..f0288e92fb5 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -92,10 +92,10 @@ int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
 
 	if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) ||
 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
-				 ((skb->h.raw[1] + 1) << 3))))
+				 ((skb_transport_header(skb)[1] + 1) << 3))))
 		return -1;
 
-	mh = (struct ip6_mh *)skb->h.raw;
+	mh = (struct ip6_mh *)skb_transport_header(skb);
 
 	if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) {
 		LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n",
@@ -158,7 +158,7 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
 	nexthdr = *skb_network_header(skb);
 	*skb_network_header(skb) = IPPROTO_DSTOPTS;
 
-	dstopt = (struct ipv6_destopt_hdr *)skb->h.raw;
+	dstopt = (struct ipv6_destopt_hdr *)skb_transport_header(skb);
 	dstopt->nexthdr = nexthdr;
 
 	hao = mip6_padn((char *)(dstopt + 1),
@@ -370,7 +370,7 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb)
 	nexthdr = *skb_network_header(skb);
 	*skb_network_header(skb) = IPPROTO_ROUTING;
 
-	rt2 = (struct rt2_hdr *)skb->h.raw;
+	rt2 = (struct rt2_hdr *)skb_transport_header(skb);
 	rt2->rt_hdr.nexthdr = nexthdr;
 	rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1;
 	rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 8b946f56287..f9a85ab594d 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -760,7 +760,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
 
 static void ndisc_recv_ns(struct sk_buff *skb)
 {
-	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
 	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
 	struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
 	u8 *lladdr = NULL;
@@ -938,7 +938,7 @@ out:
 
 static void ndisc_recv_na(struct sk_buff *skb)
 {
-	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
+	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
 	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
 	struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
 	u8 *lladdr = NULL;
@@ -1040,7 +1040,7 @@ out:
 
 static void ndisc_recv_rs(struct sk_buff *skb)
 {
-	struct rs_msg *rs_msg = (struct rs_msg *) skb->h.raw;
+	struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
 	unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
 	struct neighbour *neigh;
 	struct inet6_dev *idev;
@@ -1097,7 +1097,7 @@ out:
 
 static void ndisc_router_discovery(struct sk_buff *skb)
 {
-	struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw;
+	struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
 	struct neighbour *neigh = NULL;
 	struct inet6_dev *in6_dev;
 	struct rt6_info *rt = NULL;
@@ -1108,7 +1108,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 
 	__u8 * opt = (__u8 *)(ra_msg + 1);
 
-	optlen = (skb->tail - skb->h.raw) - sizeof(struct ra_msg);
+	optlen = (skb->tail - skb_transport_header(skb)) -
+		  sizeof(struct ra_msg);
 
 	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
 		ND_PRINTK2(KERN_WARNING
@@ -1357,7 +1358,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
 		return;
 	}
 
-	optlen = skb->tail - skb->h.raw;
+	optlen = skb->tail - skb_transport_header(skb);
 	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
 
 	if (optlen < 0) {
@@ -1584,9 +1585,9 @@ int ndisc_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, skb->len))
 		return 0;
 
-	msg = (struct nd_msg *) skb->h.raw;
+	msg = (struct nd_msg *)skb_transport_header(skb);
 
-	__skb_push(skb, skb->data-skb->h.raw);
+	__skb_push(skb, skb->data - skb_transport_header(skb));
 
 	if (ipv6_hdr(skb)->hop_limit != 255) {
 		ND_PRINTK2(KERN_WARNING
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index bb049f1c267..116257d59a3 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1077,7 +1077,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
 			spin_lock_bh(&sk->sk_receive_queue.lock);
 			skb = skb_peek(&sk->sk_receive_queue);
 			if (skb != NULL)
-				amount = skb->tail - skb->h.raw;
+				amount = skb->tail - skb_transport_header(skb);
 			spin_unlock_bh(&sk->sk_receive_queue.lock);
 			return put_user(amount, (int __user *)arg);
 		}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index ef29a7bb97c..31d4271ea54 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -726,7 +726,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
 	}
 
 	hdr = ipv6_hdr(skb);
-	fhdr = (struct frag_hdr *)skb->h.raw;
+	fhdr = (struct frag_hdr *)skb_transport_header(skb);
 
 	if (!(fhdr->frag_off & htons(0xFFF9))) {
 		/* It is not a fragmented frame */
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 0134d74ef08..d526f4e9c65 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -51,10 +51,11 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
  */
 static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-	int ihl = skb->data - skb->h.raw;
+	int ihl = skb->data - skb_transport_header(skb);
 
 	if (skb->h.raw != skb->nh.raw) {
-		memmove(skb->h.raw, skb_network_header(skb), ihl);
+		memmove(skb_transport_header(skb),
+			skb_network_header(skb), ihl);
 		skb->nh.raw = skb->h.raw;
 	}
 	ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index ee15bdae141..5c4695840c5 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -62,7 +62,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
 	case IPPROTO_COMP:
 		if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
 			return -EINVAL;
-		*spi = htonl(ntohs(*(__be16*)(skb->h.raw + 2)));
+		*spi = htonl(ntohs(*(__be16*)(skb_transport_header(skb) + 2)));
 		*seq = 0;
 		return 0;
 	default:
@@ -72,8 +72,8 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq)
 	if (!pskb_may_pull(skb, 16))
 		return -EINVAL;
 
-	*spi = *(__be32*)(skb->h.raw + offset);
-	*seq = *(__be32*)(skb->h.raw + offset_seq);
+	*spi = *(__be32*)(skb_transport_header(skb) + offset);
+	*seq = *(__be32*)(skb_transport_header(skb) + offset_seq);
 	return 0;
 }
 EXPORT_SYMBOL(xfrm_parse_spi);
-- 
cgit v1.2.3


From 239254fedcbc6ff79bcf5696fe94723f7a5d0782 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 19 Apr 2007 19:55:44 -0700
Subject: [IPV4] xfrm4_mode_beet: Use skb_transport_header().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_mode_beet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 591f0f1ef87..74859dfb3a2 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -100,7 +100,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 	skb_set_network_header(skb, phlen - sizeof(*iph));
 	memmove(skb_network_header(skb), iph, sizeof(*iph));
 	skb_set_transport_header(skb, phlen + optlen);
-	skb->data = skb->h.raw;
+	skb->data = skb_transport_header(skb);
 
 	iph = ip_hdr(skb);
 	iph->ihl = (sizeof(*iph) + optlen) / 4;
-- 
cgit v1.2.3


From ce18afe57bf53477f133208856dd2b7e6b5db5e3 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 14 Mar 2007 16:36:16 -0700
Subject: [NETFILTER]: x_tables: remove duplicate of xt_prefix

Remove xt_proto_prefix array which duplicates xt_prefix and change all
users of xt_proto_prefix to xt_prefix.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/x_tables.c | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index ec607a421a5..0eb2504b89b 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -56,8 +56,8 @@ enum {
 };
 
 static const char *xt_prefix[NPROTO] = {
-	[AF_INET] 	= "ip",
-	[AF_INET6] 	= "ip6",
+	[AF_INET]	= "ip",
+	[AF_INET6]	= "ip6",
 	[NF_ARP]	= "arp",
 };
 
@@ -651,12 +651,6 @@ void *xt_unregister_table(struct xt_table *table)
 EXPORT_SYMBOL_GPL(xt_unregister_table);
 
 #ifdef CONFIG_PROC_FS
-static char *xt_proto_prefix[NPROTO] = {
-	[AF_INET]	= "ip",
-	[AF_INET6]	= "ip6",
-	[NF_ARP]	= "arp",
-};
-
 static struct list_head *xt_get_idx(struct list_head *list, struct seq_file *seq, loff_t pos)
 {
 	struct list_head *head = list->next;
@@ -798,7 +792,7 @@ int xt_proto_init(int af)
 
 
 #ifdef CONFIG_PROC_FS
-	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_TABLES, sizeof(buf));
 	proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
 	if (!proc)
@@ -806,14 +800,14 @@ int xt_proto_init(int af)
 	proc->data = (void *) ((unsigned long) af | (TABLE << 16));
 
 
-	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
 	proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
 	if (!proc)
 		goto out_remove_tables;
 	proc->data = (void *) ((unsigned long) af | (MATCH << 16));
 
-	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_TARGETS, sizeof(buf));
 	proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
 	if (!proc)
@@ -825,12 +819,12 @@ int xt_proto_init(int af)
 
 #ifdef CONFIG_PROC_FS
 out_remove_matches:
-	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
 	proc_net_remove(buf);
 
 out_remove_tables:
-	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_TABLES, sizeof(buf));
 	proc_net_remove(buf);
 out:
@@ -844,15 +838,15 @@ void xt_proto_fini(int af)
 #ifdef CONFIG_PROC_FS
 	char buf[XT_FUNCTION_MAXNAMELEN];
 
-	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_TABLES, sizeof(buf));
 	proc_net_remove(buf);
 
-	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_TARGETS, sizeof(buf));
 	proc_net_remove(buf);
 
-	strlcpy(buf, xt_proto_prefix[af], sizeof(buf));
+	strlcpy(buf, xt_prefix[af], sizeof(buf));
 	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
 	proc_net_remove(buf);
 #endif /*CONFIG_PROC_FS*/
-- 
cgit v1.2.3


From 587aa64163bb14f70098f450abab9410787fce9d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 14 Mar 2007 16:37:25 -0700
Subject: [NETFILTER]: Remove IPv4 only connection tracking/NAT

Remove the obsolete IPv4 only connection tracking/NAT as scheduled in
feature-removal-schedule.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/feature-removal-schedule.txt         |    9 -
 include/linux/netfilter_ipv4/Kbuild                |   14 -
 include/linux/netfilter_ipv4/ip_conntrack.h        |  402 -----
 include/linux/netfilter_ipv4/ip_conntrack_amanda.h |   11 -
 include/linux/netfilter_ipv4/ip_conntrack_core.h   |   61 -
 include/linux/netfilter_ipv4/ip_conntrack_ftp.h    |   44 -
 include/linux/netfilter_ipv4/ip_conntrack_h323.h   |   89 -
 include/linux/netfilter_ipv4/ip_conntrack_helper.h |   46 -
 include/linux/netfilter_ipv4/ip_conntrack_icmp.h   |    6 -
 include/linux/netfilter_ipv4/ip_conntrack_irc.h    |   32 -
 include/linux/netfilter_ipv4/ip_conntrack_pptp.h   |  326 ----
 .../linux/netfilter_ipv4/ip_conntrack_proto_gre.h  |  114 --
 .../linux/netfilter_ipv4/ip_conntrack_protocol.h   |   98 --
 include/linux/netfilter_ipv4/ip_conntrack_sctp.h   |    6 -
 include/linux/netfilter_ipv4/ip_conntrack_sip.h    |   40 -
 include/linux/netfilter_ipv4/ip_conntrack_tcp.h    |    6 -
 include/linux/netfilter_ipv4/ip_conntrack_tftp.h   |   20 -
 include/linux/netfilter_ipv4/ip_conntrack_tuple.h  |  146 --
 include/linux/netfilter_ipv4/ip_nat.h              |   79 -
 include/linux/netfilter_ipv4/ip_nat_core.h         |   18 -
 include/linux/netfilter_ipv4/ip_nat_helper.h       |   33 -
 include/linux/netfilter_ipv4/ip_nat_pptp.h         |   11 -
 include/linux/netfilter_ipv4/ip_nat_protocol.h     |   74 -
 include/linux/netfilter_ipv4/ip_nat_rule.h         |   28 -
 include/linux/netfilter_ipv4/ipt_SAME.h            |    2 +-
 include/net/netfilter/nf_conntrack.h               |    5 +
 include/net/netfilter/nf_conntrack_compat.h        |  145 --
 include/net/netfilter/nf_nat_rule.h                |   10 -
 net/ipv4/netfilter/Kconfig                         |  267 +--
 net/ipv4/netfilter/Makefile                        |   45 +-
 net/ipv4/netfilter/ip_conntrack_amanda.c           |  229 ---
 net/ipv4/netfilter/ip_conntrack_core.c             | 1549 ----------------
 net/ipv4/netfilter/ip_conntrack_ftp.c              |  520 ------
 net/ipv4/netfilter/ip_conntrack_helper_h323.c      | 1840 --------------------
 net/ipv4/netfilter/ip_conntrack_helper_pptp.c      |  684 --------
 net/ipv4/netfilter/ip_conntrack_irc.c              |  314 ----
 net/ipv4/netfilter/ip_conntrack_netbios_ns.c       |  143 --
 net/ipv4/netfilter/ip_conntrack_netlink.c          | 1577 -----------------
 net/ipv4/netfilter/ip_conntrack_proto_generic.c    |   74 -
 net/ipv4/netfilter/ip_conntrack_proto_gre.c        |  328 ----
 net/ipv4/netfilter/ip_conntrack_proto_icmp.c       |  315 ----
 net/ipv4/netfilter/ip_conntrack_proto_sctp.c       |  659 -------
 net/ipv4/netfilter/ip_conntrack_proto_tcp.c        | 1163 -------------
 net/ipv4/netfilter/ip_conntrack_proto_udp.c        |  148 --
 net/ipv4/netfilter/ip_conntrack_sip.c              |  520 ------
 net/ipv4/netfilter/ip_conntrack_standalone.c       |  962 ----------
 net/ipv4/netfilter/ip_conntrack_tftp.c             |  161 --
 net/ipv4/netfilter/ip_nat_amanda.c                 |   85 -
 net/ipv4/netfilter/ip_nat_core.c                   |  633 -------
 net/ipv4/netfilter/ip_nat_ftp.c                    |  180 --
 net/ipv4/netfilter/ip_nat_helper.c                 |  436 -----
 net/ipv4/netfilter/ip_nat_helper_h323.c            |  611 -------
 net/ipv4/netfilter/ip_nat_helper_pptp.c            |  350 ----
 net/ipv4/netfilter/ip_nat_irc.c                    |  122 --
 net/ipv4/netfilter/ip_nat_proto_gre.c              |  174 --
 net/ipv4/netfilter/ip_nat_proto_icmp.c             |   87 -
 net/ipv4/netfilter/ip_nat_proto_tcp.c              |  154 --
 net/ipv4/netfilter/ip_nat_proto_udp.c              |  144 --
 net/ipv4/netfilter/ip_nat_proto_unknown.c          |   55 -
 net/ipv4/netfilter/ip_nat_rule.c                   |  314 ----
 net/ipv4/netfilter/ip_nat_sip.c                    |  282 ---
 net/ipv4/netfilter/ip_nat_snmp_basic.c             | 1333 --------------
 net/ipv4/netfilter/ip_nat_standalone.c             |  387 ----
 net/ipv4/netfilter/ip_nat_tftp.c                   |   70 -
 net/ipv4/netfilter/ipt_CLUSTERIP.c                 |   18 +-
 net/ipv4/netfilter/ipt_MASQUERADE.c                |   57 +-
 net/ipv4/netfilter/ipt_NETMAP.c                    |   22 +-
 net/ipv4/netfilter/ipt_REDIRECT.c                  |   24 +-
 net/ipv4/netfilter/ipt_SAME.c                      |   25 +-
 net/ipv4/netfilter/nf_nat_h323.c                   |    4 +-
 net/ipv4/netfilter/nf_nat_pptp.c                   |    2 +-
 net/netfilter/Kconfig                              |   63 +-
 net/netfilter/xt_CONNMARK.c                        |   32 +-
 net/netfilter/xt_CONNSECMARK.c                     |   18 +-
 net/netfilter/xt_NOTRACK.c                         |    4 +-
 net/netfilter/xt_connbytes.c                       |   10 +-
 net/netfilter/xt_connmark.c                        |   17 +-
 net/netfilter/xt_conntrack.c                       |  110 +-
 net/netfilter/xt_helper.c                          |   57 -
 net/netfilter/xt_state.c                           |    4 +-
 80 files changed, 122 insertions(+), 19135 deletions(-)
 delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_amanda.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_core.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_ftp.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_h323.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_helper.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_icmp.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_irc.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_pptp.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_protocol.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_sctp.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_sip.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_tcp.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_tftp.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_conntrack_tuple.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_nat.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_nat_core.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_nat_helper.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_nat_pptp.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_nat_protocol.h
 delete mode 100644 include/linux/netfilter_ipv4/ip_nat_rule.h
 delete mode 100644 include/net/netfilter/nf_conntrack_compat.h
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_amanda.c
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_core.c
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_ftp.c
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_helper_h323.c
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_helper_pptp.c
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_irc.c
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_netbios_ns.c
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_netlink.c
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_proto_generic.c
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_proto_gre.c
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_proto_icmp.c
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_proto_sctp.c
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_proto_tcp.c
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_proto_udp.c
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_sip.c
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_standalone.c
 delete mode 100644 net/ipv4/netfilter/ip_conntrack_tftp.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_amanda.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_core.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_ftp.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_helper.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_helper_h323.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_helper_pptp.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_irc.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_proto_gre.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_proto_icmp.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_proto_tcp.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_proto_udp.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_proto_unknown.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_rule.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_sip.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_snmp_basic.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_standalone.c
 delete mode 100644 net/ipv4/netfilter/ip_nat_tftp.c

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 19b4c96b2a4..9817b60e70a 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -211,15 +211,6 @@ Who:   Adrian Bunk <bunk@stusta.de>
 
 ---------------------------
 
-What:	IPv4 only connection tracking/NAT/helpers
-When:	2.6.22
-Why:	The new layer 3 independant connection tracking replaces the old
-	IPv4 only version. After some stabilization of the new code the
-	old one will be removed.
-Who:	Patrick McHardy <kaber@trash.net>
-
----------------------------
-
 What:	ACPI hooks (X86_SPEEDSTEP_CENTRINO_ACPI) in speedstep-centrino driver
 When:	December 2006
 Why:	Speedstep-centrino driver with ACPI hooks and acpi-cpufreq driver are
diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild
index 180337801a8..7185792b900 100644
--- a/include/linux/netfilter_ipv4/Kbuild
+++ b/include/linux/netfilter_ipv4/Kbuild
@@ -1,9 +1,3 @@
-header-y += ip_conntrack_helper.h
-header-y += ip_conntrack_protocol.h
-header-y += ip_conntrack_sctp.h
-header-y += ip_conntrack_tcp.h
-header-y += ip_conntrack_tftp.h
-header-y += ip_nat_pptp.h
 header-y += ipt_addrtype.h
 header-y += ipt_ah.h
 header-y += ipt_CLASSIFY.h
@@ -49,13 +43,5 @@ header-y += ipt_ttl.h
 header-y += ipt_TTL.h
 header-y += ipt_ULOG.h
 
-unifdef-y += ip_conntrack.h
-unifdef-y += ip_conntrack_h323.h
-unifdef-y += ip_conntrack_irc.h
-unifdef-y += ip_conntrack_pptp.h
-unifdef-y += ip_conntrack_proto_gre.h
-unifdef-y += ip_conntrack_tuple.h
-unifdef-y += ip_nat.h
-unifdef-y += ip_nat_rule.h
 unifdef-y += ip_queue.h
 unifdef-y += ip_tables.h
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h
deleted file mode 100644
index da9274e6bf1..00000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack.h
+++ /dev/null
@@ -1,402 +0,0 @@
-#ifndef _IP_CONNTRACK_H
-#define _IP_CONNTRACK_H
-
-#include <linux/netfilter/nf_conntrack_common.h>
-
-#ifdef __KERNEL__
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-#include <linux/bitops.h>
-#include <linux/compiler.h>
-#include <asm/atomic.h>
-
-#include <linux/timer.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tcp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_icmp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_sctp.h>
-
-/* per conntrack: protocol private data */
-union ip_conntrack_proto {
-	/* insert conntrack proto private data here */
-	struct ip_ct_gre gre;
-	struct ip_ct_sctp sctp;
-	struct ip_ct_tcp tcp;
-	struct ip_ct_icmp icmp;
-};
-
-union ip_conntrack_expect_proto {
-	/* insert expect proto private data here */
-};
-
-/* Add protocol helper include file here */
-#include <linux/netfilter_ipv4/ip_conntrack_h323.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
-#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
-
-/* per conntrack: application helper private data */
-union ip_conntrack_help {
-	/* insert conntrack helper private data (master) here */
-	struct ip_ct_h323_master ct_h323_info;
-	struct ip_ct_pptp_master ct_pptp_info;
-	struct ip_ct_ftp_master ct_ftp_info;
-	struct ip_ct_irc_master ct_irc_info;
-};
-
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_pptp.h>
-
-/* per conntrack: nat application helper private data */
-union ip_conntrack_nat_help {
-	/* insert nat helper private data here */
-	struct ip_nat_pptp nat_pptp_info;
-};
-#endif
-
-#include <linux/types.h>
-#include <linux/skbuff.h>
-
-#ifdef CONFIG_NETFILTER_DEBUG
-#define IP_NF_ASSERT(x)							\
-do {									\
-	if (!(x))							\
-		/* Wooah!  I'm tripping my conntrack in a frenzy of	\
-		   netplay... */					\
-		printk("NF_IP_ASSERT: %s:%i(%s)\n",			\
-		       __FILE__, __LINE__, __FUNCTION__);		\
-} while(0)
-#else
-#define IP_NF_ASSERT(x)
-#endif
-
-struct ip_conntrack_helper;
-
-struct ip_conntrack
-{
-	/* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
-           plus 1 for any connection(s) we are `master' for */
-	struct nf_conntrack ct_general;
-
-	/* Have we seen traffic both ways yet? (bitset) */
-	unsigned long status;
-
-	/* Timer function; drops refcnt when it goes off. */
-	struct timer_list timeout;
-
-#ifdef CONFIG_IP_NF_CT_ACCT
-	/* Accounting Information (same cache line as other written members) */
-	struct ip_conntrack_counter counters[IP_CT_DIR_MAX];
-#endif
-	/* If we were expected by an expectation, this will be it */
-	struct ip_conntrack *master;
-
-	/* Current number of expected connections */
-	unsigned int expecting;
-
-	/* Unique ID that identifies this conntrack*/
-	unsigned int id;
-
-	/* Helper, if any. */
-	struct ip_conntrack_helper *helper;
-
-	/* Storage reserved for other modules: */
-	union ip_conntrack_proto proto;
-
-	union ip_conntrack_help help;
-
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-	struct {
-		struct ip_nat_info info;
-		union ip_conntrack_nat_help help;
-#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
-	defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
-		int masq_index;
-#endif
-	} nat;
-#endif /* CONFIG_IP_NF_NAT_NEEDED */
-
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
-	u_int32_t mark;
-#endif
-
-#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
-	u_int32_t secmark;
-#endif
-
-	/* Traversed often, so hopefully in different cacheline to top */
-	/* These are my tuples; original and reply */
-	struct ip_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
-};
-
-struct ip_conntrack_expect
-{
-	/* Internal linked list (global expectation list) */
-	struct list_head list;
-
-	/* We expect this tuple, with the following mask */
-	struct ip_conntrack_tuple tuple, mask;
- 
-	/* Function to call after setup and insertion */
-	void (*expectfn)(struct ip_conntrack *new,
-			 struct ip_conntrack_expect *this);
-
-	/* The conntrack of the master connection */
-	struct ip_conntrack *master;
-
-	/* Timer function; deletes the expectation. */
-	struct timer_list timeout;
-
-	/* Usage count. */
-	atomic_t use;
-
-	/* Unique ID */
-	unsigned int id;
-
-	/* Flags */
-	unsigned int flags;
-
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-	__be32 saved_ip;
-	/* This is the original per-proto part, used to map the
-	 * expected connection the way the recipient expects. */
-	union ip_conntrack_manip_proto saved_proto;
-	/* Direction relative to the master connection. */
-	enum ip_conntrack_dir dir;
-#endif
-};
-
-#define IP_CT_EXPECT_PERMANENT	0x1
-
-static inline struct ip_conntrack *
-tuplehash_to_ctrack(const struct ip_conntrack_tuple_hash *hash)
-{
-	return container_of(hash, struct ip_conntrack,
-			    tuplehash[hash->tuple.dst.dir]);
-}
-
-/* get master conntrack via master expectation */
-#define master_ct(conntr) (conntr->master)
-
-/* Alter reply tuple (maybe alter helper). */
-extern void
-ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
-			 const struct ip_conntrack_tuple *newreply);
-
-/* Is this tuple taken? (ignoring any belonging to the given
-   conntrack). */
-extern int
-ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
-			 const struct ip_conntrack *ignored_conntrack);
-
-/* Return conntrack_info and tuple hash for given skb. */
-static inline struct ip_conntrack *
-ip_conntrack_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
-{
-	*ctinfo = skb->nfctinfo;
-	return (struct ip_conntrack *)skb->nfct;
-}
-
-/* decrement reference count on a conntrack */
-static inline void
-ip_conntrack_put(struct ip_conntrack *ct)
-{
-	IP_NF_ASSERT(ct);
-	nf_conntrack_put(&ct->ct_general);
-}
-
-extern int invert_tuplepr(struct ip_conntrack_tuple *inverse,
-			  const struct ip_conntrack_tuple *orig);
-
-extern void __ip_ct_refresh_acct(struct ip_conntrack *ct,
-			         enum ip_conntrack_info ctinfo,
-			         const struct sk_buff *skb,
-			         unsigned long extra_jiffies,
-				 int do_acct);
-
-/* Refresh conntrack for this many jiffies and do accounting */
-static inline void ip_ct_refresh_acct(struct ip_conntrack *ct, 
-				      enum ip_conntrack_info ctinfo,
-				      const struct sk_buff *skb,
-				      unsigned long extra_jiffies)
-{
-	__ip_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, 1);
-}
-
-/* Refresh conntrack for this many jiffies */
-static inline void ip_ct_refresh(struct ip_conntrack *ct,
-				 const struct sk_buff *skb,
-				 unsigned long extra_jiffies)
-{
-	__ip_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0);
-}
-
-/* These are for NAT.  Icky. */
-/* Update TCP window tracking data when NAT mangles the packet */
-extern void ip_conntrack_tcp_update(struct sk_buff *skb,
-				    struct ip_conntrack *conntrack,
-				    enum ip_conntrack_dir dir);
-
-/* Call me when a conntrack is destroyed. */
-extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
-
-/* Fake conntrack entry for untracked connections */
-extern struct ip_conntrack ip_conntrack_untracked;
-
-/* Returns new sk_buff, or NULL */
-struct sk_buff *
-ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user);
-
-/* Iterate over all conntracks: if iter returns true, it's deleted. */
-extern void
-ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *data),
-		      void *data);
-
-extern struct ip_conntrack_helper *
-__ip_conntrack_helper_find_byname(const char *);
-extern struct ip_conntrack_helper *
-ip_conntrack_helper_find_get(const struct ip_conntrack_tuple *tuple);
-extern void ip_conntrack_helper_put(struct ip_conntrack_helper *helper);
-
-extern struct ip_conntrack_protocol *
-__ip_conntrack_proto_find(u_int8_t protocol);
-extern struct ip_conntrack_protocol *
-ip_conntrack_proto_find_get(u_int8_t protocol);
-extern void ip_conntrack_proto_put(struct ip_conntrack_protocol *proto);
-
-extern void ip_ct_remove_expectations(struct ip_conntrack *ct);
-
-extern struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *,
-					       struct ip_conntrack_tuple *);
-
-extern void ip_conntrack_free(struct ip_conntrack *ct);
-
-extern void ip_conntrack_hash_insert(struct ip_conntrack *ct);
-
-extern struct ip_conntrack_expect *
-__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple);
-
-extern struct ip_conntrack_expect *
-ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple);
-
-extern struct ip_conntrack_tuple_hash *
-__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
-                    const struct ip_conntrack *ignored_conntrack);
-
-extern void ip_conntrack_flush(void);
-
-/* It's confirmed if it is, or has been in the hash table. */
-static inline int is_confirmed(struct ip_conntrack *ct)
-{
-	return test_bit(IPS_CONFIRMED_BIT, &ct->status);
-}
-
-static inline int is_dying(struct ip_conntrack *ct)
-{
-	return test_bit(IPS_DYING_BIT, &ct->status);
-}
-
-extern unsigned int ip_conntrack_htable_size;
-extern int ip_conntrack_checksum;
- 
-#define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)
-#define CONNTRACK_STAT_INC_ATOMIC(count)		\
-do {							\
-	local_bh_disable();				\
-	__get_cpu_var(ip_conntrack_stat).count++;	\
-	local_bh_enable();				\
-} while (0)
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-#include <linux/notifier.h>
-#include <linux/interrupt.h>
-
-struct ip_conntrack_ecache {
-	struct ip_conntrack *ct;
-	unsigned int events;
-};
-DECLARE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
-
-#define CONNTRACK_ECACHE(x)	(__get_cpu_var(ip_conntrack_ecache).x)
- 
-extern struct atomic_notifier_head ip_conntrack_chain;
-extern struct atomic_notifier_head ip_conntrack_expect_chain;
-
-static inline int ip_conntrack_register_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_register(&ip_conntrack_chain, nb);
-}
-
-static inline int ip_conntrack_unregister_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_unregister(&ip_conntrack_chain, nb);
-}
-
-static inline int 
-ip_conntrack_expect_register_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_register(&ip_conntrack_expect_chain, nb);
-}
-
-static inline int
-ip_conntrack_expect_unregister_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_unregister(&ip_conntrack_expect_chain,
-			nb);
-}
-
-extern void ip_ct_deliver_cached_events(const struct ip_conntrack *ct);
-extern void __ip_ct_event_cache_init(struct ip_conntrack *ct);
-
-static inline void 
-ip_conntrack_event_cache(enum ip_conntrack_events event,
-			 const struct sk_buff *skb)
-{
-	struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct;
-	struct ip_conntrack_ecache *ecache;
-	
-	local_bh_disable();
-	ecache = &__get_cpu_var(ip_conntrack_ecache);
-	if (ct != ecache->ct)
-		__ip_ct_event_cache_init(ct);
-	ecache->events |= event;
-	local_bh_enable();
-}
-
-static inline void ip_conntrack_event(enum ip_conntrack_events event,
-				      struct ip_conntrack *ct)
-{
-	if (is_confirmed(ct) && !is_dying(ct))
-		atomic_notifier_call_chain(&ip_conntrack_chain, event, ct);
-}
-
-static inline void 
-ip_conntrack_expect_event(enum ip_conntrack_expect_events event,
-			  struct ip_conntrack_expect *exp)
-{
-	atomic_notifier_call_chain(&ip_conntrack_expect_chain, event, exp);
-}
-#else /* CONFIG_IP_NF_CONNTRACK_EVENTS */
-static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, 
-					    const struct sk_buff *skb) {}
-static inline void ip_conntrack_event(enum ip_conntrack_events event, 
-				      struct ip_conntrack *ct) {}
-static inline void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) {}
-static inline void 
-ip_conntrack_expect_event(enum ip_conntrack_expect_events event, 
-			  struct ip_conntrack_expect *exp) {}
-#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
-
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-static inline int ip_nat_initialized(struct ip_conntrack *conntrack,
-				     enum ip_nat_manip_type manip)
-{
-	if (manip == IP_NAT_MANIP_SRC)
-		return test_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status);
-	return test_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status);
-}
-#endif /* CONFIG_IP_NF_NAT_NEEDED */
-
-#endif /* __KERNEL__ */
-#endif /* _IP_CONNTRACK_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_amanda.h b/include/linux/netfilter_ipv4/ip_conntrack_amanda.h
deleted file mode 100644
index de3e41f51ae..00000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_amanda.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _IP_CONNTRACK_AMANDA_H
-#define _IP_CONNTRACK_AMANDA_H
-/* AMANDA tracking. */
-
-struct ip_conntrack_expect;
-extern unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
-					  enum ip_conntrack_info ctinfo,
-					  unsigned int matchoff,
-					  unsigned int matchlen,
-					  struct ip_conntrack_expect *exp);
-#endif /* _IP_CONNTRACK_AMANDA_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h
deleted file mode 100644
index e3a6df07aa4..00000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_core.h
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef _IP_CONNTRACK_CORE_H
-#define _IP_CONNTRACK_CORE_H
-#include <linux/netfilter.h>
-
-#define MAX_IP_CT_PROTO 256
-extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
-
-/* This header is used to share core functionality between the
-   standalone connection tracking module, and the compatibility layer's use
-   of connection tracking. */
-extern unsigned int ip_conntrack_in(unsigned int hooknum,
-				    struct sk_buff **pskb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    int (*okfn)(struct sk_buff *));
-
-extern int ip_conntrack_init(void);
-extern void ip_conntrack_cleanup(void);
-
-struct ip_conntrack_protocol;
-
-extern int
-ip_ct_get_tuple(const struct iphdr *iph,
-		const struct sk_buff *skb,
-		unsigned int dataoff,
-		struct ip_conntrack_tuple *tuple,
-		const struct ip_conntrack_protocol *protocol);
-
-extern int
-ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
-		   const struct ip_conntrack_tuple *orig,
-		   const struct ip_conntrack_protocol *protocol);
-
-/* Find a connection corresponding to a tuple. */
-struct ip_conntrack_tuple_hash *
-ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
-		      const struct ip_conntrack *ignored_conntrack);
-
-extern int __ip_conntrack_confirm(struct sk_buff **pskb);
-
-/* Confirm a connection: returns NF_DROP if packet must be dropped. */
-static inline int ip_conntrack_confirm(struct sk_buff **pskb)
-{
-	struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct;
-	int ret = NF_ACCEPT;
-
-	if (ct) {
-		if (!is_confirmed(ct) && !is_dying(ct))
-			ret = __ip_conntrack_confirm(pskb);
-		ip_ct_deliver_cached_events(ct);
-	}
-	return ret;
-}
-
-extern void ip_ct_unlink_expect(struct ip_conntrack_expect *exp);
-
-extern struct list_head *ip_conntrack_hash;
-extern struct list_head ip_conntrack_expect_list;
-extern rwlock_t ip_conntrack_lock;
-#endif /* _IP_CONNTRACK_CORE_H */
-
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_ftp.h b/include/linux/netfilter_ipv4/ip_conntrack_ftp.h
deleted file mode 100644
index 2129fc3972a..00000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_ftp.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef _IP_CONNTRACK_FTP_H
-#define _IP_CONNTRACK_FTP_H
-/* FTP tracking. */
-
-/* This enum is exposed to userspace */
-enum ip_ct_ftp_type
-{
-	/* PORT command from client */
-	IP_CT_FTP_PORT,
-	/* PASV response from server */
-	IP_CT_FTP_PASV,
-	/* EPRT command from client */
-	IP_CT_FTP_EPRT,
-	/* EPSV response from server */
-	IP_CT_FTP_EPSV,
-};
-
-#ifdef __KERNEL__
-
-#define FTP_PORT	21
-
-#define NUM_SEQ_TO_REMEMBER 2
-/* This structure exists only once per master */
-struct ip_ct_ftp_master {
-	/* Valid seq positions for cmd matching after newline */
-	u_int32_t seq_aft_nl[IP_CT_DIR_MAX][NUM_SEQ_TO_REMEMBER];
-	/* 0 means seq_match_aft_nl not set */
-	int seq_aft_nl_num[IP_CT_DIR_MAX];
-};
-
-struct ip_conntrack_expect;
-
-/* For NAT to hook in when we find a packet which describes what other
- * connection we should expect. */
-extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
-				       enum ip_conntrack_info ctinfo,
-				       enum ip_ct_ftp_type type,
-				       unsigned int matchoff,
-				       unsigned int matchlen,
-				       struct ip_conntrack_expect *exp,
-				       u32 *seq);
-#endif /* __KERNEL__ */
-
-#endif /* _IP_CONNTRACK_FTP_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_h323.h b/include/linux/netfilter_ipv4/ip_conntrack_h323.h
deleted file mode 100644
index 18f769818f4..00000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_h323.h
+++ /dev/null
@@ -1,89 +0,0 @@
-#ifndef _IP_CONNTRACK_H323_H
-#define _IP_CONNTRACK_H323_H
-
-#ifdef __KERNEL__
-
-#include <linux/netfilter/nf_conntrack_h323_asn1.h>
-
-#define RAS_PORT 1719
-#define Q931_PORT 1720
-#define H323_RTP_CHANNEL_MAX 4	/* Audio, video, FAX and other */
-
-/* This structure exists only once per master */
-struct ip_ct_h323_master {
-
-	/* Original and NATed Q.931 or H.245 signal ports */
-	u_int16_t sig_port[IP_CT_DIR_MAX];
-
-	/* Original and NATed RTP ports */
-	u_int16_t rtp_port[H323_RTP_CHANNEL_MAX][IP_CT_DIR_MAX];
-
-	union {
-		/* RAS connection timeout */
-		u_int32_t timeout;
-
-		/* Next TPKT length (for separate TPKT header and data) */
-		u_int16_t tpkt_len[IP_CT_DIR_MAX];
-	};
-};
-
-struct ip_conntrack_expect;
-
-extern int get_h225_addr(unsigned char *data, TransportAddress * addr,
-			 __be32 * ip, u_int16_t * port);
-extern void ip_conntrack_h245_expect(struct ip_conntrack *new,
-				     struct ip_conntrack_expect *this);
-extern void ip_conntrack_q931_expect(struct ip_conntrack *new,
-				     struct ip_conntrack_expect *this);
-extern int (*set_h245_addr_hook) (struct sk_buff ** pskb,
-				  unsigned char **data, int dataoff,
-				  H245_TransportAddress * addr,
-				  __be32 ip, u_int16_t port);
-extern int (*set_h225_addr_hook) (struct sk_buff ** pskb,
-				  unsigned char **data, int dataoff,
-				  TransportAddress * addr,
-				  __be32 ip, u_int16_t port);
-extern int (*set_sig_addr_hook) (struct sk_buff ** pskb,
-				 struct ip_conntrack * ct,
-				 enum ip_conntrack_info ctinfo,
-				 unsigned char **data,
-				 TransportAddress * addr, int count);
-extern int (*set_ras_addr_hook) (struct sk_buff ** pskb,
-				 struct ip_conntrack * ct,
-				 enum ip_conntrack_info ctinfo,
-				 unsigned char **data,
-				 TransportAddress * addr, int count);
-extern int (*nat_rtp_rtcp_hook) (struct sk_buff ** pskb,
-				 struct ip_conntrack * ct,
-				 enum ip_conntrack_info ctinfo,
-				 unsigned char **data, int dataoff,
-				 H245_TransportAddress * addr,
-				 u_int16_t port, u_int16_t rtp_port,
-				 struct ip_conntrack_expect * rtp_exp,
-				 struct ip_conntrack_expect * rtcp_exp);
-extern int (*nat_t120_hook) (struct sk_buff ** pskb, struct ip_conntrack * ct,
-			     enum ip_conntrack_info ctinfo,
-			     unsigned char **data, int dataoff,
-			     H245_TransportAddress * addr, u_int16_t port,
-			     struct ip_conntrack_expect * exp);
-extern int (*nat_h245_hook) (struct sk_buff ** pskb, struct ip_conntrack * ct,
-			     enum ip_conntrack_info ctinfo,
-			     unsigned char **data, int dataoff,
-			     TransportAddress * addr, u_int16_t port,
-			     struct ip_conntrack_expect * exp);
-extern int (*nat_callforwarding_hook) (struct sk_buff ** pskb,
-				       struct ip_conntrack * ct,
-				       enum ip_conntrack_info ctinfo,
-				       unsigned char **data, int dataoff,
-				       TransportAddress * addr,
-				       u_int16_t port,
-				       struct ip_conntrack_expect * exp);
-extern int (*nat_q931_hook) (struct sk_buff ** pskb, struct ip_conntrack * ct,
-			     enum ip_conntrack_info ctinfo,
-			     unsigned char **data, TransportAddress * addr,
-			     int idx, u_int16_t port,
-			     struct ip_conntrack_expect * exp);
-
-#endif
-
-#endif
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h
deleted file mode 100644
index 77fe868d36f..00000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_helper.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* IP connection tracking helpers. */
-#ifndef _IP_CONNTRACK_HELPER_H
-#define _IP_CONNTRACK_HELPER_H
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-
-struct module;
-
-struct ip_conntrack_helper
-{	
-	struct list_head list; 		/* Internal use. */
-
-	const char *name;		/* name of the module */
-	struct module *me;		/* pointer to self */
-	unsigned int max_expected;	/* Maximum number of concurrent 
-					 * expected connections */
-	unsigned int timeout;		/* timeout for expecteds */
-
-	/* Mask of things we will help (compared against server response) */
-	struct ip_conntrack_tuple tuple;
-	struct ip_conntrack_tuple mask;
-	
-	/* Function to call when data passes; return verdict, or -1 to
-           invalidate. */
-	int (*help)(struct sk_buff **pskb,
-		    struct ip_conntrack *ct,
-		    enum ip_conntrack_info conntrackinfo);
-
-	void (*destroy)(struct ip_conntrack *ct);
-
-	int (*to_nfattr)(struct sk_buff *skb, const struct ip_conntrack *ct);
-};
-
-extern int ip_conntrack_helper_register(struct ip_conntrack_helper *);
-extern void ip_conntrack_helper_unregister(struct ip_conntrack_helper *);
-
-/* Allocate space for an expectation: this is mandatory before calling 
-   ip_conntrack_expect_related.  You will have to call put afterwards. */
-extern struct ip_conntrack_expect *
-ip_conntrack_expect_alloc(struct ip_conntrack *master);
-extern void ip_conntrack_expect_put(struct ip_conntrack_expect *exp);
-
-/* Add an expected connection: can have more than one per connection */
-extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp);
-extern void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp);
-
-#endif /*_IP_CONNTRACK_HELPER_H*/
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_icmp.h b/include/linux/netfilter_ipv4/ip_conntrack_icmp.h
deleted file mode 100644
index eed5ee3e474..00000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_icmp.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _IP_CONNTRACK_ICMP_H
-#define _IP_CONNTRACK_ICMP_H
-
-#include <net/netfilter/ipv4/nf_conntrack_icmp.h>
-
-#endif /* _IP_CONNTRACK_ICMP_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_irc.h b/include/linux/netfilter_ipv4/ip_conntrack_irc.h
deleted file mode 100644
index 16601e0d562..00000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_irc.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* IRC extension for IP connection tracking.
- * (C) 2000 by Harald Welte <laforge@gnumonks.org>
- * based on RR's ip_conntrack_ftp.h
- *
- * ip_conntrack_irc.h,v 1.6 2000/11/07 18:26:42 laforge Exp
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
- *
- */
-#ifndef _IP_CONNTRACK_IRC_H
-#define _IP_CONNTRACK_IRC_H
-
-/* This structure exists only once per master */
-struct ip_ct_irc_master {
-};
-
-#ifdef __KERNEL__
-extern unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
-				       enum ip_conntrack_info ctinfo,
-				       unsigned int matchoff,
-				       unsigned int matchlen,
-				       struct ip_conntrack_expect *exp);
-
-#define IRC_PORT	6667
-
-#endif /* __KERNEL__ */
-
-#endif /* _IP_CONNTRACK_IRC_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
deleted file mode 100644
index 2644b1faddd..00000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
+++ /dev/null
@@ -1,326 +0,0 @@
-/* PPTP constants and structs */
-#ifndef _CONNTRACK_PPTP_H
-#define _CONNTRACK_PPTP_H
-
-/* state of the control session */
-enum pptp_ctrlsess_state {
-	PPTP_SESSION_NONE,			/* no session present */
-	PPTP_SESSION_ERROR,			/* some session error */
-	PPTP_SESSION_STOPREQ,			/* stop_sess request seen */
-	PPTP_SESSION_REQUESTED,			/* start_sess request seen */
-	PPTP_SESSION_CONFIRMED,			/* session established */
-};
-
-/* state of the call inside the control session */
-enum pptp_ctrlcall_state {
-	PPTP_CALL_NONE,
-	PPTP_CALL_ERROR,
-	PPTP_CALL_OUT_REQ,
-	PPTP_CALL_OUT_CONF,
-	PPTP_CALL_IN_REQ,
-	PPTP_CALL_IN_REP,
-	PPTP_CALL_IN_CONF,
-	PPTP_CALL_CLEAR_REQ,
-};
-
-
-/* conntrack private data */
-struct ip_ct_pptp_master {
-	enum pptp_ctrlsess_state sstate;	/* session state */
-
-	/* everything below is going to be per-expectation in newnat,
-	 * since there could be more than one call within one session */
-	enum pptp_ctrlcall_state cstate;	/* call state */
-	__be16 pac_call_id;			/* call id of PAC, host byte order */
-	__be16 pns_call_id;			/* call id of PNS, host byte order */
-
-	/* in pre-2.6.11 this used to be per-expect. Now it is per-conntrack
-	 * and therefore imposes a fixed limit on the number of maps */
-	struct ip_ct_gre_keymap *keymap_orig, *keymap_reply;
-};
-
-/* conntrack_expect private member */
-struct ip_ct_pptp_expect {
-	enum pptp_ctrlcall_state cstate; 	/* call state */
-	__be16 pac_call_id;			/* call id of PAC */
-	__be16 pns_call_id;			/* call id of PNS */
-};
-
-
-#ifdef __KERNEL__
-
-#define IP_CONNTR_PPTP		PPTP_CONTROL_PORT
-
-#define PPTP_CONTROL_PORT	1723
-
-#define PPTP_PACKET_CONTROL	1
-#define PPTP_PACKET_MGMT	2
-
-#define PPTP_MAGIC_COOKIE	0x1a2b3c4d
-
-struct pptp_pkt_hdr {
-	__u16	packetLength;
-	__be16	packetType;
-	__be32	magicCookie;
-};
-
-/* PptpControlMessageType values */
-#define PPTP_START_SESSION_REQUEST	1
-#define PPTP_START_SESSION_REPLY	2
-#define PPTP_STOP_SESSION_REQUEST	3
-#define PPTP_STOP_SESSION_REPLY		4
-#define PPTP_ECHO_REQUEST		5
-#define PPTP_ECHO_REPLY			6
-#define PPTP_OUT_CALL_REQUEST		7
-#define PPTP_OUT_CALL_REPLY		8
-#define PPTP_IN_CALL_REQUEST		9
-#define PPTP_IN_CALL_REPLY		10
-#define PPTP_IN_CALL_CONNECT		11
-#define PPTP_CALL_CLEAR_REQUEST		12
-#define PPTP_CALL_DISCONNECT_NOTIFY	13
-#define PPTP_WAN_ERROR_NOTIFY		14
-#define PPTP_SET_LINK_INFO		15
-
-#define PPTP_MSG_MAX			15
-
-/* PptpGeneralError values */
-#define PPTP_ERROR_CODE_NONE		0
-#define PPTP_NOT_CONNECTED		1
-#define PPTP_BAD_FORMAT			2
-#define PPTP_BAD_VALUE			3
-#define PPTP_NO_RESOURCE		4
-#define PPTP_BAD_CALLID			5
-#define PPTP_REMOVE_DEVICE_ERROR	6
-
-struct PptpControlHeader {
-	__be16	messageType;
-	__u16	reserved;
-};
-
-/* FramingCapability Bitmap Values */
-#define PPTP_FRAME_CAP_ASYNC		0x1
-#define PPTP_FRAME_CAP_SYNC		0x2
-
-/* BearerCapability Bitmap Values */
-#define PPTP_BEARER_CAP_ANALOG		0x1
-#define PPTP_BEARER_CAP_DIGITAL		0x2
-
-struct PptpStartSessionRequest {
-	__be16	protocolVersion;
-	__u16	reserved1;
-	__be32	framingCapability;
-	__be32	bearerCapability;
-	__be16	maxChannels;
-	__be16	firmwareRevision;
-	__u8	hostName[64];
-	__u8	vendorString[64];
-};
-
-/* PptpStartSessionResultCode Values */
-#define PPTP_START_OK			1
-#define PPTP_START_GENERAL_ERROR	2
-#define PPTP_START_ALREADY_CONNECTED	3
-#define PPTP_START_NOT_AUTHORIZED	4
-#define PPTP_START_UNKNOWN_PROTOCOL	5
-
-struct PptpStartSessionReply {
-	__be16	protocolVersion;
-	__u8	resultCode;
-	__u8	generalErrorCode;
-	__be32	framingCapability;
-	__be32	bearerCapability;
-	__be16	maxChannels;
-	__be16	firmwareRevision;
-	__u8	hostName[64];
-	__u8	vendorString[64];
-};
-
-/* PptpStopReasons */
-#define PPTP_STOP_NONE			1
-#define PPTP_STOP_PROTOCOL		2
-#define PPTP_STOP_LOCAL_SHUTDOWN	3
-
-struct PptpStopSessionRequest {
-	__u8	reason;
-	__u8	reserved1;
-	__u16	reserved2;
-};
-
-/* PptpStopSessionResultCode */
-#define PPTP_STOP_OK			1
-#define PPTP_STOP_GENERAL_ERROR		2
-
-struct PptpStopSessionReply {
-	__u8	resultCode;
-	__u8	generalErrorCode;
-	__u16	reserved1;
-};
-
-struct PptpEchoRequest {
-	__be32 identNumber;
-};
-
-/* PptpEchoReplyResultCode */
-#define PPTP_ECHO_OK			1
-#define PPTP_ECHO_GENERAL_ERROR		2
-
-struct PptpEchoReply {
-	__be32	identNumber;
-	__u8	resultCode;
-	__u8	generalErrorCode;
-	__u16	reserved;
-};
-
-/* PptpFramingType */
-#define PPTP_ASYNC_FRAMING		1
-#define PPTP_SYNC_FRAMING		2
-#define PPTP_DONT_CARE_FRAMING		3
-
-/* PptpCallBearerType */
-#define PPTP_ANALOG_TYPE		1
-#define PPTP_DIGITAL_TYPE		2
-#define PPTP_DONT_CARE_BEARER_TYPE	3
-
-struct PptpOutCallRequest {
-	__be16	callID;
-	__be16	callSerialNumber;
-	__be32	minBPS;
-	__be32	maxBPS;
-	__be32	bearerType;
-	__be32	framingType;
-	__be16	packetWindow;
-	__be16	packetProcDelay;
-	__be16	phoneNumberLength;
-	__u16	reserved1;
-	__u8	phoneNumber[64];
-	__u8	subAddress[64];
-};
-
-/* PptpCallResultCode */
-#define PPTP_OUTCALL_CONNECT		1
-#define PPTP_OUTCALL_GENERAL_ERROR	2
-#define PPTP_OUTCALL_NO_CARRIER		3
-#define PPTP_OUTCALL_BUSY		4
-#define PPTP_OUTCALL_NO_DIAL_TONE	5
-#define PPTP_OUTCALL_TIMEOUT		6
-#define PPTP_OUTCALL_DONT_ACCEPT	7
-
-struct PptpOutCallReply {
-	__be16	callID;
-	__be16	peersCallID;
-	__u8	resultCode;
-	__u8	generalErrorCode;
-	__be16	causeCode;
-	__be32	connectSpeed;
-	__be16	packetWindow;
-	__be16	packetProcDelay;
-	__be32	physChannelID;
-};
-
-struct PptpInCallRequest {
-	__be16	callID;
-	__be16	callSerialNumber;
-	__be32	callBearerType;
-	__be32	physChannelID;
-	__be16	dialedNumberLength;
-	__be16	dialingNumberLength;
-	__u8	dialedNumber[64];
-	__u8	dialingNumber[64];
-	__u8	subAddress[64];
-};
-
-/* PptpInCallResultCode */
-#define PPTP_INCALL_ACCEPT		1
-#define PPTP_INCALL_GENERAL_ERROR	2
-#define PPTP_INCALL_DONT_ACCEPT		3
-
-struct PptpInCallReply {
-	__be16	callID;
-	__be16	peersCallID;
-	__u8	resultCode;
-	__u8	generalErrorCode;
-	__be16	packetWindow;
-	__be16	packetProcDelay;
-	__u16	reserved;
-};
-
-struct PptpInCallConnected {
-	__be16	peersCallID;
-	__u16	reserved;
-	__be32	connectSpeed;
-	__be16	packetWindow;
-	__be16	packetProcDelay;
-	__be32	callFramingType;
-};
-
-struct PptpClearCallRequest {
-	__be16	callID;
-	__u16	reserved;
-};
-
-struct PptpCallDisconnectNotify {
-	__be16	callID;
-	__u8	resultCode;
-	__u8	generalErrorCode;
-	__be16	causeCode;
-	__u16	reserved;
-	__u8	callStatistics[128];
-};
-
-struct PptpWanErrorNotify {
-	__be16	peersCallID;
-	__u16	reserved;
-	__be32	crcErrors;
-	__be32	framingErrors;
-	__be32	hardwareOverRuns;
-	__be32	bufferOverRuns;
-	__be32	timeoutErrors;
-	__be32	alignmentErrors;
-};
-
-struct PptpSetLinkInfo {
-	__be16	peersCallID;
-	__u16	reserved;
-	__be32	sendAccm;
-	__be32	recvAccm;
-};
-
-union pptp_ctrl_union {
-	struct PptpStartSessionRequest	sreq;
-	struct PptpStartSessionReply	srep;
-	struct PptpStopSessionRequest	streq;
-	struct PptpStopSessionReply	strep;
-	struct PptpOutCallRequest	ocreq;
-	struct PptpOutCallReply		ocack;
-	struct PptpInCallRequest	icreq;
-	struct PptpInCallReply		icack;
-	struct PptpInCallConnected	iccon;
-	struct PptpClearCallRequest	clrreq;
-	struct PptpCallDisconnectNotify disc;
-	struct PptpWanErrorNotify	wanerr;
-	struct PptpSetLinkInfo		setlink;
-};
-
-extern int
-(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb,
-			  struct ip_conntrack *ct,
-			  enum ip_conntrack_info ctinfo,
-			  struct PptpControlHeader *ctlh,
-			  union pptp_ctrl_union *pptpReq);
-
-extern int
-(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb,
-			  struct ip_conntrack *ct,
-			  enum ip_conntrack_info ctinfo,
-			  struct PptpControlHeader *ctlh,
-			  union pptp_ctrl_union *pptpReq);
-
-extern void
-(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *exp_orig,
-			    struct ip_conntrack_expect *exp_reply);
-
-extern void
-(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct,
-			     struct ip_conntrack_expect *exp);
-#endif /* __KERNEL__ */
-#endif /* _CONNTRACK_PPTP_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h
deleted file mode 100644
index e371e0fc167..00000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h
+++ /dev/null
@@ -1,114 +0,0 @@
-#ifndef _CONNTRACK_PROTO_GRE_H
-#define _CONNTRACK_PROTO_GRE_H
-#include <asm/byteorder.h>
-
-/* GRE PROTOCOL HEADER */
-
-/* GRE Version field */
-#define GRE_VERSION_1701	0x0
-#define GRE_VERSION_PPTP	0x1
-
-/* GRE Protocol field */
-#define GRE_PROTOCOL_PPTP	0x880B
-
-/* GRE Flags */
-#define GRE_FLAG_C		0x80
-#define GRE_FLAG_R		0x40
-#define GRE_FLAG_K		0x20
-#define GRE_FLAG_S		0x10
-#define GRE_FLAG_A		0x80
-
-#define GRE_IS_C(f)	((f)&GRE_FLAG_C)
-#define GRE_IS_R(f)	((f)&GRE_FLAG_R)
-#define GRE_IS_K(f)	((f)&GRE_FLAG_K)
-#define GRE_IS_S(f)	((f)&GRE_FLAG_S)
-#define GRE_IS_A(f)	((f)&GRE_FLAG_A)
-
-/* GRE is a mess: Four different standards */
-struct gre_hdr {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u16	rec:3,
-		srr:1,
-		seq:1,
-		key:1,
-		routing:1,
-		csum:1,
-		version:3,
-		reserved:4,
-		ack:1;
-#elif defined(__BIG_ENDIAN_BITFIELD)
-	__u16	csum:1,
-		routing:1,
-		key:1,
-		seq:1,
-		srr:1,
-		rec:3,
-		ack:1,
-		reserved:4,
-		version:3;
-#else
-#error "Adjust your <asm/byteorder.h> defines"
-#endif
-	__be16	protocol;
-};
-
-/* modified GRE header for PPTP */
-struct gre_hdr_pptp {
-	__u8   flags;		/* bitfield */
-	__u8   version;		/* should be GRE_VERSION_PPTP */
-	__be16 protocol;	/* should be GRE_PROTOCOL_PPTP */
-	__be16 payload_len;	/* size of ppp payload, not inc. gre header */
-	__be16 call_id;		/* peer's call_id for this session */
-	__be32 seq;		/* sequence number.  Present if S==1 */
-	__be32 ack;		/* seq number of highest packet recieved by */
-				/*  sender in this session */
-};
-
-
-/* this is part of ip_conntrack */
-struct ip_ct_gre {
-	unsigned int stream_timeout;
-	unsigned int timeout;
-};
-
-#ifdef __KERNEL__
-struct ip_conntrack_expect;
-struct ip_conntrack;
-
-/* structure for original <-> reply keymap */
-struct ip_ct_gre_keymap {
-	struct list_head list;
-
-	struct ip_conntrack_tuple tuple;
-};
-
-/* add new tuple->key_reply pair to keymap */
-int ip_ct_gre_keymap_add(struct ip_conntrack *ct,
-			 struct ip_conntrack_tuple *t,
-			 int reply);
-
-/* delete keymap entries */
-void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct);
-
-
-/* get pointer to gre key, if present */
-static inline __be32 *gre_key(struct gre_hdr *greh)
-{
-	if (!greh->key)
-		return NULL;
-	if (greh->csum || greh->routing)
-		return (__be32 *) (greh+sizeof(*greh)+4);
-	return (__be32 *) (greh+sizeof(*greh));
-}
-
-/* get pointer ot gre csum, if present */
-static inline __sum16 *gre_csum(struct gre_hdr *greh)
-{
-	if (!greh->csum)
-		return NULL;
-	return (__sum16 *) (greh+sizeof(*greh));
-}
-
-#endif /* __KERNEL__ */
-
-#endif /* _CONNTRACK_PROTO_GRE_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
deleted file mode 100644
index 2c76b879e3d..00000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/* Header for use in defining a given protocol for connection tracking. */
-#ifndef _IP_CONNTRACK_PROTOCOL_H
-#define _IP_CONNTRACK_PROTOCOL_H
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-struct seq_file;
-
-struct ip_conntrack_protocol
-{
-	/* Protocol number. */
-	u_int8_t proto;
-
-	/* Protocol name */
-	const char *name;
-
-	/* Try to fill in the third arg: dataoff is offset past IP
-           hdr.  Return true if possible. */
-	int (*pkt_to_tuple)(const struct sk_buff *skb,
-			   unsigned int dataoff,
-			   struct ip_conntrack_tuple *tuple);
-
-	/* Invert the per-proto part of the tuple: ie. turn xmit into reply.
-	 * Some packets can't be inverted: return 0 in that case.
-	 */
-	int (*invert_tuple)(struct ip_conntrack_tuple *inverse,
-			    const struct ip_conntrack_tuple *orig);
-
-	/* Print out the per-protocol part of the tuple. Return like seq_* */
-	int (*print_tuple)(struct seq_file *,
-			   const struct ip_conntrack_tuple *);
-
-	/* Print out the private part of the conntrack. */
-	int (*print_conntrack)(struct seq_file *, const struct ip_conntrack *);
-
-	/* Returns verdict for packet, or -1 for invalid. */
-	int (*packet)(struct ip_conntrack *conntrack,
-		      const struct sk_buff *skb,
-		      enum ip_conntrack_info ctinfo);
-
-	/* Called when a new connection for this protocol found;
-	 * returns TRUE if it's OK.  If so, packet() called next. */
-	int (*new)(struct ip_conntrack *conntrack, const struct sk_buff *skb);
-
-	/* Called when a conntrack entry is destroyed */
-	void (*destroy)(struct ip_conntrack *conntrack);
-
-	int (*error)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
-		     unsigned int hooknum);
-
-	/* convert protoinfo to nfnetink attributes */
-	int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa,
-			 const struct ip_conntrack *ct);
-
-	/* convert nfnetlink attributes to protoinfo */
-	int (*from_nfattr)(struct nfattr *tb[], struct ip_conntrack *ct);
-
-	int (*tuple_to_nfattr)(struct sk_buff *skb,
-			       const struct ip_conntrack_tuple *t);
-	int (*nfattr_to_tuple)(struct nfattr *tb[],
-			       struct ip_conntrack_tuple *t);
-
-	/* Module (if any) which this is connected to. */
-	struct module *me;
-};
-
-/* Protocol registration. */
-extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto);
-extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto);
-/* Existing built-in protocols */
-extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp;
-extern struct ip_conntrack_protocol ip_conntrack_protocol_udp;
-extern struct ip_conntrack_protocol ip_conntrack_protocol_icmp;
-extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
-extern int ip_conntrack_protocol_tcp_init(void);
-
-/* Log invalid packets */
-extern unsigned int ip_ct_log_invalid;
-
-extern int ip_ct_port_tuple_to_nfattr(struct sk_buff *,
-				      const struct ip_conntrack_tuple *);
-extern int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
-				      struct ip_conntrack_tuple *);
-
-#ifdef CONFIG_SYSCTL
-#ifdef DEBUG_INVALID_PACKETS
-#define LOG_INVALID(proto) \
-	(ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW)
-#else
-#define LOG_INVALID(proto) \
-	((ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) \
-	 && net_ratelimit())
-#endif
-#else
-#define LOG_INVALID(proto) 0
-#endif /* CONFIG_SYSCTL */
-
-#endif /*_IP_CONNTRACK_PROTOCOL_H*/
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_sctp.h b/include/linux/netfilter_ipv4/ip_conntrack_sctp.h
deleted file mode 100644
index 4099a041a32..00000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_sctp.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _IP_CONNTRACK_SCTP_H
-#define _IP_CONNTRACK_SCTP_H
-
-#include <linux/netfilter/nf_conntrack_sctp.h>
-
-#endif /* _IP_CONNTRACK_SCTP_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_sip.h b/include/linux/netfilter_ipv4/ip_conntrack_sip.h
deleted file mode 100644
index bef6c646def..00000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_sip.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#ifndef __IP_CONNTRACK_SIP_H__
-#define __IP_CONNTRACK_SIP_H__
-#ifdef __KERNEL__
-
-#define SIP_PORT	5060
-#define SIP_TIMEOUT	3600
-
-enum sip_header_pos {
-	POS_REG_REQ_URI,
-	POS_REQ_URI,
-	POS_FROM,
-	POS_TO,
-	POS_VIA,
-	POS_CONTACT,
-	POS_CONTENT,
-	POS_MEDIA,
-	POS_OWNER,
-	POS_CONNECTION,
-	POS_SDP_HEADER,
-};
-
-extern unsigned int (*ip_nat_sip_hook)(struct sk_buff **pskb,
-				       enum ip_conntrack_info ctinfo,
-				       struct ip_conntrack *ct,
-				       const char **dptr);
-extern unsigned int (*ip_nat_sdp_hook)(struct sk_buff **pskb,
-				       enum ip_conntrack_info ctinfo,
-				       struct ip_conntrack_expect *exp,
-				       const char *dptr);
-
-extern int ct_sip_get_info(const char *dptr, size_t dlen,
-			   unsigned int *matchoff,
-			   unsigned int *matchlen,
-			   enum sip_header_pos pos);
-extern int ct_sip_lnlen(const char *line, const char *limit);
-extern const char *ct_sip_search(const char *needle, const char *haystack,
-				 size_t needle_len, size_t haystack_len,
-				 int case_sensitive);
-#endif /* __KERNEL__ */
-#endif /* __IP_CONNTRACK_SIP_H__ */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tcp.h b/include/linux/netfilter_ipv4/ip_conntrack_tcp.h
deleted file mode 100644
index 876b8fb17e6..00000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_tcp.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _IP_CONNTRACK_TCP_H
-#define _IP_CONNTRACK_TCP_H
-
-#include <linux/netfilter/nf_conntrack_tcp.h>
-
-#endif /* _IP_CONNTRACK_TCP_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tftp.h b/include/linux/netfilter_ipv4/ip_conntrack_tftp.h
deleted file mode 100644
index a404fc0abf0..00000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_tftp.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _IP_CT_TFTP
-#define _IP_CT_TFTP
-
-#define TFTP_PORT 69
-
-struct tftphdr {
-	__be16 opcode;
-};
-
-#define TFTP_OPCODE_READ	1
-#define TFTP_OPCODE_WRITE	2
-#define TFTP_OPCODE_DATA	3
-#define TFTP_OPCODE_ACK		4
-#define TFTP_OPCODE_ERROR	5
-
-extern unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb,
-				 enum ip_conntrack_info ctinfo,
-				 struct ip_conntrack_expect *exp);
-
-#endif /* _IP_CT_TFTP */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
deleted file mode 100644
index c228bde74c3..00000000000
--- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
+++ /dev/null
@@ -1,146 +0,0 @@
-#ifndef _IP_CONNTRACK_TUPLE_H
-#define _IP_CONNTRACK_TUPLE_H
-
-#include <linux/types.h>
-#include <linux/netfilter/nf_conntrack_tuple_common.h>
-
-/* A `tuple' is a structure containing the information to uniquely
-  identify a connection.  ie. if two packets have the same tuple, they
-  are in the same connection; if not, they are not.
-
-  We divide the structure along "manipulatable" and
-  "non-manipulatable" lines, for the benefit of the NAT code.
-*/
-
-/* The protocol-specific manipulable parts of the tuple: always in
-   network order! */
-union ip_conntrack_manip_proto
-{
-	/* Add other protocols here. */
-	u_int16_t all;
-
-	struct {
-		__be16 port;
-	} tcp;
-	struct {
-		__be16 port;
-	} udp;
-	struct {
-		__be16 id;
-	} icmp;
-	struct {
-		__be16 port;
-	} sctp;
-	struct {
-		__be16 key;	/* key is 32bit, pptp only uses 16 */
-	} gre;
-};
-
-/* The manipulable part of the tuple. */
-struct ip_conntrack_manip
-{
-	__be32 ip;
-	union ip_conntrack_manip_proto u;
-};
-
-/* This contains the information to distinguish a connection. */
-struct ip_conntrack_tuple
-{
-	struct ip_conntrack_manip src;
-
-	/* These are the parts of the tuple which are fixed. */
-	struct {
-		__be32 ip;
-		union {
-			/* Add other protocols here. */
-			u_int16_t all;
-
-			struct {
-				__be16 port;
-			} tcp;
-			struct {
-				__be16 port;
-			} udp;
-			struct {
-				u_int8_t type, code;
-			} icmp;
-			struct {
-				__be16 port;
-			} sctp;
-			struct {
-				__be16 key;	/* key is 32bit, 
-						 * pptp only uses 16 */
-			} gre;
-		} u;
-
-		/* The protocol. */
-		u_int8_t protonum;
-
-		/* The direction (for tuplehash) */
-		u_int8_t dir;
-	} dst;
-};
-
-/* This is optimized opposed to a memset of the whole structure.  Everything we
- * really care about is the  source/destination unions */
-#define IP_CT_TUPLE_U_BLANK(tuple) 				\
-	do {							\
-		(tuple)->src.u.all = 0;				\
-		(tuple)->dst.u.all = 0;				\
-	} while (0)
-
-#ifdef __KERNEL__
-
-#define DUMP_TUPLE(tp)						\
-DEBUGP("tuple %p: %u %u.%u.%u.%u:%hu -> %u.%u.%u.%u:%hu\n",	\
-       (tp), (tp)->dst.protonum,				\
-       NIPQUAD((tp)->src.ip), ntohs((tp)->src.u.all),		\
-       NIPQUAD((tp)->dst.ip), ntohs((tp)->dst.u.all))
-
-/* If we're the first tuple, it's the original dir. */
-#define DIRECTION(h) ((enum ip_conntrack_dir)(h)->tuple.dst.dir)
-
-/* Connections have two entries in the hash table: one for each way */
-struct ip_conntrack_tuple_hash
-{
-	struct list_head list;
-
-	struct ip_conntrack_tuple tuple;
-};
-
-#endif /* __KERNEL__ */
-
-static inline int ip_ct_tuple_src_equal(const struct ip_conntrack_tuple *t1,
-				        const struct ip_conntrack_tuple *t2)
-{
-	return t1->src.ip == t2->src.ip
-		&& t1->src.u.all == t2->src.u.all;
-}
-
-static inline int ip_ct_tuple_dst_equal(const struct ip_conntrack_tuple *t1,
-				        const struct ip_conntrack_tuple *t2)
-{
-	return t1->dst.ip == t2->dst.ip
-		&& t1->dst.u.all == t2->dst.u.all
-		&& t1->dst.protonum == t2->dst.protonum;
-}
-
-static inline int ip_ct_tuple_equal(const struct ip_conntrack_tuple *t1,
-				    const struct ip_conntrack_tuple *t2)
-{
-	return ip_ct_tuple_src_equal(t1, t2) && ip_ct_tuple_dst_equal(t1, t2);
-}
-
-static inline int ip_ct_tuple_mask_cmp(const struct ip_conntrack_tuple *t,
-				       const struct ip_conntrack_tuple *tuple,
-				       const struct ip_conntrack_tuple *mask)
-{
-	return !(((t->src.ip ^ tuple->src.ip) & mask->src.ip)
-		 || ((t->dst.ip ^ tuple->dst.ip) & mask->dst.ip)
-		 || ((t->src.u.all ^ tuple->src.u.all) & mask->src.u.all)
-		 || ((t->dst.u.all ^ tuple->dst.u.all) & mask->dst.u.all)
-		 || ((t->dst.protonum ^ tuple->dst.protonum)
-		     & mask->dst.protonum));
-}
-
-#endif /* _IP_CONNTRACK_TUPLE_H */
diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h
deleted file mode 100644
index bbca89aab81..00000000000
--- a/include/linux/netfilter_ipv4/ip_nat.h
+++ /dev/null
@@ -1,79 +0,0 @@
-#ifndef _IP_NAT_H
-#define _IP_NAT_H
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-
-#define IP_NAT_MAPPING_TYPE_MAX_NAMELEN 16
-
-enum ip_nat_manip_type
-{
-	IP_NAT_MANIP_SRC,
-	IP_NAT_MANIP_DST
-};
-
-/* SRC manip occurs POST_ROUTING or LOCAL_IN */
-#define HOOK2MANIP(hooknum) ((hooknum) != NF_IP_POST_ROUTING && (hooknum) != NF_IP_LOCAL_IN)
-
-#define IP_NAT_RANGE_MAP_IPS 1
-#define IP_NAT_RANGE_PROTO_SPECIFIED 2
-#define IP_NAT_RANGE_PROTO_RANDOM 4 /* add randomness to "port" selection */
-
-/* NAT sequence number modifications */
-struct ip_nat_seq {
-	/* position of the last TCP sequence number 
-	 * modification (if any) */
-	u_int32_t correction_pos;
-	/* sequence number offset before and after last modification */
-	int16_t offset_before, offset_after;
-};
-
-/* Single range specification. */
-struct ip_nat_range
-{
-	/* Set to OR of flags above. */
-	unsigned int flags;
-
-	/* Inclusive: network order. */
-	__be32 min_ip, max_ip;
-
-	/* Inclusive: network order */
-	union ip_conntrack_manip_proto min, max;
-};
-
-/* For backwards compat: don't use in modern code. */
-struct ip_nat_multi_range_compat
-{
-	unsigned int rangesize; /* Must be 1. */
-
-	/* hangs off end. */
-	struct ip_nat_range range[1];
-};
-
-#ifdef __KERNEL__
-#include <linux/list.h>
-
-/* Protects NAT hash tables, and NAT-private part of conntracks. */
-extern rwlock_t ip_nat_lock;
-
-/* The structure embedded in the conntrack structure. */
-struct ip_nat_info
-{
-	struct list_head bysource;
-	struct ip_nat_seq seq[IP_CT_DIR_MAX];
-};
-
-struct ip_conntrack;
-
-/* Set up the info structure to map into this range. */
-extern unsigned int ip_nat_setup_info(struct ip_conntrack *conntrack,
-				      const struct ip_nat_range *range,
-				      unsigned int hooknum);
-
-/* Is this tuple already taken? (not by us)*/
-extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
-			     const struct ip_conntrack *ignored_conntrack);
-
-#else  /* !__KERNEL__: iptables wants this to compile. */
-#define ip_nat_multi_range ip_nat_multi_range_compat
-#endif /*__KERNEL__*/
-#endif
diff --git a/include/linux/netfilter_ipv4/ip_nat_core.h b/include/linux/netfilter_ipv4/ip_nat_core.h
deleted file mode 100644
index 60566f9fd7b..00000000000
--- a/include/linux/netfilter_ipv4/ip_nat_core.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef _IP_NAT_CORE_H
-#define _IP_NAT_CORE_H
-#include <linux/list.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-
-/* This header used to share core functionality between the standalone
-   NAT module, and the compatibility layer's use of NAT for masquerading. */
-
-extern unsigned int ip_nat_packet(struct ip_conntrack *ct,
-			       enum ip_conntrack_info conntrackinfo,
-			       unsigned int hooknum,
-			       struct sk_buff **pskb);
-
-extern int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
-					 enum ip_conntrack_info ctinfo,
-					 unsigned int hooknum,
-					 struct sk_buff **pskb);
-#endif /* _IP_NAT_CORE_H */
diff --git a/include/linux/netfilter_ipv4/ip_nat_helper.h b/include/linux/netfilter_ipv4/ip_nat_helper.h
deleted file mode 100644
index bf9cb105c88..00000000000
--- a/include/linux/netfilter_ipv4/ip_nat_helper.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef _IP_NAT_HELPER_H
-#define _IP_NAT_HELPER_H
-/* NAT protocol helper routines. */
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/module.h>
-
-struct sk_buff;
-
-/* These return true or false. */
-extern int ip_nat_mangle_tcp_packet(struct sk_buff **skb,
-				struct ip_conntrack *ct,
-				enum ip_conntrack_info ctinfo,
-				unsigned int match_offset,
-				unsigned int match_len,
-				const char *rep_buffer,
-				unsigned int rep_len);
-extern int ip_nat_mangle_udp_packet(struct sk_buff **skb,
-				struct ip_conntrack *ct,
-				enum ip_conntrack_info ctinfo,
-				unsigned int match_offset,
-				unsigned int match_len,
-				const char *rep_buffer,
-				unsigned int rep_len);
-extern int ip_nat_seq_adjust(struct sk_buff **pskb, 
-			     struct ip_conntrack *ct, 
-			     enum ip_conntrack_info ctinfo);
-
-/* Setup NAT on this expected conntrack so it follows master, but goes
- * to port ct->master->saved_proto. */
-extern void ip_nat_follow_master(struct ip_conntrack *ct,
-				 struct ip_conntrack_expect *this);
-#endif
diff --git a/include/linux/netfilter_ipv4/ip_nat_pptp.h b/include/linux/netfilter_ipv4/ip_nat_pptp.h
deleted file mode 100644
index 36668bf0f37..00000000000
--- a/include/linux/netfilter_ipv4/ip_nat_pptp.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* PPTP constants and structs */
-#ifndef _NAT_PPTP_H
-#define _NAT_PPTP_H
-
-/* conntrack private data */
-struct ip_nat_pptp {
-	__be16 pns_call_id;		/* NAT'ed PNS call id */
-	__be16 pac_call_id;		/* NAT'ed PAC call id */
-};
-
-#endif /* _NAT_PPTP_H */
diff --git a/include/linux/netfilter_ipv4/ip_nat_protocol.h b/include/linux/netfilter_ipv4/ip_nat_protocol.h
deleted file mode 100644
index 612a43614e7..00000000000
--- a/include/linux/netfilter_ipv4/ip_nat_protocol.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/* Header for use in defining a given protocol. */
-#ifndef _IP_NAT_PROTOCOL_H
-#define _IP_NAT_PROTOCOL_H
-#include <linux/init.h>
-#include <linux/list.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-struct iphdr;
-struct ip_nat_range;
-
-struct ip_nat_protocol
-{
-	/* Protocol name */
-	const char *name;
-
-	/* Protocol number. */
-	unsigned int protonum;
-
-	struct module *me;
-
-	/* Translate a packet to the target according to manip type.
-	   Return true if succeeded. */
-	int (*manip_pkt)(struct sk_buff **pskb,
-			 unsigned int iphdroff,
-			 const struct ip_conntrack_tuple *tuple,
-			 enum ip_nat_manip_type maniptype);
-
-	/* Is the manipable part of the tuple between min and max incl? */
-	int (*in_range)(const struct ip_conntrack_tuple *tuple,
-			enum ip_nat_manip_type maniptype,
-			const union ip_conntrack_manip_proto *min,
-			const union ip_conntrack_manip_proto *max);
-
-	/* Alter the per-proto part of the tuple (depending on
-	   maniptype), to give a unique tuple in the given range if
-	   possible; return false if not.  Per-protocol part of tuple
-	   is initialized to the incoming packet. */
-	int (*unique_tuple)(struct ip_conntrack_tuple *tuple,
-			    const struct ip_nat_range *range,
-			    enum ip_nat_manip_type maniptype,
-			    const struct ip_conntrack *conntrack);
-
-	int (*range_to_nfattr)(struct sk_buff *skb,
-			       const struct ip_nat_range *range);
-
-	int (*nfattr_to_range)(struct nfattr *tb[],
-			       struct ip_nat_range *range);
-};
-
-/* Protocol registration. */
-extern int ip_nat_protocol_register(struct ip_nat_protocol *proto);
-extern void ip_nat_protocol_unregister(struct ip_nat_protocol *proto);
-
-extern struct ip_nat_protocol *ip_nat_proto_find_get(u_int8_t protocol);
-extern void ip_nat_proto_put(struct ip_nat_protocol *proto);
-
-/* Built-in protocols. */
-extern struct ip_nat_protocol ip_nat_protocol_tcp;
-extern struct ip_nat_protocol ip_nat_protocol_udp;
-extern struct ip_nat_protocol ip_nat_protocol_icmp;
-extern struct ip_nat_protocol ip_nat_unknown_protocol;
-
-extern int init_protocols(void) __init;
-extern void cleanup_protocols(void);
-extern struct ip_nat_protocol *find_nat_proto(u_int16_t protonum);
-
-extern int ip_nat_port_range_to_nfattr(struct sk_buff *skb,
-				       const struct ip_nat_range *range);
-extern int ip_nat_port_nfattr_to_range(struct nfattr *tb[],
-				       struct ip_nat_range *range);
-
-#endif /*_IP_NAT_PROTO_H*/
diff --git a/include/linux/netfilter_ipv4/ip_nat_rule.h b/include/linux/netfilter_ipv4/ip_nat_rule.h
deleted file mode 100644
index 73b9552e6a8..00000000000
--- a/include/linux/netfilter_ipv4/ip_nat_rule.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef _IP_NAT_RULE_H
-#define _IP_NAT_RULE_H
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-
-#ifdef __KERNEL__
-
-extern int ip_nat_rule_init(void) __init;
-extern void ip_nat_rule_cleanup(void);
-extern int ip_nat_rule_find(struct sk_buff **pskb,
-			    unsigned int hooknum,
-			    const struct net_device *in,
-			    const struct net_device *out,
-			    struct ip_conntrack *ct,
-			    struct ip_nat_info *info);
-
-extern unsigned int
-alloc_null_binding(struct ip_conntrack *conntrack,
-		   struct ip_nat_info *info,
-		   unsigned int hooknum);
-
-extern unsigned int
-alloc_null_binding_confirmed(struct ip_conntrack *conntrack,
-			     struct ip_nat_info *info,
-			     unsigned int hooknum);
-#endif
-#endif /* _IP_NAT_RULE_H */
diff --git a/include/linux/netfilter_ipv4/ipt_SAME.h b/include/linux/netfilter_ipv4/ipt_SAME.h
index cc4c0b2269a..be6e682a85e 100644
--- a/include/linux/netfilter_ipv4/ipt_SAME.h
+++ b/include/linux/netfilter_ipv4/ipt_SAME.h
@@ -13,7 +13,7 @@ struct ipt_same_info
 	u_int32_t *iparray;
 
 	/* hangs off end. */
-	struct ip_nat_range range[IPT_SAME_MAX_RANGE];
+	struct nf_nat_range range[IPT_SAME_MAX_RANGE];
 };
 
 #endif /*_IPT_SAME_H*/
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 0e690e34c00..1c6b8bd09b9 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -250,6 +250,11 @@ static inline int nf_ct_is_dying(struct nf_conn *ct)
 	return test_bit(IPS_DYING_BIT, &ct->status);
 }
 
+static inline int nf_ct_is_untracked(const struct sk_buff *skb)
+{
+	return (skb->nfct == &nf_conntrack_untracked.ct_general);
+}
+
 extern unsigned int nf_conntrack_htable_size;
 extern int nf_conntrack_checksum;
 extern atomic_t nf_conntrack_count;
diff --git a/include/net/netfilter/nf_conntrack_compat.h b/include/net/netfilter/nf_conntrack_compat.h
deleted file mode 100644
index 6f84c1f7fcd..00000000000
--- a/include/net/netfilter/nf_conntrack_compat.h
+++ /dev/null
@@ -1,145 +0,0 @@
-#ifndef _NF_CONNTRACK_COMPAT_H
-#define _NF_CONNTRACK_COMPAT_H
-
-#ifdef __KERNEL__
-
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/socket.h>
-
-#ifdef CONFIG_IP_NF_CONNTRACK_MARK
-static inline u_int32_t *nf_ct_get_mark(const struct sk_buff *skb,
-					u_int32_t *ctinfo)
-{
-	struct ip_conntrack *ct = ip_conntrack_get(skb, ctinfo);
-
-	if (ct)
-		return &ct->mark;
-	else
-		return NULL;
-}
-#endif /* CONFIG_IP_NF_CONNTRACK_MARK */
-
-#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
-static inline u_int32_t *nf_ct_get_secmark(const struct sk_buff *skb,
-					   u_int32_t *ctinfo)
-{
-	struct ip_conntrack *ct = ip_conntrack_get(skb, ctinfo);
-
-	if (ct)
-		return &ct->secmark;
-	else
-		return NULL;
-}
-#endif /* CONFIG_IP_NF_CONNTRACK_SECMARK */
-
-#ifdef CONFIG_IP_NF_CT_ACCT
-static inline struct ip_conntrack_counter *
-nf_ct_get_counters(const struct sk_buff *skb)
-{
-	enum ip_conntrack_info ctinfo;
-	struct ip_conntrack *ct = ip_conntrack_get(skb, &ctinfo);
-
-	if (ct)
-		return ct->counters;
-	else
-		return NULL;
-}
-#endif /* CONFIG_IP_NF_CT_ACCT */
-
-static inline int nf_ct_is_untracked(const struct sk_buff *skb)
-{
-	return (skb->nfct == &ip_conntrack_untracked.ct_general);
-}
-
-static inline void nf_ct_untrack(struct sk_buff *skb)
-{
-	skb->nfct = &ip_conntrack_untracked.ct_general;
-}
-
-static inline int nf_ct_get_ctinfo(const struct sk_buff *skb,
-				   enum ip_conntrack_info *ctinfo)
-{
-	struct ip_conntrack *ct = ip_conntrack_get(skb, ctinfo);
-	return (ct != NULL);
-}
-
-static inline int nf_ct_l3proto_try_module_get(unsigned short l3proto)
-{
-	need_conntrack();
-	return l3proto == PF_INET ? 0 : -1;
-}
-
-static inline void nf_ct_l3proto_module_put(unsigned short l3proto)
-{
-}
-
-#else /* CONFIG_IP_NF_CONNTRACK */
-
-#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
-#include <net/netfilter/nf_conntrack.h>
-
-#ifdef CONFIG_NF_CONNTRACK_MARK
-
-static inline u_int32_t *nf_ct_get_mark(const struct sk_buff *skb,
-					u_int32_t *ctinfo)
-{
-	struct nf_conn *ct = nf_ct_get(skb, ctinfo);
-
-	if (ct)
-		return &ct->mark;
-	else
-		return NULL;
-}
-#endif /* CONFIG_NF_CONNTRACK_MARK */
-
-#ifdef CONFIG_NF_CONNTRACK_SECMARK
-static inline u_int32_t *nf_ct_get_secmark(const struct sk_buff *skb,
-					   u_int32_t *ctinfo)
-{
-	struct nf_conn *ct = nf_ct_get(skb, ctinfo);
-
-	if (ct)
-		return &ct->secmark;
-	else
-		return NULL;
-}
-#endif /* CONFIG_NF_CONNTRACK_MARK */
-
-#ifdef CONFIG_NF_CT_ACCT
-static inline struct ip_conntrack_counter *
-nf_ct_get_counters(const struct sk_buff *skb)
-{
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-
-	if (ct)
-		return ct->counters;
-	else
-		return NULL;
-}
-#endif /* CONFIG_NF_CT_ACCT */
-
-static inline int nf_ct_is_untracked(const struct sk_buff *skb)
-{
-	return (skb->nfct == &nf_conntrack_untracked.ct_general);
-}
-
-static inline void nf_ct_untrack(struct sk_buff *skb)
-{
-	skb->nfct = &nf_conntrack_untracked.ct_general;
-}
-
-static inline int nf_ct_get_ctinfo(const struct sk_buff *skb,
-				   enum ip_conntrack_info *ctinfo)
-{
-	struct nf_conn *ct = nf_ct_get(skb, ctinfo);
-	return (ct != NULL);
-}
-
-#endif /* CONFIG_IP_NF_CONNTRACK */
-
-#endif /* __KERNEL__ */
-
-#endif /* _NF_CONNTRACK_COMPAT_H */
diff --git a/include/net/netfilter/nf_nat_rule.h b/include/net/netfilter/nf_nat_rule.h
index f191c672bcc..e76565459ad 100644
--- a/include/net/netfilter/nf_nat_rule.h
+++ b/include/net/netfilter/nf_nat_rule.h
@@ -4,16 +4,6 @@
 #include <net/netfilter/nf_nat.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 
-/* Compatibility definitions for ipt_FOO modules */
-#define ip_nat_range			nf_nat_range
-#define ip_conntrack_tuple		nf_conntrack_tuple
-#define ip_conntrack_get		nf_ct_get
-#define ip_conntrack			nf_conn
-#define ip_nat_setup_info		nf_nat_setup_info
-#define ip_nat_multi_range_compat	nf_nat_multi_range_compat
-#define ip_ct_iterate_cleanup		nf_ct_iterate_cleanup
-#define	IP_NF_ASSERT			NF_CT_ASSERT
-
 extern int nf_nat_rule_init(void) __init;
 extern void nf_nat_rule_cleanup(void);
 extern int nf_nat_rule_find(struct sk_buff **pskb,
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 601808c796e..46509fae9fd 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -30,188 +30,6 @@ config NF_CONNTRACK_PROC_COMPAT
 
 	  If unsure, say Y.
 
-# connection tracking, helpers and protocols
-config IP_NF_CT_ACCT
-	bool "Connection tracking flow accounting"
-	depends on IP_NF_CONNTRACK
-	help
-	  If this option is enabled, the connection tracking code will
-	  keep per-flow packet and byte counters.
-
-	  Those counters can be used for flow-based accounting or the
-	  `connbytes' match.
-
-	  If unsure, say `N'.
-
-config IP_NF_CONNTRACK_MARK
-	bool  'Connection mark tracking support'
-	depends on IP_NF_CONNTRACK
-	help
-	  This option enables support for connection marks, used by the
-	  `CONNMARK' target and `connmark' match. Similar to the mark value
-	  of packets, but this mark value is kept in the conntrack session
-	  instead of the individual packets.
-	
-config IP_NF_CONNTRACK_SECMARK
-	bool  'Connection tracking security mark support'
-	depends on IP_NF_CONNTRACK && NETWORK_SECMARK
-	help
-	  This option enables security markings to be applied to
-	  connections.  Typically they are copied to connections from
-	  packets using the CONNSECMARK target and copied back from
-	  connections to packets with the same target, with the packets
-	  being originally labeled via SECMARK.
-
-	  If unsure, say 'N'.
-
-config IP_NF_CONNTRACK_EVENTS
-	bool "Connection tracking events (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && IP_NF_CONNTRACK
-	help
-	  If this option is enabled, the connection tracking code will
-	  provide a notifier chain that can be used by other kernel code
-	  to get notified about changes in the connection tracking state.
-	  
-	  IF unsure, say `N'.
-
-config IP_NF_CONNTRACK_NETLINK
-	tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
-	depends on EXPERIMENTAL && IP_NF_CONNTRACK && NETFILTER_NETLINK
-	depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
-	depends on IP_NF_NAT=n || IP_NF_NAT
-	help
-	  This option enables support for a netlink-based userspace interface
-
-
-config IP_NF_CT_PROTO_SCTP
-	tristate  'SCTP protocol connection tracking support (EXPERIMENTAL)'
-	depends on IP_NF_CONNTRACK && EXPERIMENTAL
-	help
-	  With this option enabled, the connection tracking code will
-	  be able to do state tracking on SCTP connections.
-
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/modules.txt>.  If unsure, say `N'.
-
-config IP_NF_FTP
-	tristate "FTP protocol support"
-	depends on IP_NF_CONNTRACK
-	help
-	  Tracking FTP connections is problematic: special helpers are
-	  required for tracking them, and doing masquerading and other forms
-	  of Network Address Translation on them.
-
-	  To compile it as a module, choose M here.  If unsure, say Y.
-
-config IP_NF_IRC
-	tristate "IRC protocol support"
-	depends on IP_NF_CONNTRACK
-	---help---
-	  There is a commonly-used extension to IRC called
-	  Direct Client-to-Client Protocol (DCC).  This enables users to send
-	  files to each other, and also chat to each other without the need
-	  of a server.  DCC Sending is used anywhere you send files over IRC,
-	  and DCC Chat is most commonly used by Eggdrop bots.  If you are
-	  using NAT, this extension will enable you to send files and initiate
-	  chats.  Note that you do NOT need this extension to get files or
-	  have others initiate chats, or everything else in IRC.
-
-	  To compile it as a module, choose M here.  If unsure, say Y.
-
-config IP_NF_NETBIOS_NS
-	tristate "NetBIOS name service protocol support (EXPERIMENTAL)"
-	depends on IP_NF_CONNTRACK && EXPERIMENTAL
-	help
-	  NetBIOS name service requests are sent as broadcast messages from an
-	  unprivileged port and responded to with unicast messages to the
-	  same port. This make them hard to firewall properly because connection
-	  tracking doesn't deal with broadcasts. This helper tracks locally
-	  originating NetBIOS name service requests and the corresponding
-	  responses. It relies on correct IP address configuration, specifically
-	  netmask and broadcast address. When properly configured, the output
-	  of "ip address show" should look similar to this:
-
-	  $ ip -4 address show eth0
-	  4: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000
-	      inet 172.16.2.252/24 brd 172.16.2.255 scope global eth0
-	  
-	  To compile it as a module, choose M here.  If unsure, say N.
-
-config IP_NF_TFTP
-	tristate "TFTP protocol support"
-	depends on IP_NF_CONNTRACK
-	help
-	  TFTP connection tracking helper, this is required depending
-	  on how restrictive your ruleset is.
-	  If you are using a tftp client behind -j SNAT or -j MASQUERADING
-	  you will need this.
-
-	  To compile it as a module, choose M here.  If unsure, say Y.
-
-config IP_NF_AMANDA
-	tristate "Amanda backup protocol support"
-	depends on IP_NF_CONNTRACK
-	select TEXTSEARCH
-	select TEXTSEARCH_KMP
-	help
-	  If you are running the Amanda backup package <http://www.amanda.org/>
-	  on this machine or machines that will be MASQUERADED through this
-	  machine, then you may want to enable this feature.  This allows the
-	  connection tracking and natting code to allow the sub-channels that
-	  Amanda requires for communication of the backup data, messages and
-	  index.
-
-	  To compile it as a module, choose M here.  If unsure, say Y.
-
-config IP_NF_PPTP
-	tristate  'PPTP protocol support'
-	depends on IP_NF_CONNTRACK
-	help
-	  This module adds support for PPTP (Point to Point Tunnelling
-	  Protocol, RFC2637) connection tracking and NAT. 
-	
-	  If you are running PPTP sessions over a stateful firewall or NAT
-	  box, you may want to enable this feature.  
-	
-	  Please note that not all PPTP modes of operation are supported yet.
-	  For more info, read top of the file
-	  net/ipv4/netfilter/ip_conntrack_pptp.c
-	
-	  If you want to compile it as a module, say M here and read
-	  Documentation/modules.txt.  If unsure, say `N'.
-
-config IP_NF_H323
-	tristate  'H.323 protocol support (EXPERIMENTAL)'
-	depends on IP_NF_CONNTRACK && EXPERIMENTAL
-	help
-	  H.323 is a VoIP signalling protocol from ITU-T. As one of the most
-	  important VoIP protocols, it is widely used by voice hardware and
-	  software including voice gateways, IP phones, Netmeeting, OpenPhone,
-	  Gnomemeeting, etc.
-
-	  With this module you can support H.323 on a connection tracking/NAT
-	  firewall.
-
-	  This module supports RAS, Fast Start, H.245 Tunnelling, Call
-	  Forwarding, RTP/RTCP and T.120 based audio, video, fax, chat,
-	  whiteboard, file transfer, etc. For more information, please
-	  visit http://nath323.sourceforge.net/.
-
-	  If you want to compile it as a module, say 'M' here and read
-	  Documentation/modules.txt.  If unsure, say 'N'.
-
-config IP_NF_SIP
-	tristate "SIP protocol support (EXPERIMENTAL)"
-	depends on IP_NF_CONNTRACK && EXPERIMENTAL
-	help
-	  SIP is an application-layer control protocol that can establish,
-	  modify, and terminate multimedia sessions (conferences) such as
-	  Internet telephony calls. With the ip_conntrack_sip and
-	  the ip_nat_sip modules you can support the protocol on a connection
-	  tracking/NATing firewall.
-
-	  To compile it as a module, choose M here.  If unsure, say Y.
-
 config IP_NF_QUEUE
 	tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
 	help
@@ -361,17 +179,6 @@ config IP_NF_TARGET_ULOG
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-# NAT + specific targets: ip_conntrack
-config IP_NF_NAT
-	tristate "Full NAT"
-	depends on IP_NF_IPTABLES && IP_NF_CONNTRACK
-	help
-	  The Full NAT option allows masquerading, port forwarding and other
-	  forms of full Network Address Port Translation.  It is controlled by
-	  the `nat' table in iptables: see the man page for iptables(8).
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 # NAT + specific targets: nf_conntrack
 config NF_NAT
 	tristate "Full NAT"
@@ -383,11 +190,6 @@ config NF_NAT
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_NAT_NEEDED
-	bool
-	depends on IP_NF_NAT
-	default y
-
 config NF_NAT_NEEDED
 	bool
 	depends on NF_NAT
@@ -395,7 +197,7 @@ config NF_NAT_NEEDED
 
 config IP_NF_TARGET_MASQUERADE
 	tristate "MASQUERADE target support"
-	depends on (NF_NAT || IP_NF_NAT)
+	depends on NF_NAT
 	help
 	  Masquerading is a special case of NAT: all outgoing connections are
 	  changed to seem to come from a particular interface's address, and
@@ -407,7 +209,7 @@ config IP_NF_TARGET_MASQUERADE
 
 config IP_NF_TARGET_REDIRECT
 	tristate "REDIRECT target support"
-	depends on (NF_NAT || IP_NF_NAT)
+	depends on NF_NAT
 	help
 	  REDIRECT is a special case of NAT: all incoming connections are
 	  mapped onto the incoming interface's address, causing the packets to
@@ -418,7 +220,7 @@ config IP_NF_TARGET_REDIRECT
 
 config IP_NF_TARGET_NETMAP
 	tristate "NETMAP target support"
-	depends on (NF_NAT || IP_NF_NAT)
+	depends on NF_NAT
 	help
 	  NETMAP is an implementation of static 1:1 NAT mapping of network
 	  addresses. It maps the network address part, while keeping the host
@@ -429,28 +231,13 @@ config IP_NF_TARGET_NETMAP
 
 config IP_NF_TARGET_SAME
 	tristate "SAME target support"
-	depends on (NF_NAT || IP_NF_NAT)
+	depends on NF_NAT
 	help
 	  This option adds a `SAME' target, which works like the standard SNAT
 	  target, but attempts to give clients the same IP for all connections.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config IP_NF_NAT_SNMP_BASIC
-	tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && IP_NF_NAT
-	---help---
-
-	  This module implements an Application Layer Gateway (ALG) for
-	  SNMP payloads.  In conjunction with NAT, it allows a network
-	  management system to access multiple private networks with
-	  conflicting addresses.  It works by modifying IP addresses
-	  inside SNMP payloads to match IP-layer NAT mapping.
-
-	  This is the "basic" form of SNMP-ALG, as described in RFC 2962
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 config NF_NAT_SNMP_BASIC
 	tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && NF_NAT
@@ -477,78 +264,37 @@ config NF_NAT_PROTO_GRE
 	tristate
 	depends on NF_NAT && NF_CT_PROTO_GRE
 
-config IP_NF_NAT_FTP
-	tristate
-	depends on IP_NF_IPTABLES && IP_NF_CONNTRACK && IP_NF_NAT
-	default IP_NF_NAT && IP_NF_FTP
-
 config NF_NAT_FTP
 	tristate
 	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_FTP
 
-config IP_NF_NAT_IRC
-	tristate
-	depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
-	default IP_NF_NAT if IP_NF_IRC=y
-	default m if IP_NF_IRC=m
-
 config NF_NAT_IRC
 	tristate
 	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_IRC
 
-config IP_NF_NAT_TFTP
-	tristate
-	depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
-	default IP_NF_NAT if IP_NF_TFTP=y
-	default m if IP_NF_TFTP=m
-
 config NF_NAT_TFTP
 	tristate
 	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_TFTP
 
-config IP_NF_NAT_AMANDA
-	tristate
-	depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
-	default IP_NF_NAT if IP_NF_AMANDA=y
-	default m if IP_NF_AMANDA=m
-
 config NF_NAT_AMANDA
 	tristate
 	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_AMANDA
 
-config IP_NF_NAT_PPTP
-	tristate
-	depends on IP_NF_NAT!=n && IP_NF_PPTP!=n
-	default IP_NF_NAT if IP_NF_PPTP=y
-	default m if IP_NF_PPTP=m
-
 config NF_NAT_PPTP
 	tristate
 	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_PPTP
 	select NF_NAT_PROTO_GRE
 
-config IP_NF_NAT_H323
-	tristate
-	depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
-	default IP_NF_NAT if IP_NF_H323=y
-	default m if IP_NF_H323=m
-
 config NF_NAT_H323
 	tristate
 	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
 	default NF_NAT && NF_CONNTRACK_H323
 
-config IP_NF_NAT_SIP
-	tristate
-	depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
-	default IP_NF_NAT if IP_NF_SIP=y
-	default m if IP_NF_SIP=m
-
 config NF_NAT_SIP
 	tristate
 	depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT
@@ -606,9 +352,8 @@ config IP_NF_TARGET_TTL
 config IP_NF_TARGET_CLUSTERIP
 	tristate "CLUSTERIP target support (EXPERIMENTAL)"
 	depends on IP_NF_MANGLE && EXPERIMENTAL
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4
-	select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
-	select NF_CONNTRACK_MARK if NF_CONNTRACK_IPV4
+	depends on NF_CONNTRACK_IPV4
+	select NF_CONNTRACK_MARK
 	help
 	  The CLUSTERIP target allows you to build load-balancing clusters of
 	  network servers without having a dedicated load-balancing
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 6625ec68180..409d273f6f8 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -2,8 +2,6 @@
 # Makefile for the netfilter modules on top of IPv4.
 #
 
-# objects for the standalone - connection tracking / NAT
-ip_conntrack-objs	:= ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
 # objects for l3 independent conntrack
 nf_conntrack_ipv4-objs  :=  nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
 ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y)
@@ -12,53 +10,14 @@ nf_conntrack_ipv4-objs	+= nf_conntrack_l3proto_ipv4_compat.o
 endif
 endif
 
-ip_nat-objs	:= ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
-nf_nat-objs	:= nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
-ifneq ($(CONFIG_NF_NAT),)
+nf_nat-objs		:= nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
 iptable_nat-objs	:= nf_nat_rule.o nf_nat_standalone.o
-else
-iptable_nat-objs	:= ip_nat_rule.o ip_nat_standalone.o
-endif
-
-ip_conntrack_pptp-objs	:= ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o
-ip_nat_pptp-objs	:= ip_nat_helper_pptp.o ip_nat_proto_gre.o
-
-ip_conntrack_h323-objs := ip_conntrack_helper_h323.o ../../netfilter/nf_conntrack_h323_asn1.o
-ip_nat_h323-objs := ip_nat_helper_h323.o
 
 # connection tracking
-obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
 obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
 
-obj-$(CONFIG_IP_NF_NAT) += ip_nat.o
 obj-$(CONFIG_NF_NAT) += nf_nat.o
 
-# conntrack netlink interface
-obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
-
-
-# SCTP protocol connection tracking
-obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
-
-# connection tracking helpers
-obj-$(CONFIG_IP_NF_H323) += ip_conntrack_h323.o
-obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o
-obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o
-obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o
-obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o
-obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o
-obj-$(CONFIG_IP_NF_SIP) += ip_conntrack_sip.o
-obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o
-
-# NAT helpers (ip_conntrack)
-obj-$(CONFIG_IP_NF_NAT_H323) += ip_nat_h323.o
-obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o
-obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o
-obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o
-obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
-obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o
-obj-$(CONFIG_IP_NF_NAT_SIP) += ip_nat_sip.o
-
 # NAT helpers (nf_conntrack)
 obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
 obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
@@ -78,7 +37,6 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
 # the three instances of ip_tables
 obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
 obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
 obj-$(CONFIG_NF_NAT) += iptable_nat.o
 obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
 
@@ -100,7 +58,6 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
 obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
 obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
 obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
-obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o
 obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
 obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
 obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
deleted file mode 100644
index c40762c67d0..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/* Amanda extension for IP connection tracking, Version 0.2
- * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
- * based on HW's ip_conntrack_irc.c as well as other modules
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
- *	Module load syntax:
- * 	insmod ip_conntrack_amanda.o [master_timeout=n]
- *
- *	Where master_timeout is the timeout (in seconds) of the master
- *	connection (port 10080).  This defaults to 5 minutes but if
- *	your clients take longer than 5 minutes to do their work
- *	before getting back to the Amanda server, you can increase
- *	this value.
- *
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/textsearch.h>
-#include <linux/skbuff.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
-
-static unsigned int master_timeout = 300;
-static char *ts_algo = "kmp";
-
-MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
-MODULE_DESCRIPTION("Amanda connection tracking module");
-MODULE_LICENSE("GPL");
-module_param(master_timeout, uint, 0600);
-MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
-module_param(ts_algo, charp, 0400);
-MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)");
-
-unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
-				   enum ip_conntrack_info ctinfo,
-				   unsigned int matchoff,
-				   unsigned int matchlen,
-				   struct ip_conntrack_expect *exp);
-EXPORT_SYMBOL_GPL(ip_nat_amanda_hook);
-
-enum amanda_strings {
-	SEARCH_CONNECT,
-	SEARCH_NEWLINE,
-	SEARCH_DATA,
-	SEARCH_MESG,
-	SEARCH_INDEX,
-};
-
-static struct {
-	char			*string;
-	size_t			len;
-	struct ts_config	*ts;
-} search[] = {
-	[SEARCH_CONNECT] = {
-		.string	= "CONNECT ",
-		.len	= 8,
-	},
-	[SEARCH_NEWLINE] = {
-		.string	= "\n",
-		.len	= 1,
-	},
-	[SEARCH_DATA] = {
-		.string	= "DATA ",
-		.len	= 5,
-	},
-	[SEARCH_MESG] = {
-		.string	= "MESG ",
-		.len	= 5,
-	},
-	[SEARCH_INDEX] = {
-		.string = "INDEX ",
-		.len	= 6,
-	},
-};
-
-static int help(struct sk_buff **pskb,
-		struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-{
-	struct ts_state ts;
-	struct ip_conntrack_expect *exp;
-	unsigned int dataoff, start, stop, off, i;
-	char pbuf[sizeof("65535")], *tmp;
-	u_int16_t port, len;
-	int ret = NF_ACCEPT;
-	typeof(ip_nat_amanda_hook) ip_nat_amanda;
-
-	/* Only look at packets from the Amanda server */
-	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
-		return NF_ACCEPT;
-
-	/* increase the UDP timeout of the master connection as replies from
-	 * Amanda clients to the server can be quite delayed */
-	ip_ct_refresh(ct, *pskb, master_timeout * HZ);
-
-	/* No data? */
-	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
-	if (dataoff >= (*pskb)->len) {
-		if (net_ratelimit())
-			printk("amanda_help: skblen = %u\n", (*pskb)->len);
-		return NF_ACCEPT;
-	}
-
-	memset(&ts, 0, sizeof(ts));
-	start = skb_find_text(*pskb, dataoff, (*pskb)->len,
-			      search[SEARCH_CONNECT].ts, &ts);
-	if (start == UINT_MAX)
-		goto out;
-	start += dataoff + search[SEARCH_CONNECT].len;
-
-	memset(&ts, 0, sizeof(ts));
-	stop = skb_find_text(*pskb, start, (*pskb)->len,
-			     search[SEARCH_NEWLINE].ts, &ts);
-	if (stop == UINT_MAX)
-		goto out;
-	stop += start;
-
-	for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
-		memset(&ts, 0, sizeof(ts));
-		off = skb_find_text(*pskb, start, stop, search[i].ts, &ts);
-		if (off == UINT_MAX)
-			continue;
-		off += start + search[i].len;
-
-		len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off);
-		if (skb_copy_bits(*pskb, off, pbuf, len))
-			break;
-		pbuf[len] = '\0';
-
-		port = simple_strtoul(pbuf, &tmp, 10);
-		len = tmp - pbuf;
-		if (port == 0 || len > 5)
-			break;
-
-		exp = ip_conntrack_expect_alloc(ct);
-		if (exp == NULL) {
-			ret = NF_DROP;
-			goto out;
-		}
-
-		exp->expectfn = NULL;
-		exp->flags = 0;
-
-		exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
-		exp->tuple.src.u.tcp.port = 0;
-		exp->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
-		exp->tuple.dst.protonum = IPPROTO_TCP;
-		exp->tuple.dst.u.tcp.port = htons(port);
-
-		exp->mask.src.ip = htonl(0xFFFFFFFF);
-		exp->mask.src.u.tcp.port = 0;
-		exp->mask.dst.ip = htonl(0xFFFFFFFF);
-		exp->mask.dst.protonum = 0xFF;
-		exp->mask.dst.u.tcp.port = htons(0xFFFF);
-
-		/* RCU read locked by nf_hook_slow */
-		ip_nat_amanda = rcu_dereference(ip_nat_amanda_hook);
-		if (ip_nat_amanda)
-			ret = ip_nat_amanda(pskb, ctinfo, off - dataoff,
-					    len, exp);
-		else if (ip_conntrack_expect_related(exp) != 0)
-			ret = NF_DROP;
-		ip_conntrack_expect_put(exp);
-	}
-
-out:
-	return ret;
-}
-
-static struct ip_conntrack_helper amanda_helper = {
-	.max_expected = 3,
-	.timeout = 180,
-	.me = THIS_MODULE,
-	.help = help,
-	.name = "amanda",
-
-	.tuple = { .src = { .u = { .udp = {.port = __constant_htons(10080) } } },
-		   .dst = { .protonum = IPPROTO_UDP },
-	},
-	.mask = { .src = { .u = { 0xFFFF } },
-		 .dst = { .protonum = 0xFF },
-	},
-};
-
-static void __exit ip_conntrack_amanda_fini(void)
-{
-	int i;
-
-	ip_conntrack_helper_unregister(&amanda_helper);
-	for (i = 0; i < ARRAY_SIZE(search); i++)
-		textsearch_destroy(search[i].ts);
-}
-
-static int __init ip_conntrack_amanda_init(void)
-{
-	int ret, i;
-
-	ret = -ENOMEM;
-	for (i = 0; i < ARRAY_SIZE(search); i++) {
-		search[i].ts = textsearch_prepare(ts_algo, search[i].string,
-						  search[i].len,
-						  GFP_KERNEL, TS_AUTOLOAD);
-		if (search[i].ts == NULL)
-			goto err;
-	}
-	ret = ip_conntrack_helper_register(&amanda_helper);
-	if (ret < 0)
-		goto err;
-	return 0;
-
-err:
-	for (; i >= 0; i--) {
-		if (search[i].ts)
-			textsearch_destroy(search[i].ts);
-	}
-	return ret;
-}
-
-module_init(ip_conntrack_amanda_init);
-module_exit(ip_conntrack_amanda_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
deleted file mode 100644
index 986c0c81294..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ /dev/null
@@ -1,1549 +0,0 @@
-/* Connection state tracking for netfilter.  This is separated from,
-   but required by, the NAT layer; it can also be used by an iptables
-   extension. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
- * 	- new API and handling of conntrack/nat helpers
- * 	- now capable of multiple expectations for one master
- * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
- * 	- add usage/reference counts to ip_conntrack_expect
- *	- export ip_conntrack[_expect]_{find_get,put} functions
- * */
-
-#include <linux/types.h>
-#include <linux/icmp.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <linux/vmalloc.h>
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <linux/stddef.h>
-#include <linux/sysctl.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/jhash.h>
-#include <linux/err.h>
-#include <linux/percpu.h>
-#include <linux/moduleparam.h>
-#include <linux/notifier.h>
-
-/* ip_conntrack_lock protects the main hash table, protocol/helper/expected
-   registrations, conntrack timers*/
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-
-#define IP_CONNTRACK_VERSION	"2.4"
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-DEFINE_RWLOCK(ip_conntrack_lock);
-
-/* ip_conntrack_standalone needs this */
-atomic_t ip_conntrack_count = ATOMIC_INIT(0);
-
-void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
-LIST_HEAD(ip_conntrack_expect_list);
-struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly;
-static LIST_HEAD(helpers);
-unsigned int ip_conntrack_htable_size __read_mostly = 0;
-int ip_conntrack_max __read_mostly;
-struct list_head *ip_conntrack_hash __read_mostly;
-static struct kmem_cache *ip_conntrack_cachep __read_mostly;
-static struct kmem_cache *ip_conntrack_expect_cachep __read_mostly;
-struct ip_conntrack ip_conntrack_untracked;
-unsigned int ip_ct_log_invalid __read_mostly;
-static LIST_HEAD(unconfirmed);
-static int ip_conntrack_vmalloc __read_mostly;
-
-static unsigned int ip_conntrack_next_id;
-static unsigned int ip_conntrack_expect_next_id;
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-ATOMIC_NOTIFIER_HEAD(ip_conntrack_chain);
-ATOMIC_NOTIFIER_HEAD(ip_conntrack_expect_chain);
-
-DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
-
-/* deliver cached events and clear cache entry - must be called with locally
- * disabled softirqs */
-static inline void
-__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
-{
-	DEBUGP("ecache: delivering events for %p\n", ecache->ct);
-	if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
-		atomic_notifier_call_chain(&ip_conntrack_chain, ecache->events,
-				    ecache->ct);
-	ecache->events = 0;
-	ip_conntrack_put(ecache->ct);
-	ecache->ct = NULL;
-}
-
-/* Deliver all cached events for a particular conntrack. This is called
- * by code prior to async packet handling or freeing the skb */
-void ip_ct_deliver_cached_events(const struct ip_conntrack *ct)
-{
-	struct ip_conntrack_ecache *ecache;
-
-	local_bh_disable();
-	ecache = &__get_cpu_var(ip_conntrack_ecache);
-	if (ecache->ct == ct)
-		__ip_ct_deliver_cached_events(ecache);
-	local_bh_enable();
-}
-
-void __ip_ct_event_cache_init(struct ip_conntrack *ct)
-{
-	struct ip_conntrack_ecache *ecache;
-
-	/* take care of delivering potentially old events */
-	ecache = &__get_cpu_var(ip_conntrack_ecache);
-	BUG_ON(ecache->ct == ct);
-	if (ecache->ct)
-		__ip_ct_deliver_cached_events(ecache);
-	/* initialize for this conntrack/packet */
-	ecache->ct = ct;
-	nf_conntrack_get(&ct->ct_general);
-}
-
-/* flush the event cache - touches other CPU's data and must not be called while
- * packets are still passing through the code */
-static void ip_ct_event_cache_flush(void)
-{
-	struct ip_conntrack_ecache *ecache;
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		ecache = &per_cpu(ip_conntrack_ecache, cpu);
-		if (ecache->ct)
-			ip_conntrack_put(ecache->ct);
-	}
-}
-#else
-static inline void ip_ct_event_cache_flush(void) {}
-#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
-
-DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
-
-static int ip_conntrack_hash_rnd_initted;
-static unsigned int ip_conntrack_hash_rnd;
-
-static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
-			    unsigned int size, unsigned int rnd)
-{
-	return (jhash_3words((__force u32)tuple->src.ip,
-			     ((__force u32)tuple->dst.ip ^ tuple->dst.protonum),
-			     (tuple->src.u.all | (tuple->dst.u.all << 16)),
-			     rnd) % size);
-}
-
-static u_int32_t
-hash_conntrack(const struct ip_conntrack_tuple *tuple)
-{
-	return __hash_conntrack(tuple, ip_conntrack_htable_size,
-				ip_conntrack_hash_rnd);
-}
-
-int
-ip_ct_get_tuple(const struct iphdr *iph,
-		const struct sk_buff *skb,
-		unsigned int dataoff,
-		struct ip_conntrack_tuple *tuple,
-		const struct ip_conntrack_protocol *protocol)
-{
-	/* Never happen */
-	if (iph->frag_off & htons(IP_OFFSET)) {
-		printk("ip_conntrack_core: Frag of proto %u.\n",
-		       iph->protocol);
-		return 0;
-	}
-
-	tuple->src.ip = iph->saddr;
-	tuple->dst.ip = iph->daddr;
-	tuple->dst.protonum = iph->protocol;
-	tuple->dst.dir = IP_CT_DIR_ORIGINAL;
-
-	return protocol->pkt_to_tuple(skb, dataoff, tuple);
-}
-
-int
-ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
-		   const struct ip_conntrack_tuple *orig,
-		   const struct ip_conntrack_protocol *protocol)
-{
-	inverse->src.ip = orig->dst.ip;
-	inverse->dst.ip = orig->src.ip;
-	inverse->dst.protonum = orig->dst.protonum;
-	inverse->dst.dir = !orig->dst.dir;
-
-	return protocol->invert_tuple(inverse, orig);
-}
-
-
-/* ip_conntrack_expect helper functions */
-void ip_ct_unlink_expect(struct ip_conntrack_expect *exp)
-{
-	IP_NF_ASSERT(!timer_pending(&exp->timeout));
-	list_del(&exp->list);
-	CONNTRACK_STAT_INC(expect_delete);
-	exp->master->expecting--;
-	ip_conntrack_expect_put(exp);
-}
-
-static void expectation_timed_out(unsigned long ul_expect)
-{
-	struct ip_conntrack_expect *exp = (void *)ul_expect;
-
-	write_lock_bh(&ip_conntrack_lock);
-	ip_ct_unlink_expect(exp);
-	write_unlock_bh(&ip_conntrack_lock);
-	ip_conntrack_expect_put(exp);
-}
-
-struct ip_conntrack_expect *
-__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
-{
-	struct ip_conntrack_expect *i;
-
-	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
-		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
-			return i;
-	}
-	return NULL;
-}
-
-/* Just find a expectation corresponding to a tuple. */
-struct ip_conntrack_expect *
-ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
-{
-	struct ip_conntrack_expect *i;
-
-	read_lock_bh(&ip_conntrack_lock);
-	i = __ip_conntrack_expect_find(tuple);
-	if (i)
-		atomic_inc(&i->use);
-	read_unlock_bh(&ip_conntrack_lock);
-
-	return i;
-}
-
-/* If an expectation for this connection is found, it gets delete from
- * global list then returned. */
-static struct ip_conntrack_expect *
-find_expectation(const struct ip_conntrack_tuple *tuple)
-{
-	struct ip_conntrack_expect *i;
-
-	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
-		/* If master is not in hash table yet (ie. packet hasn't left
-		   this machine yet), how can other end know about expected?
-		   Hence these are not the droids you are looking for (if
-		   master ct never got confirmed, we'd hold a reference to it
-		   and weird things would happen to future packets). */
-		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
-		    && is_confirmed(i->master)) {
-			if (i->flags & IP_CT_EXPECT_PERMANENT) {
-				atomic_inc(&i->use);
-				return i;
-			} else if (del_timer(&i->timeout)) {
-				ip_ct_unlink_expect(i);
-				return i;
-			}
-		}
-	}
-	return NULL;
-}
-
-/* delete all expectations for this conntrack */
-void ip_ct_remove_expectations(struct ip_conntrack *ct)
-{
-	struct ip_conntrack_expect *i, *tmp;
-
-	/* Optimization: most connection never expect any others. */
-	if (ct->expecting == 0)
-		return;
-
-	list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
-		if (i->master == ct && del_timer(&i->timeout)) {
-			ip_ct_unlink_expect(i);
-			ip_conntrack_expect_put(i);
-		}
-	}
-}
-
-static void
-clean_from_lists(struct ip_conntrack *ct)
-{
-	DEBUGP("clean_from_lists(%p)\n", ct);
-	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-	list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);
-
-	/* Destroy all pending expectations */
-	ip_ct_remove_expectations(ct);
-}
-
-static void
-destroy_conntrack(struct nf_conntrack *nfct)
-{
-	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
-	struct ip_conntrack_protocol *proto;
-	struct ip_conntrack_helper *helper;
-	typeof(ip_conntrack_destroyed) destroyed;
-
-	DEBUGP("destroy_conntrack(%p)\n", ct);
-	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
-	IP_NF_ASSERT(!timer_pending(&ct->timeout));
-
-	ip_conntrack_event(IPCT_DESTROY, ct);
-	set_bit(IPS_DYING_BIT, &ct->status);
-
-	helper = ct->helper;
-	if (helper && helper->destroy)
-		helper->destroy(ct);
-
-	/* To make sure we don't get any weird locking issues here:
-	 * destroy_conntrack() MUST NOT be called with a write lock
-	 * to ip_conntrack_lock!!! -HW */
-	rcu_read_lock();
-	proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
-	if (proto && proto->destroy)
-		proto->destroy(ct);
-
-	destroyed = rcu_dereference(ip_conntrack_destroyed);
-	if (destroyed)
-		destroyed(ct);
-
-	rcu_read_unlock();
-
-	write_lock_bh(&ip_conntrack_lock);
-	/* Expectations will have been removed in clean_from_lists,
-	 * except TFTP can create an expectation on the first packet,
-	 * before connection is in the list, so we need to clean here,
-	 * too. */
-	ip_ct_remove_expectations(ct);
-
-	/* We overload first tuple to link into unconfirmed list. */
-	if (!is_confirmed(ct)) {
-		BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
-		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-	}
-
-	CONNTRACK_STAT_INC(delete);
-	write_unlock_bh(&ip_conntrack_lock);
-
-	if (ct->master)
-		ip_conntrack_put(ct->master);
-
-	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
-	ip_conntrack_free(ct);
-}
-
-static void death_by_timeout(unsigned long ul_conntrack)
-{
-	struct ip_conntrack *ct = (void *)ul_conntrack;
-
-	write_lock_bh(&ip_conntrack_lock);
-	/* Inside lock so preempt is disabled on module removal path.
-	 * Otherwise we can get spurious warnings. */
-	CONNTRACK_STAT_INC(delete_list);
-	clean_from_lists(ct);
-	write_unlock_bh(&ip_conntrack_lock);
-	ip_conntrack_put(ct);
-}
-
-struct ip_conntrack_tuple_hash *
-__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
-		    const struct ip_conntrack *ignored_conntrack)
-{
-	struct ip_conntrack_tuple_hash *h;
-	unsigned int hash = hash_conntrack(tuple);
-
-	list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
-		if (tuplehash_to_ctrack(h) != ignored_conntrack &&
-		    ip_ct_tuple_equal(tuple, &h->tuple)) {
-			CONNTRACK_STAT_INC(found);
-			return h;
-		}
-		CONNTRACK_STAT_INC(searched);
-	}
-
-	return NULL;
-}
-
-/* Find a connection corresponding to a tuple. */
-struct ip_conntrack_tuple_hash *
-ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
-		      const struct ip_conntrack *ignored_conntrack)
-{
-	struct ip_conntrack_tuple_hash *h;
-
-	read_lock_bh(&ip_conntrack_lock);
-	h = __ip_conntrack_find(tuple, ignored_conntrack);
-	if (h)
-		atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
-	read_unlock_bh(&ip_conntrack_lock);
-
-	return h;
-}
-
-static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
-					unsigned int hash,
-					unsigned int repl_hash)
-{
-	ct->id = ++ip_conntrack_next_id;
-	list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
-		 &ip_conntrack_hash[hash]);
-	list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
-		 &ip_conntrack_hash[repl_hash]);
-}
-
-void ip_conntrack_hash_insert(struct ip_conntrack *ct)
-{
-	unsigned int hash, repl_hash;
-
-	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
-	write_lock_bh(&ip_conntrack_lock);
-	__ip_conntrack_hash_insert(ct, hash, repl_hash);
-	write_unlock_bh(&ip_conntrack_lock);
-}
-
-/* Confirm a connection given skb; places it in hash table */
-int
-__ip_conntrack_confirm(struct sk_buff **pskb)
-{
-	unsigned int hash, repl_hash;
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-
-	ct = ip_conntrack_get(*pskb, &ctinfo);
-
-	/* ipt_REJECT uses ip_conntrack_attach to attach related
-	   ICMP/TCP RST packets in other direction.  Actual packet
-	   which created connection will be IP_CT_NEW or for an
-	   expected connection, IP_CT_RELATED. */
-	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
-		return NF_ACCEPT;
-
-	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
-	/* We're not in hash table, and we refuse to set up related
-	   connections for unconfirmed conns.  But packet copies and
-	   REJECT will give spurious warnings here. */
-	/* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
-
-	/* No external references means noone else could have
-	   confirmed us. */
-	IP_NF_ASSERT(!is_confirmed(ct));
-	DEBUGP("Confirming conntrack %p\n", ct);
-
-	write_lock_bh(&ip_conntrack_lock);
-
-	/* See if there's one in the list already, including reverse:
-	   NAT could have grabbed it without realizing, since we're
-	   not in the hash.  If there is, we lost race. */
-	list_for_each_entry(h, &ip_conntrack_hash[hash], list)
-		if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-				      &h->tuple))
-			goto out;
-	list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list)
-		if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
-				      &h->tuple))
-			goto out;
-
-	/* Remove from unconfirmed list */
-	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-
-	__ip_conntrack_hash_insert(ct, hash, repl_hash);
-	/* Timer relative to confirmation time, not original
-	   setting time, otherwise we'd get timer wrap in
-	   weird delay cases. */
-	ct->timeout.expires += jiffies;
-	add_timer(&ct->timeout);
-	atomic_inc(&ct->ct_general.use);
-	set_bit(IPS_CONFIRMED_BIT, &ct->status);
-	CONNTRACK_STAT_INC(insert);
-	write_unlock_bh(&ip_conntrack_lock);
-	if (ct->helper)
-		ip_conntrack_event_cache(IPCT_HELPER, *pskb);
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
-	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
-		ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
-#endif
-	ip_conntrack_event_cache(master_ct(ct) ?
-				 IPCT_RELATED : IPCT_NEW, *pskb);
-
-	return NF_ACCEPT;
-
-out:
-	CONNTRACK_STAT_INC(insert_failed);
-	write_unlock_bh(&ip_conntrack_lock);
-	return NF_DROP;
-}
-
-/* Returns true if a connection correspondings to the tuple (required
-   for NAT). */
-int
-ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
-			 const struct ip_conntrack *ignored_conntrack)
-{
-	struct ip_conntrack_tuple_hash *h;
-
-	read_lock_bh(&ip_conntrack_lock);
-	h = __ip_conntrack_find(tuple, ignored_conntrack);
-	read_unlock_bh(&ip_conntrack_lock);
-
-	return h != NULL;
-}
-
-/* There's a small race here where we may free a just-assured
-   connection.  Too bad: we're in trouble anyway. */
-static int early_drop(struct list_head *chain)
-{
-	/* Traverse backwards: gives us oldest, which is roughly LRU */
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack *ct = NULL, *tmp;
-	int dropped = 0;
-
-	read_lock_bh(&ip_conntrack_lock);
-	list_for_each_entry_reverse(h, chain, list) {
-		tmp = tuplehash_to_ctrack(h);
-		if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
-			ct = tmp;
-			atomic_inc(&ct->ct_general.use);
-			break;
-		}
-	}
-	read_unlock_bh(&ip_conntrack_lock);
-
-	if (!ct)
-		return dropped;
-
-	if (del_timer(&ct->timeout)) {
-		death_by_timeout((unsigned long)ct);
-		dropped = 1;
-		CONNTRACK_STAT_INC_ATOMIC(early_drop);
-	}
-	ip_conntrack_put(ct);
-	return dropped;
-}
-
-static struct ip_conntrack_helper *
-__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
-{
-	struct ip_conntrack_helper *h;
-
-	list_for_each_entry(h, &helpers, list) {
-		if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
-			return h;
-	}
-	return NULL;
-}
-
-struct ip_conntrack_helper *
-ip_conntrack_helper_find_get( const struct ip_conntrack_tuple *tuple)
-{
-	struct ip_conntrack_helper *helper;
-
-	/* need ip_conntrack_lock to assure that helper exists until
-	 * try_module_get() is called */
-	read_lock_bh(&ip_conntrack_lock);
-
-	helper = __ip_conntrack_helper_find(tuple);
-	if (helper) {
-		/* need to increase module usage count to assure helper will
-		 * not go away while the caller is e.g. busy putting a
-		 * conntrack in the hash that uses the helper */
-		if (!try_module_get(helper->me))
-			helper = NULL;
-	}
-
-	read_unlock_bh(&ip_conntrack_lock);
-
-	return helper;
-}
-
-void ip_conntrack_helper_put(struct ip_conntrack_helper *helper)
-{
-	module_put(helper->me);
-}
-
-struct ip_conntrack_protocol *
-__ip_conntrack_proto_find(u_int8_t protocol)
-{
-	return ip_ct_protos[protocol];
-}
-
-/* this is guaranteed to always return a valid protocol helper, since
- * it falls back to generic_protocol */
-struct ip_conntrack_protocol *
-ip_conntrack_proto_find_get(u_int8_t protocol)
-{
-	struct ip_conntrack_protocol *p;
-
-	rcu_read_lock();
-	p = __ip_conntrack_proto_find(protocol);
-	if (p) {
-		if (!try_module_get(p->me))
-			p = &ip_conntrack_generic_protocol;
-	}
-	rcu_read_unlock();
-
-	return p;
-}
-
-void ip_conntrack_proto_put(struct ip_conntrack_protocol *p)
-{
-	module_put(p->me);
-}
-
-struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
-					struct ip_conntrack_tuple *repl)
-{
-	struct ip_conntrack *conntrack;
-
-	if (!ip_conntrack_hash_rnd_initted) {
-		get_random_bytes(&ip_conntrack_hash_rnd, 4);
-		ip_conntrack_hash_rnd_initted = 1;
-	}
-
-	/* We don't want any race condition at early drop stage */
-	atomic_inc(&ip_conntrack_count);
-
-	if (ip_conntrack_max
-	    && atomic_read(&ip_conntrack_count) > ip_conntrack_max) {
-		unsigned int hash = hash_conntrack(orig);
-		/* Try dropping from this hash chain. */
-		if (!early_drop(&ip_conntrack_hash[hash])) {
-			atomic_dec(&ip_conntrack_count);
-			if (net_ratelimit())
-				printk(KERN_WARNING
-				       "ip_conntrack: table full, dropping"
-				       " packet.\n");
-			return ERR_PTR(-ENOMEM);
-		}
-	}
-
-	conntrack = kmem_cache_zalloc(ip_conntrack_cachep, GFP_ATOMIC);
-	if (!conntrack) {
-		DEBUGP("Can't allocate conntrack.\n");
-		atomic_dec(&ip_conntrack_count);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	atomic_set(&conntrack->ct_general.use, 1);
-	conntrack->ct_general.destroy = destroy_conntrack;
-	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
-	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
-	/* Don't set timer yet: wait for confirmation */
-	init_timer(&conntrack->timeout);
-	conntrack->timeout.data = (unsigned long)conntrack;
-	conntrack->timeout.function = death_by_timeout;
-
-	return conntrack;
-}
-
-void
-ip_conntrack_free(struct ip_conntrack *conntrack)
-{
-	atomic_dec(&ip_conntrack_count);
-	kmem_cache_free(ip_conntrack_cachep, conntrack);
-}
-
-/* Allocate a new conntrack: we return -ENOMEM if classification
- * failed due to stress.   Otherwise it really is unclassifiable */
-static struct ip_conntrack_tuple_hash *
-init_conntrack(struct ip_conntrack_tuple *tuple,
-	       struct ip_conntrack_protocol *protocol,
-	       struct sk_buff *skb)
-{
-	struct ip_conntrack *conntrack;
-	struct ip_conntrack_tuple repl_tuple;
-	struct ip_conntrack_expect *exp;
-
-	if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
-		DEBUGP("Can't invert tuple.\n");
-		return NULL;
-	}
-
-	conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
-	if (conntrack == NULL || IS_ERR(conntrack))
-		return (struct ip_conntrack_tuple_hash *)conntrack;
-
-	if (!protocol->new(conntrack, skb)) {
-		ip_conntrack_free(conntrack);
-		return NULL;
-	}
-
-	write_lock_bh(&ip_conntrack_lock);
-	exp = find_expectation(tuple);
-
-	if (exp) {
-		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
-			conntrack, exp);
-		/* Welcome, Mr. Bond.  We've been expecting you... */
-		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
-		conntrack->master = exp->master;
-#ifdef CONFIG_IP_NF_CONNTRACK_MARK
-		conntrack->mark = exp->master->mark;
-#endif
-#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
-    defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
-		/* this is ugly, but there is no other place where to put it */
-		conntrack->nat.masq_index = exp->master->nat.masq_index;
-#endif
-#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
-		conntrack->secmark = exp->master->secmark;
-#endif
-		nf_conntrack_get(&conntrack->master->ct_general);
-		CONNTRACK_STAT_INC(expect_new);
-	} else {
-		conntrack->helper = __ip_conntrack_helper_find(&repl_tuple);
-
-		CONNTRACK_STAT_INC(new);
-	}
-
-	/* Overload tuple linked list to put us in unconfirmed list. */
-	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
-
-	write_unlock_bh(&ip_conntrack_lock);
-
-	if (exp) {
-		if (exp->expectfn)
-			exp->expectfn(conntrack, exp);
-		ip_conntrack_expect_put(exp);
-	}
-
-	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
-}
-
-/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
-static inline struct ip_conntrack *
-resolve_normal_ct(struct sk_buff *skb,
-		  struct ip_conntrack_protocol *proto,
-		  int *set_reply,
-		  unsigned int hooknum,
-		  enum ip_conntrack_info *ctinfo)
-{
-	struct ip_conntrack_tuple tuple;
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack *ct;
-
-	IP_NF_ASSERT((ip_hdr(skb)->frag_off & htons(IP_OFFSET)) == 0);
-
-	if (!ip_ct_get_tuple(ip_hdr(skb), skb, ip_hdrlen(skb), &tuple,proto))
-		return NULL;
-
-	/* look for tuple match */
-	h = ip_conntrack_find_get(&tuple, NULL);
-	if (!h) {
-		h = init_conntrack(&tuple, proto, skb);
-		if (!h)
-			return NULL;
-		if (IS_ERR(h))
-			return (void *)h;
-	}
-	ct = tuplehash_to_ctrack(h);
-
-	/* It exists; we have (non-exclusive) reference. */
-	if (DIRECTION(h) == IP_CT_DIR_REPLY) {
-		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
-		/* Please set reply bit if this packet OK */
-		*set_reply = 1;
-	} else {
-		/* Once we've had two way comms, always ESTABLISHED. */
-		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
-			DEBUGP("ip_conntrack_in: normal packet for %p\n",
-			       ct);
-			*ctinfo = IP_CT_ESTABLISHED;
-		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
-			DEBUGP("ip_conntrack_in: related packet for %p\n",
-			       ct);
-			*ctinfo = IP_CT_RELATED;
-		} else {
-			DEBUGP("ip_conntrack_in: new packet for %p\n",
-			       ct);
-			*ctinfo = IP_CT_NEW;
-		}
-		*set_reply = 0;
-	}
-	skb->nfct = &ct->ct_general;
-	skb->nfctinfo = *ctinfo;
-	return ct;
-}
-
-/* Netfilter hook itself. */
-unsigned int ip_conntrack_in(unsigned int hooknum,
-			     struct sk_buff **pskb,
-			     const struct net_device *in,
-			     const struct net_device *out,
-			     int (*okfn)(struct sk_buff *))
-{
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-	struct ip_conntrack_protocol *proto;
-	int set_reply = 0;
-	int ret;
-
-	/* Previously seen (loopback or untracked)?  Ignore. */
-	if ((*pskb)->nfct) {
-		CONNTRACK_STAT_INC_ATOMIC(ignore);
-		return NF_ACCEPT;
-	}
-
-	/* Never happen */
-	if (ip_hdr(*pskb)->frag_off & htons(IP_OFFSET)) {
-		if (net_ratelimit()) {
-		printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
-		       ip_hdr(*pskb)->protocol, hooknum);
-		}
-		return NF_DROP;
-	}
-
-/* Doesn't cover locally-generated broadcast, so not worth it. */
-#if 0
-	/* Ignore broadcast: no `connection'. */
-	if ((*pskb)->pkt_type == PACKET_BROADCAST) {
-		printk("Broadcast packet!\n");
-		return NF_ACCEPT;
-	} else if ((ip_hdr(*pskb)->daddr & htonl(0x000000FF))
-		   == htonl(0x000000FF)) {
-		printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
-		       NIPQUAD(ip_hdr(*pskb)->saddr),
-		       NIPQUAD(ip_hdr(*pskb)->daddr),
-		       (*pskb)->sk, (*pskb)->pkt_type);
-	}
-#endif
-
-	/* rcu_read_lock()ed by nf_hook_slow */
-	proto = __ip_conntrack_proto_find(ip_hdr(*pskb)->protocol);
-
-	/* It may be an special packet, error, unclean...
-	 * inverse of the return code tells to the netfilter
-	 * core what to do with the packet. */
-	if (proto->error != NULL
-	    && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
-		CONNTRACK_STAT_INC_ATOMIC(error);
-		CONNTRACK_STAT_INC_ATOMIC(invalid);
-		return -ret;
-	}
-
-	if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
-		/* Not valid part of a connection */
-		CONNTRACK_STAT_INC_ATOMIC(invalid);
-		return NF_ACCEPT;
-	}
-
-	if (IS_ERR(ct)) {
-		/* Too stressed to deal. */
-		CONNTRACK_STAT_INC_ATOMIC(drop);
-		return NF_DROP;
-	}
-
-	IP_NF_ASSERT((*pskb)->nfct);
-
-	ret = proto->packet(ct, *pskb, ctinfo);
-	if (ret < 0) {
-		/* Invalid: inverse of the return code tells
-		 * the netfilter core what to do*/
-		nf_conntrack_put((*pskb)->nfct);
-		(*pskb)->nfct = NULL;
-		CONNTRACK_STAT_INC_ATOMIC(invalid);
-		return -ret;
-	}
-
-	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
-		ip_conntrack_event_cache(IPCT_STATUS, *pskb);
-
-	return ret;
-}
-
-int invert_tuplepr(struct ip_conntrack_tuple *inverse,
-		   const struct ip_conntrack_tuple *orig)
-{
-	struct ip_conntrack_protocol *proto;
-	int ret;
-
-	rcu_read_lock();
-	proto = __ip_conntrack_proto_find(orig->dst.protonum);
-	ret = ip_ct_invert_tuple(inverse, orig, proto);
-	rcu_read_unlock();
-
-	return ret;
-}
-
-/* Would two expected things clash? */
-static inline int expect_clash(const struct ip_conntrack_expect *a,
-			       const struct ip_conntrack_expect *b)
-{
-	/* Part covered by intersection of masks must be unequal,
-	   otherwise they clash */
-	struct ip_conntrack_tuple intersect_mask
-		= { { a->mask.src.ip & b->mask.src.ip,
-		      { a->mask.src.u.all & b->mask.src.u.all } },
-		    { a->mask.dst.ip & b->mask.dst.ip,
-		      { a->mask.dst.u.all & b->mask.dst.u.all },
-		      a->mask.dst.protonum & b->mask.dst.protonum } };
-
-	return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
-}
-
-static inline int expect_matches(const struct ip_conntrack_expect *a,
-				 const struct ip_conntrack_expect *b)
-{
-	return a->master == b->master
-		&& ip_ct_tuple_equal(&a->tuple, &b->tuple)
-		&& ip_ct_tuple_equal(&a->mask, &b->mask);
-}
-
-/* Generally a bad idea to call this: could have matched already. */
-void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
-{
-	struct ip_conntrack_expect *i;
-
-	write_lock_bh(&ip_conntrack_lock);
-	/* choose the the oldest expectation to evict */
-	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
-		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
-			ip_ct_unlink_expect(i);
-			write_unlock_bh(&ip_conntrack_lock);
-			ip_conntrack_expect_put(i);
-			return;
-		}
-	}
-	write_unlock_bh(&ip_conntrack_lock);
-}
-
-/* We don't increase the master conntrack refcount for non-fulfilled
- * conntracks. During the conntrack destruction, the expectations are
- * always killed before the conntrack itself */
-struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
-{
-	struct ip_conntrack_expect *new;
-
-	new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
-	if (!new) {
-		DEBUGP("expect_related: OOM allocating expect\n");
-		return NULL;
-	}
-	new->master = me;
-	atomic_set(&new->use, 1);
-	return new;
-}
-
-void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
-{
-	if (atomic_dec_and_test(&exp->use))
-		kmem_cache_free(ip_conntrack_expect_cachep, exp);
-}
-
-static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
-{
-	atomic_inc(&exp->use);
-	exp->master->expecting++;
-	list_add(&exp->list, &ip_conntrack_expect_list);
-
-	init_timer(&exp->timeout);
-	exp->timeout.data = (unsigned long)exp;
-	exp->timeout.function = expectation_timed_out;
-	exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
-	add_timer(&exp->timeout);
-
-	exp->id = ++ip_conntrack_expect_next_id;
-	atomic_inc(&exp->use);
-	CONNTRACK_STAT_INC(expect_create);
-}
-
-/* Race with expectations being used means we could have none to find; OK. */
-static void evict_oldest_expect(struct ip_conntrack *master)
-{
-	struct ip_conntrack_expect *i;
-
-	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
-		if (i->master == master) {
-			if (del_timer(&i->timeout)) {
-				ip_ct_unlink_expect(i);
-				ip_conntrack_expect_put(i);
-			}
-			break;
-		}
-	}
-}
-
-static inline int refresh_timer(struct ip_conntrack_expect *i)
-{
-	if (!del_timer(&i->timeout))
-		return 0;
-
-	i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
-	add_timer(&i->timeout);
-	return 1;
-}
-
-int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
-{
-	struct ip_conntrack_expect *i;
-	int ret;
-
-	DEBUGP("ip_conntrack_expect_related %p\n", related_to);
-	DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
-	DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);
-
-	write_lock_bh(&ip_conntrack_lock);
-	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
-		if (expect_matches(i, expect)) {
-			/* Refresh timer: if it's dying, ignore.. */
-			if (refresh_timer(i)) {
-				ret = 0;
-				goto out;
-			}
-		} else if (expect_clash(i, expect)) {
-			ret = -EBUSY;
-			goto out;
-		}
-	}
-
-	/* Will be over limit? */
-	if (expect->master->helper->max_expected &&
-	    expect->master->expecting >= expect->master->helper->max_expected)
-		evict_oldest_expect(expect->master);
-
-	ip_conntrack_expect_insert(expect);
-	ip_conntrack_expect_event(IPEXP_NEW, expect);
-	ret = 0;
-out:
-	write_unlock_bh(&ip_conntrack_lock);
-	return ret;
-}
-
-/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
-   implicitly racy: see __ip_conntrack_confirm */
-void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
-			      const struct ip_conntrack_tuple *newreply)
-{
-	write_lock_bh(&ip_conntrack_lock);
-	/* Should be unconfirmed, so not in hash table yet */
-	IP_NF_ASSERT(!is_confirmed(conntrack));
-
-	DEBUGP("Altering reply tuple of %p to ", conntrack);
-	DUMP_TUPLE(newreply);
-
-	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
-	if (!conntrack->master && conntrack->expecting == 0)
-		conntrack->helper = __ip_conntrack_helper_find(newreply);
-	write_unlock_bh(&ip_conntrack_lock);
-}
-
-int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
-{
-	BUG_ON(me->timeout == 0);
-	write_lock_bh(&ip_conntrack_lock);
-	list_add(&me->list, &helpers);
-	write_unlock_bh(&ip_conntrack_lock);
-
-	return 0;
-}
-
-struct ip_conntrack_helper *
-__ip_conntrack_helper_find_byname(const char *name)
-{
-	struct ip_conntrack_helper *h;
-
-	list_for_each_entry(h, &helpers, list) {
-		if (!strcmp(h->name, name))
-			return h;
-	}
-
-	return NULL;
-}
-
-static inline void unhelp(struct ip_conntrack_tuple_hash *i,
-			  const struct ip_conntrack_helper *me)
-{
-	if (tuplehash_to_ctrack(i)->helper == me) {
-		ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
-		tuplehash_to_ctrack(i)->helper = NULL;
-	}
-}
-
-void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
-{
-	unsigned int i;
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack_expect *exp, *tmp;
-
-	/* Need write lock here, to delete helper. */
-	write_lock_bh(&ip_conntrack_lock);
-	list_del(&me->list);
-
-	/* Get rid of expectations */
-	list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
-		if (exp->master->helper == me && del_timer(&exp->timeout)) {
-			ip_ct_unlink_expect(exp);
-			ip_conntrack_expect_put(exp);
-		}
-	}
-	/* Get rid of expecteds, set helpers to NULL. */
-	list_for_each_entry(h, &unconfirmed, list)
-		unhelp(h, me);
-	for (i = 0; i < ip_conntrack_htable_size; i++) {
-		list_for_each_entry(h, &ip_conntrack_hash[i], list)
-			unhelp(h, me);
-	}
-	write_unlock_bh(&ip_conntrack_lock);
-
-	/* Someone could be still looking at the helper in a bh. */
-	synchronize_net();
-}
-
-/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
-void __ip_ct_refresh_acct(struct ip_conntrack *ct,
-			enum ip_conntrack_info ctinfo,
-			const struct sk_buff *skb,
-			unsigned long extra_jiffies,
-			int do_acct)
-{
-	int event = 0;
-
-	IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
-	IP_NF_ASSERT(skb);
-
-	write_lock_bh(&ip_conntrack_lock);
-
-	/* Only update if this is not a fixed timeout */
-	if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
-		write_unlock_bh(&ip_conntrack_lock);
-		return;
-	}
-
-	/* If not in hash table, timer will not be active yet */
-	if (!is_confirmed(ct)) {
-		ct->timeout.expires = extra_jiffies;
-		event = IPCT_REFRESH;
-	} else {
-		/* Need del_timer for race avoidance (may already be dying). */
-		if (del_timer(&ct->timeout)) {
-			ct->timeout.expires = jiffies + extra_jiffies;
-			add_timer(&ct->timeout);
-			event = IPCT_REFRESH;
-		}
-	}
-
-#ifdef CONFIG_IP_NF_CT_ACCT
-	if (do_acct) {
-		ct->counters[CTINFO2DIR(ctinfo)].packets++;
-		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
-						ntohs(ip_hdr(skb)->tot_len);
-		if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
-		    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
-			event |= IPCT_COUNTER_FILLING;
-	}
-#endif
-
-	write_unlock_bh(&ip_conntrack_lock);
-
-	/* must be unlocked when calling event cache */
-	if (event)
-		ip_conntrack_event_cache(event, skb);
-}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
- * in ip_conntrack_core, since we don't want the protocols to autoload
- * or depend on ctnetlink */
-int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
-			       const struct ip_conntrack_tuple *tuple)
-{
-	NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(__be16),
-		&tuple->src.u.tcp.port);
-	NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(__be16),
-		&tuple->dst.u.tcp.port);
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
-			       struct ip_conntrack_tuple *t)
-{
-	if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
-		return -EINVAL;
-
-	t->src.u.tcp.port =
-		*(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
-	t->dst.u.tcp.port =
-		*(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
-
-	return 0;
-}
-#endif
-
-/* Returns new sk_buff, or NULL */
-struct sk_buff *
-ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
-{
-	skb_orphan(skb);
-
-	local_bh_disable();
-	skb = ip_defrag(skb, user);
-	local_bh_enable();
-
-	if (skb)
-		ip_send_check(ip_hdr(skb));
-	return skb;
-}
-
-/* Used by ipt_REJECT. */
-static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
-{
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-
-	/* This ICMP is in reverse direction to the packet which caused it */
-	ct = ip_conntrack_get(skb, &ctinfo);
-
-	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
-		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
-	else
-		ctinfo = IP_CT_RELATED;
-
-	/* Attach to new skbuff, and increment count */
-	nskb->nfct = &ct->ct_general;
-	nskb->nfctinfo = ctinfo;
-	nf_conntrack_get(nskb->nfct);
-}
-
-/* Bring out ya dead! */
-static struct ip_conntrack *
-get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
-		void *data, unsigned int *bucket)
-{
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack *ct;
-
-	write_lock_bh(&ip_conntrack_lock);
-	for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
-		list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) {
-			ct = tuplehash_to_ctrack(h);
-			if (iter(ct, data))
-				goto found;
-		}
-	}
-	list_for_each_entry(h, &unconfirmed, list) {
-		ct = tuplehash_to_ctrack(h);
-		if (iter(ct, data))
-			set_bit(IPS_DYING_BIT, &ct->status);
-	}
-	write_unlock_bh(&ip_conntrack_lock);
-	return NULL;
-
-found:
-	atomic_inc(&ct->ct_general.use);
-	write_unlock_bh(&ip_conntrack_lock);
-	return ct;
-}
-
-void
-ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
-{
-	struct ip_conntrack *ct;
-	unsigned int bucket = 0;
-
-	while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
-		/* Time to push up daises... */
-		if (del_timer(&ct->timeout))
-			death_by_timeout((unsigned long)ct);
-		/* ... else the timer will get him soon. */
-
-		ip_conntrack_put(ct);
-	}
-}
-
-/* Fast function for those who don't want to parse /proc (and I don't
-   blame them). */
-/* Reversing the socket's dst/src point of view gives us the reply
-   mapping. */
-static int
-getorigdst(struct sock *sk, int optval, void __user *user, int *len)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack_tuple tuple;
-
-	IP_CT_TUPLE_U_BLANK(&tuple);
-	tuple.src.ip = inet->rcv_saddr;
-	tuple.src.u.tcp.port = inet->sport;
-	tuple.dst.ip = inet->daddr;
-	tuple.dst.u.tcp.port = inet->dport;
-	tuple.dst.protonum = IPPROTO_TCP;
-
-	/* We only do TCP at the moment: is there a better way? */
-	if (strcmp(sk->sk_prot->name, "TCP")) {
-		DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
-		return -ENOPROTOOPT;
-	}
-
-	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
-		DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
-		       *len, sizeof(struct sockaddr_in));
-		return -EINVAL;
-	}
-
-	h = ip_conntrack_find_get(&tuple, NULL);
-	if (h) {
-		struct sockaddr_in sin;
-		struct ip_conntrack *ct = tuplehash_to_ctrack(h);
-
-		sin.sin_family = AF_INET;
-		sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
-			.tuple.dst.u.tcp.port;
-		sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
-			.tuple.dst.ip;
-		memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
-
-		DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
-		       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
-		ip_conntrack_put(ct);
-		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
-			return -EFAULT;
-		else
-			return 0;
-	}
-	DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
-	       NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
-	       NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
-	return -ENOENT;
-}
-
-static struct nf_sockopt_ops so_getorigdst = {
-	.pf		= PF_INET,
-	.get_optmin	= SO_ORIGINAL_DST,
-	.get_optmax	= SO_ORIGINAL_DST+1,
-	.get		= &getorigdst,
-};
-
-static int kill_all(struct ip_conntrack *i, void *data)
-{
-	return 1;
-}
-
-void ip_conntrack_flush(void)
-{
-	ip_ct_iterate_cleanup(kill_all, NULL);
-}
-
-static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
-{
-	if (vmalloced)
-		vfree(hash);
-	else
-		free_pages((unsigned long)hash,
-			   get_order(sizeof(struct list_head) * size));
-}
-
-/* Mishearing the voices in his head, our hero wonders how he's
-   supposed to kill the mall. */
-void ip_conntrack_cleanup(void)
-{
-	rcu_assign_pointer(ip_ct_attach, NULL);
-
-	/* This makes sure all current packets have passed through
-	   netfilter framework.  Roll on, two-stage module
-	   delete... */
-	synchronize_net();
-
-	ip_ct_event_cache_flush();
- i_see_dead_people:
-	ip_conntrack_flush();
-	if (atomic_read(&ip_conntrack_count) != 0) {
-		schedule();
-		goto i_see_dead_people;
-	}
-	/* wait until all references to ip_conntrack_untracked are dropped */
-	while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
-		schedule();
-
-	kmem_cache_destroy(ip_conntrack_cachep);
-	kmem_cache_destroy(ip_conntrack_expect_cachep);
-	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
-			    ip_conntrack_htable_size);
-	nf_unregister_sockopt(&so_getorigdst);
-}
-
-static struct list_head *alloc_hashtable(int size, int *vmalloced)
-{
-	struct list_head *hash;
-	unsigned int i;
-
-	*vmalloced = 0;
-	hash = (void*)__get_free_pages(GFP_KERNEL,
-				       get_order(sizeof(struct list_head)
-						 * size));
-	if (!hash) {
-		*vmalloced = 1;
-		printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n");
-		hash = vmalloc(sizeof(struct list_head) * size);
-	}
-
-	if (hash)
-		for (i = 0; i < size; i++)
-			INIT_LIST_HEAD(&hash[i]);
-
-	return hash;
-}
-
-static int set_hashsize(const char *val, struct kernel_param *kp)
-{
-	int i, bucket, hashsize, vmalloced;
-	int old_vmalloced, old_size;
-	int rnd;
-	struct list_head *hash, *old_hash;
-	struct ip_conntrack_tuple_hash *h;
-
-	/* On boot, we can set this without any fancy locking. */
-	if (!ip_conntrack_htable_size)
-		return param_set_int(val, kp);
-
-	hashsize = simple_strtol(val, NULL, 0);
-	if (!hashsize)
-		return -EINVAL;
-
-	hash = alloc_hashtable(hashsize, &vmalloced);
-	if (!hash)
-		return -ENOMEM;
-
-	/* We have to rehash for the new table anyway, so we also can
-	 * use a new random seed */
-	get_random_bytes(&rnd, 4);
-
-	write_lock_bh(&ip_conntrack_lock);
-	for (i = 0; i < ip_conntrack_htable_size; i++) {
-		while (!list_empty(&ip_conntrack_hash[i])) {
-			h = list_entry(ip_conntrack_hash[i].next,
-				       struct ip_conntrack_tuple_hash, list);
-			list_del(&h->list);
-			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
-			list_add_tail(&h->list, &hash[bucket]);
-		}
-	}
-	old_size = ip_conntrack_htable_size;
-	old_vmalloced = ip_conntrack_vmalloc;
-	old_hash = ip_conntrack_hash;
-
-	ip_conntrack_htable_size = hashsize;
-	ip_conntrack_vmalloc = vmalloced;
-	ip_conntrack_hash = hash;
-	ip_conntrack_hash_rnd = rnd;
-	write_unlock_bh(&ip_conntrack_lock);
-
-	free_conntrack_hash(old_hash, old_vmalloced, old_size);
-	return 0;
-}
-
-module_param_call(hashsize, set_hashsize, param_get_uint,
-		  &ip_conntrack_htable_size, 0600);
-
-int __init ip_conntrack_init(void)
-{
-	unsigned int i;
-	int ret;
-
-	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
-	 * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
-	if (!ip_conntrack_htable_size) {
-		ip_conntrack_htable_size
-			= (((num_physpages << PAGE_SHIFT) / 16384)
-			   / sizeof(struct list_head));
-		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
-			ip_conntrack_htable_size = 8192;
-		if (ip_conntrack_htable_size < 16)
-			ip_conntrack_htable_size = 16;
-	}
-	ip_conntrack_max = 8 * ip_conntrack_htable_size;
-
-	printk("ip_conntrack version %s (%u buckets, %d max)"
-	       " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
-	       ip_conntrack_htable_size, ip_conntrack_max,
-	       sizeof(struct ip_conntrack));
-
-	ret = nf_register_sockopt(&so_getorigdst);
-	if (ret != 0) {
-		printk(KERN_ERR "Unable to register netfilter socket option\n");
-		return ret;
-	}
-
-	ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
-					    &ip_conntrack_vmalloc);
-	if (!ip_conntrack_hash) {
-		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
-		goto err_unreg_sockopt;
-	}
-
-	ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
-						sizeof(struct ip_conntrack), 0,
-						0, NULL, NULL);
-	if (!ip_conntrack_cachep) {
-		printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
-		goto err_free_hash;
-	}
-
-	ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
-					sizeof(struct ip_conntrack_expect),
-					0, 0, NULL, NULL);
-	if (!ip_conntrack_expect_cachep) {
-		printk(KERN_ERR "Unable to create ip_expect slab cache\n");
-		goto err_free_conntrack_slab;
-	}
-
-	/* Don't NEED lock here, but good form anyway. */
-	write_lock_bh(&ip_conntrack_lock);
-	for (i = 0; i < MAX_IP_CT_PROTO; i++)
-		rcu_assign_pointer(ip_ct_protos[i], &ip_conntrack_generic_protocol);
-	/* Sew in builtin protocols. */
-	rcu_assign_pointer(ip_ct_protos[IPPROTO_TCP], &ip_conntrack_protocol_tcp);
-	rcu_assign_pointer(ip_ct_protos[IPPROTO_UDP], &ip_conntrack_protocol_udp);
-	rcu_assign_pointer(ip_ct_protos[IPPROTO_ICMP], &ip_conntrack_protocol_icmp);
-	write_unlock_bh(&ip_conntrack_lock);
-
-	/* For use by ipt_REJECT */
-	rcu_assign_pointer(ip_ct_attach, ip_conntrack_attach);
-
-	/* Set up fake conntrack:
-	    - to never be deleted, not in any hashes */
-	atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
-	/*  - and look it like as a confirmed connection */
-	set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
-
-	return ret;
-
-err_free_conntrack_slab:
-	kmem_cache_destroy(ip_conntrack_cachep);
-err_free_hash:
-	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
-			    ip_conntrack_htable_size);
-err_unreg_sockopt:
-	nf_unregister_sockopt(&so_getorigdst);
-
-	return -ENOMEM;
-}
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
deleted file mode 100644
index 92389987e78..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ /dev/null
@@ -1,520 +0,0 @@
-/* FTP extension for IP connection tracking. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/ctype.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
-#include <linux/moduleparam.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
-MODULE_DESCRIPTION("ftp connection tracking helper");
-
-/* This is slow, but it's simple. --RR */
-static char *ftp_buffer;
-static DEFINE_SPINLOCK(ip_ftp_lock);
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-module_param_array(ports, ushort, &ports_c, 0400);
-
-static int loose;
-module_param(loose, bool, 0600);
-
-unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
-				enum ip_conntrack_info ctinfo,
-				enum ip_ct_ftp_type type,
-				unsigned int matchoff,
-				unsigned int matchlen,
-				struct ip_conntrack_expect *exp,
-				u32 *seq);
-EXPORT_SYMBOL_GPL(ip_nat_ftp_hook);
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int try_rfc959(const char *, size_t, u_int32_t [], char);
-static int try_eprt(const char *, size_t, u_int32_t [], char);
-static int try_epsv_response(const char *, size_t, u_int32_t [], char);
-
-static const struct ftp_search {
-	const char *pattern;
-	size_t plen;
-	char skip;
-	char term;
-	enum ip_ct_ftp_type ftptype;
-	int (*getnum)(const char *, size_t, u_int32_t[], char);
-} search[IP_CT_DIR_MAX][2] = {
-	[IP_CT_DIR_ORIGINAL] = {
-		{
-			.pattern	=  "PORT",
-			.plen		= sizeof("PORT") - 1,
-			.skip		= ' ',
-			.term		= '\r',
-			.ftptype	= IP_CT_FTP_PORT,
-			.getnum		= try_rfc959,
-		},
-		{
-			.pattern	= "EPRT",
-			.plen		= sizeof("EPRT") - 1,
-			.skip		= ' ',
-			.term		= '\r',
-			.ftptype	= IP_CT_FTP_EPRT,
-			.getnum		= try_eprt,
-		},
-	},
-	[IP_CT_DIR_REPLY] = {
-		{
-			.pattern	= "227 ",
-			.plen		= sizeof("227 ") - 1,
-			.skip		= '(',
-			.term		= ')',
-			.ftptype	= IP_CT_FTP_PASV,
-			.getnum		= try_rfc959,
-		},
-		{
-			.pattern	= "229 ",
-			.plen		= sizeof("229 ") - 1,
-			.skip		= '(',
-			.term		= ')',
-			.ftptype	= IP_CT_FTP_EPSV,
-			.getnum		= try_epsv_response,
-		},
-	},
-};
-
-static int try_number(const char *data, size_t dlen, u_int32_t array[],
-		      int array_size, char sep, char term)
-{
-	u_int32_t i, len;
-
-	memset(array, 0, sizeof(array[0])*array_size);
-
-	/* Keep data pointing at next char. */
-	for (i = 0, len = 0; len < dlen && i < array_size; len++, data++) {
-		if (*data >= '0' && *data <= '9') {
-			array[i] = array[i]*10 + *data - '0';
-		}
-		else if (*data == sep)
-			i++;
-		else {
-			/* Unexpected character; true if it's the
-			   terminator and we're finished. */
-			if (*data == term && i == array_size - 1)
-				return len;
-
-			DEBUGP("Char %u (got %u nums) `%u' unexpected\n",
-			       len, i, *data);
-			return 0;
-		}
-	}
-	DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep);
-
-	return 0;
-}
-
-/* Returns 0, or length of numbers: 192,168,1,1,5,6 */
-static int try_rfc959(const char *data, size_t dlen, u_int32_t array[6],
-		       char term)
-{
-	return try_number(data, dlen, array, 6, ',', term);
-}
-
-/* Grab port: number up to delimiter */
-static int get_port(const char *data, int start, size_t dlen, char delim,
-		    u_int32_t array[2])
-{
-	u_int16_t port = 0;
-	int i;
-
-	for (i = start; i < dlen; i++) {
-		/* Finished? */
-		if (data[i] == delim) {
-			if (port == 0)
-				break;
-			array[0] = port >> 8;
-			array[1] = port;
-			return i + 1;
-		}
-		else if (data[i] >= '0' && data[i] <= '9')
-			port = port*10 + data[i] - '0';
-		else /* Some other crap */
-			break;
-	}
-	return 0;
-}
-
-/* Returns 0, or length of numbers: |1|132.235.1.2|6275| */
-static int try_eprt(const char *data, size_t dlen, u_int32_t array[6],
-		    char term)
-{
-	char delim;
-	int length;
-
-	/* First character is delimiter, then "1" for IPv4, then
-	   delimiter again. */
-	if (dlen <= 3) return 0;
-	delim = data[0];
-	if (isdigit(delim) || delim < 33 || delim > 126
-	    || data[1] != '1' || data[2] != delim)
-		return 0;
-
-	DEBUGP("EPRT: Got |1|!\n");
-	/* Now we have IP address. */
-	length = try_number(data + 3, dlen - 3, array, 4, '.', delim);
-	if (length == 0)
-		return 0;
-
-	DEBUGP("EPRT: Got IP address!\n");
-	/* Start offset includes initial "|1|", and trailing delimiter */
-	return get_port(data, 3 + length + 1, dlen, delim, array+4);
-}
-
-/* Returns 0, or length of numbers: |||6446| */
-static int try_epsv_response(const char *data, size_t dlen, u_int32_t array[6],
-			     char term)
-{
-	char delim;
-
-	/* Three delimiters. */
-	if (dlen <= 3) return 0;
-	delim = data[0];
-	if (isdigit(delim) || delim < 33 || delim > 126
-	    || data[1] != delim || data[2] != delim)
-		return 0;
-
-	return get_port(data, 3, dlen, delim, array+4);
-}
-
-/* Return 1 for match, 0 for accept, -1 for partial. */
-static int find_pattern(const char *data, size_t dlen,
-			const char *pattern, size_t plen,
-			char skip, char term,
-			unsigned int *numoff,
-			unsigned int *numlen,
-			u_int32_t array[6],
-			int (*getnum)(const char *, size_t, u_int32_t[], char))
-{
-	size_t i;
-
-	DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen);
-	if (dlen == 0)
-		return 0;
-
-	if (dlen <= plen) {
-		/* Short packet: try for partial? */
-		if (strnicmp(data, pattern, dlen) == 0)
-			return -1;
-		else return 0;
-	}
-
-	if (strnicmp(data, pattern, plen) != 0) {
-#if 0
-		size_t i;
-
-		DEBUGP("ftp: string mismatch\n");
-		for (i = 0; i < plen; i++) {
-			DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
-				i, data[i], data[i],
-				pattern[i], pattern[i]);
-		}
-#endif
-		return 0;
-	}
-
-	DEBUGP("Pattern matches!\n");
-	/* Now we've found the constant string, try to skip
-	   to the 'skip' character */
-	for (i = plen; data[i] != skip; i++)
-		if (i == dlen - 1) return -1;
-
-	/* Skip over the last character */
-	i++;
-
-	DEBUGP("Skipped up to `%c'!\n", skip);
-
-	*numoff = i;
-	*numlen = getnum(data + i, dlen - i, array, term);
-	if (!*numlen)
-		return -1;
-
-	DEBUGP("Match succeeded!\n");
-	return 1;
-}
-
-/* Look up to see if we're just after a \n. */
-static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir)
-{
-	unsigned int i;
-
-	for (i = 0; i < info->seq_aft_nl_num[dir]; i++)
-		if (info->seq_aft_nl[dir][i] == seq)
-			return 1;
-	return 0;
-}
-
-/* We don't update if it's older than what we have. */
-static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir,
-			  struct sk_buff *skb)
-{
-	unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
-
-	/* Look for oldest: if we find exact match, we're done. */
-	for (i = 0; i < info->seq_aft_nl_num[dir]; i++) {
-		if (info->seq_aft_nl[dir][i] == nl_seq)
-			return;
-
-		if (oldest == info->seq_aft_nl_num[dir]
-		    || before(info->seq_aft_nl[dir][i], oldest))
-			oldest = i;
-	}
-
-	if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
-		info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
-		ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
-	} else if (oldest != NUM_SEQ_TO_REMEMBER) {
-		info->seq_aft_nl[dir][oldest] = nl_seq;
-		ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
-	}
-}
-
-static int help(struct sk_buff **pskb,
-		struct ip_conntrack *ct,
-		enum ip_conntrack_info ctinfo)
-{
-	unsigned int dataoff, datalen;
-	struct tcphdr _tcph, *th;
-	char *fb_ptr;
-	int ret;
-	u32 seq, array[6] = { 0 };
-	int dir = CTINFO2DIR(ctinfo);
-	unsigned int matchlen, matchoff;
-	struct ip_ct_ftp_master *ct_ftp_info = &ct->help.ct_ftp_info;
-	struct ip_conntrack_expect *exp;
-	unsigned int i;
-	int found = 0, ends_in_nl;
-	typeof(ip_nat_ftp_hook) ip_nat_ftp;
-
-	/* Until there's been traffic both ways, don't look in packets. */
-	if (ctinfo != IP_CT_ESTABLISHED
-	    && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
-		DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo);
-		return NF_ACCEPT;
-	}
-
-	th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
-				sizeof(_tcph), &_tcph);
-	if (th == NULL)
-		return NF_ACCEPT;
-
-	dataoff = ip_hdrlen(*pskb) + th->doff * 4;
-	/* No data? */
-	if (dataoff >= (*pskb)->len) {
-		DEBUGP("ftp: pskblen = %u\n", (*pskb)->len);
-		return NF_ACCEPT;
-	}
-	datalen = (*pskb)->len - dataoff;
-
-	spin_lock_bh(&ip_ftp_lock);
-	fb_ptr = skb_header_pointer(*pskb, dataoff,
-				    (*pskb)->len - dataoff, ftp_buffer);
-	BUG_ON(fb_ptr == NULL);
-
-	ends_in_nl = (fb_ptr[datalen - 1] == '\n');
-	seq = ntohl(th->seq) + datalen;
-
-	/* Look up to see if we're just after a \n. */
-	if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
-		/* Now if this ends in \n, update ftp info. */
-		DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n",
-		       ct_ftp_info->seq_aft_nl[0][dir]
-		       old_seq_aft_nl_set ? "":"(UNSET) ", old_seq_aft_nl);
-		ret = NF_ACCEPT;
-		goto out_update_nl;
-	}
-
-	/* Initialize IP array to expected address (it's not mentioned
-	   in EPSV responses) */
-	array[0] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 24) & 0xFF;
-	array[1] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 16) & 0xFF;
-	array[2] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 8) & 0xFF;
-	array[3] = ntohl(ct->tuplehash[dir].tuple.src.ip) & 0xFF;
-
-	for (i = 0; i < ARRAY_SIZE(search[dir]); i++) {
-		found = find_pattern(fb_ptr, (*pskb)->len - dataoff,
-				     search[dir][i].pattern,
-				     search[dir][i].plen,
-				     search[dir][i].skip,
-				     search[dir][i].term,
-				     &matchoff, &matchlen,
-				     array,
-				     search[dir][i].getnum);
-		if (found) break;
-	}
-	if (found == -1) {
-		/* We don't usually drop packets.  After all, this is
-		   connection tracking, not packet filtering.
-		   However, it is necessary for accurate tracking in
-		   this case. */
-		if (net_ratelimit())
-			printk("conntrack_ftp: partial %s %u+%u\n",
-			       search[dir][i].pattern,
-			       ntohl(th->seq), datalen);
-		ret = NF_DROP;
-		goto out;
-	} else if (found == 0) { /* No match */
-		ret = NF_ACCEPT;
-		goto out_update_nl;
-	}
-
-	DEBUGP("conntrack_ftp: match `%s' (%u bytes at %u)\n",
-	       fb_ptr + matchoff, matchlen, ntohl(th->seq) + matchoff);
-
-	/* Allocate expectation which will be inserted */
-	exp = ip_conntrack_expect_alloc(ct);
-	if (exp == NULL) {
-		ret = NF_DROP;
-		goto out;
-	}
-
-	/* We refer to the reverse direction ("!dir") tuples here,
-	 * because we're expecting something in the other direction.
-	 * Doesn't matter unless NAT is happening.  */
-	exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-
-	if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3])
-	    != ct->tuplehash[dir].tuple.src.ip) {
-		/* Enrico Scholz's passive FTP to partially RNAT'd ftp
-		   server: it really wants us to connect to a
-		   different IP address.  Simply don't record it for
-		   NAT. */
-		DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n",
-		       array[0], array[1], array[2], array[3],
-		       NIPQUAD(ct->tuplehash[dir].tuple.src.ip));
-
-		/* Thanks to Cristiano Lincoln Mattos
-		   <lincoln@cesar.org.br> for reporting this potential
-		   problem (DMZ machines opening holes to internal
-		   networks, or the packet filter itself). */
-		if (!loose) {
-			ret = NF_ACCEPT;
-			goto out_put_expect;
-		}
-		exp->tuple.dst.ip = htonl((array[0] << 24) | (array[1] << 16)
-					 | (array[2] << 8) | array[3]);
-	}
-
-	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	exp->tuple.dst.u.tcp.port = htons(array[4] << 8 | array[5]);
-	exp->tuple.src.u.tcp.port = 0; /* Don't care. */
-	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask = ((struct ip_conntrack_tuple)
-		{ { htonl(0xFFFFFFFF), { 0 } },
-		  { htonl(0xFFFFFFFF), { .tcp = { htons(0xFFFF) } }, 0xFF }});
-
-	exp->expectfn = NULL;
-	exp->flags = 0;
-
-	/* Now, NAT might want to mangle the packet, and register the
-	 * (possibly changed) expectation itself. */
-	ip_nat_ftp = rcu_dereference(ip_nat_ftp_hook);
-	if (ip_nat_ftp)
-		ret = ip_nat_ftp(pskb, ctinfo, search[dir][i].ftptype,
-				 matchoff, matchlen, exp, &seq);
-	else {
-		/* Can't expect this?  Best to drop packet now. */
-		if (ip_conntrack_expect_related(exp) != 0)
-			ret = NF_DROP;
-		else
-			ret = NF_ACCEPT;
-	}
-
-out_put_expect:
-	ip_conntrack_expect_put(exp);
-
-out_update_nl:
-	/* Now if this ends in \n, update ftp info.  Seq may have been
-	 * adjusted by NAT code. */
-	if (ends_in_nl)
-		update_nl_seq(seq, ct_ftp_info,dir, *pskb);
- out:
-	spin_unlock_bh(&ip_ftp_lock);
-	return ret;
-}
-
-static struct ip_conntrack_helper ftp[MAX_PORTS];
-static char ftp_names[MAX_PORTS][sizeof("ftp-65535")];
-
-/* Not __exit: called from init() */
-static void ip_conntrack_ftp_fini(void)
-{
-	int i;
-	for (i = 0; i < ports_c; i++) {
-		DEBUGP("ip_ct_ftp: unregistering helper for port %d\n",
-				ports[i]);
-		ip_conntrack_helper_unregister(&ftp[i]);
-	}
-
-	kfree(ftp_buffer);
-}
-
-static int __init ip_conntrack_ftp_init(void)
-{
-	int i, ret;
-	char *tmpname;
-
-	ftp_buffer = kmalloc(65536, GFP_KERNEL);
-	if (!ftp_buffer)
-		return -ENOMEM;
-
-	if (ports_c == 0)
-		ports[ports_c++] = FTP_PORT;
-
-	for (i = 0; i < ports_c; i++) {
-		ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
-		ftp[i].tuple.dst.protonum = IPPROTO_TCP;
-		ftp[i].mask.src.u.tcp.port = htons(0xFFFF);
-		ftp[i].mask.dst.protonum = 0xFF;
-		ftp[i].max_expected = 1;
-		ftp[i].timeout = 5 * 60; /* 5 minutes */
-		ftp[i].me = THIS_MODULE;
-		ftp[i].help = help;
-
-		tmpname = &ftp_names[i][0];
-		if (ports[i] == FTP_PORT)
-			sprintf(tmpname, "ftp");
-		else
-			sprintf(tmpname, "ftp-%d", ports[i]);
-		ftp[i].name = tmpname;
-
-		DEBUGP("ip_ct_ftp: registering helper for port %d\n",
-				ports[i]);
-		ret = ip_conntrack_helper_register(&ftp[i]);
-
-		if (ret) {
-			ip_conntrack_ftp_fini();
-			return ret;
-		}
-	}
-	return 0;
-}
-
-module_init(ip_conntrack_ftp_init);
-module_exit(ip_conntrack_ftp_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_h323.c b/net/ipv4/netfilter/ip_conntrack_helper_h323.c
deleted file mode 100644
index cecb6e0c8ed..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_helper_h323.c
+++ /dev/null
@@ -1,1840 +0,0 @@
-/*
- * H.323 connection tracking helper
- *
- * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
- *
- * This source code is licensed under General Public License version 2.
- *
- * Based on the 'brute force' H.323 connection tracking module by
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * For more information, please see http://nath323.sourceforge.net/
- */
-
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-#include <linux/netfilter_ipv4/ip_conntrack_h323.h>
-#include <linux/moduleparam.h>
-#include <linux/ctype.h>
-#include <linux/inet.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Parameters */
-static unsigned int default_rrq_ttl = 300;
-module_param(default_rrq_ttl, uint, 0600);
-MODULE_PARM_DESC(default_rrq_ttl, "use this TTL if it's missing in RRQ");
-
-static int gkrouted_only = 1;
-module_param(gkrouted_only, int, 0600);
-MODULE_PARM_DESC(gkrouted_only, "only accept calls from gatekeeper");
-
-static int callforward_filter = 1;
-module_param(callforward_filter, bool, 0600);
-MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
-				     "if both endpoints are on different sides "
-				     "(determined by routing information)");
-
-/* Hooks for NAT */
-int (*set_h245_addr_hook) (struct sk_buff ** pskb,
-			   unsigned char **data, int dataoff,
-			   H245_TransportAddress * addr,
-			   __be32 ip, u_int16_t port);
-int (*set_h225_addr_hook) (struct sk_buff ** pskb,
-			   unsigned char **data, int dataoff,
-			   TransportAddress * addr,
-			   __be32 ip, u_int16_t port);
-int (*set_sig_addr_hook) (struct sk_buff ** pskb,
-			  struct ip_conntrack * ct,
-			  enum ip_conntrack_info ctinfo,
-			  unsigned char **data,
-			  TransportAddress * addr, int count);
-int (*set_ras_addr_hook) (struct sk_buff ** pskb,
-			  struct ip_conntrack * ct,
-			  enum ip_conntrack_info ctinfo,
-			  unsigned char **data,
-			  TransportAddress * addr, int count);
-int (*nat_rtp_rtcp_hook) (struct sk_buff ** pskb,
-			  struct ip_conntrack * ct,
-			  enum ip_conntrack_info ctinfo,
-			  unsigned char **data, int dataoff,
-			  H245_TransportAddress * addr,
-			  u_int16_t port, u_int16_t rtp_port,
-			  struct ip_conntrack_expect * rtp_exp,
-			  struct ip_conntrack_expect * rtcp_exp);
-int (*nat_t120_hook) (struct sk_buff ** pskb,
-		      struct ip_conntrack * ct,
-		      enum ip_conntrack_info ctinfo,
-		      unsigned char **data, int dataoff,
-		      H245_TransportAddress * addr, u_int16_t port,
-		      struct ip_conntrack_expect * exp);
-int (*nat_h245_hook) (struct sk_buff ** pskb,
-		      struct ip_conntrack * ct,
-		      enum ip_conntrack_info ctinfo,
-		      unsigned char **data, int dataoff,
-		      TransportAddress * addr, u_int16_t port,
-		      struct ip_conntrack_expect * exp);
-int (*nat_callforwarding_hook) (struct sk_buff ** pskb,
-				struct ip_conntrack * ct,
-				enum ip_conntrack_info ctinfo,
-				unsigned char **data, int dataoff,
-				TransportAddress * addr, u_int16_t port,
-				struct ip_conntrack_expect * exp);
-int (*nat_q931_hook) (struct sk_buff ** pskb,
-		      struct ip_conntrack * ct,
-		      enum ip_conntrack_info ctinfo,
-		      unsigned char **data, TransportAddress * addr, int idx,
-		      u_int16_t port, struct ip_conntrack_expect * exp);
-
-
-static DEFINE_SPINLOCK(ip_h323_lock);
-static char *h323_buffer;
-
-/****************************************************************************/
-static int get_tpkt_data(struct sk_buff **pskb, struct ip_conntrack *ct,
-			 enum ip_conntrack_info ctinfo,
-			 unsigned char **data, int *datalen, int *dataoff)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	struct tcphdr _tcph, *th;
-	int tcpdatalen;
-	int tcpdataoff;
-	unsigned char *tpkt;
-	int tpktlen;
-	int tpktoff;
-
-	/* Get TCP header */
-	th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
-				sizeof(_tcph), &_tcph);
-	if (th == NULL)
-		return 0;
-
-	/* Get TCP data offset */
-	tcpdataoff = ip_hdrlen(*pskb) + th->doff * 4;
-
-	/* Get TCP data length */
-	tcpdatalen = (*pskb)->len - tcpdataoff;
-	if (tcpdatalen <= 0)	/* No TCP data */
-		goto clear_out;
-
-	if (*data == NULL) {	/* first TPKT */
-		/* Get first TPKT pointer */
-		tpkt = skb_header_pointer(*pskb, tcpdataoff, tcpdatalen,
-					  h323_buffer);
-		BUG_ON(tpkt == NULL);
-
-		/* Validate TPKT identifier */
-		if (tcpdatalen < 4 || tpkt[0] != 0x03 || tpkt[1] != 0) {
-			/* Netmeeting sends TPKT header and data separately */
-			if (info->tpkt_len[dir] > 0) {
-				DEBUGP("ip_ct_h323: previous packet "
-				       "indicated separate TPKT data of %hu "
-				       "bytes\n", info->tpkt_len[dir]);
-				if (info->tpkt_len[dir] <= tcpdatalen) {
-					/* Yes, there was a TPKT header
-					 * received */
-					*data = tpkt;
-					*datalen = info->tpkt_len[dir];
-					*dataoff = 0;
-					goto out;
-				}
-
-				/* Fragmented TPKT */
-				if (net_ratelimit())
-					printk("ip_ct_h323: "
-					       "fragmented TPKT\n");
-				goto clear_out;
-			}
-
-			/* It is not even a TPKT */
-			return 0;
-		}
-		tpktoff = 0;
-	} else {		/* Next TPKT */
-		tpktoff = *dataoff + *datalen;
-		tcpdatalen -= tpktoff;
-		if (tcpdatalen <= 4)	/* No more TPKT */
-			goto clear_out;
-		tpkt = *data + *datalen;
-
-		/* Validate TPKT identifier */
-		if (tpkt[0] != 0x03 || tpkt[1] != 0)
-			goto clear_out;
-	}
-
-	/* Validate TPKT length */
-	tpktlen = tpkt[2] * 256 + tpkt[3];
-	if (tpktlen < 4)
-		goto clear_out;
-	if (tpktlen > tcpdatalen) {
-		if (tcpdatalen == 4) {	/* Separate TPKT header */
-			/* Netmeeting sends TPKT header and data separately */
-			DEBUGP("ip_ct_h323: separate TPKT header indicates "
-			       "there will be TPKT data of %hu bytes\n",
-			       tpktlen - 4);
-			info->tpkt_len[dir] = tpktlen - 4;
-			return 0;
-		}
-
-		if (net_ratelimit())
-			printk("ip_ct_h323: incomplete TPKT (fragmented?)\n");
-		goto clear_out;
-	}
-
-	/* This is the encapsulated data */
-	*data = tpkt + 4;
-	*datalen = tpktlen - 4;
-	*dataoff = tpktoff + 4;
-
-      out:
-	/* Clear TPKT length */
-	info->tpkt_len[dir] = 0;
-	return 1;
-
-      clear_out:
-	info->tpkt_len[dir] = 0;
-	return 0;
-}
-
-/****************************************************************************/
-static int get_h245_addr(unsigned char *data, H245_TransportAddress * addr,
-			 __be32 * ip, u_int16_t * port)
-{
-	unsigned char *p;
-
-	if (addr->choice != eH245_TransportAddress_unicastAddress ||
-	    addr->unicastAddress.choice != eUnicastAddress_iPAddress)
-		return 0;
-
-	p = data + addr->unicastAddress.iPAddress.network;
-	*ip = htonl((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | (p[3]));
-	*port = (p[4] << 8) | (p[5]);
-
-	return 1;
-}
-
-/****************************************************************************/
-static int expect_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
-			   enum ip_conntrack_info ctinfo,
-			   unsigned char **data, int dataoff,
-			   H245_TransportAddress * addr)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int ret = 0;
-	__be32 ip;
-	u_int16_t port;
-	u_int16_t rtp_port;
-	struct ip_conntrack_expect *rtp_exp;
-	struct ip_conntrack_expect *rtcp_exp;
-	typeof(nat_rtp_rtcp_hook) nat_rtp_rtcp;
-
-	/* Read RTP or RTCP address */
-	if (!get_h245_addr(*data, addr, &ip, &port) ||
-	    ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
-		return 0;
-
-	/* RTP port is even */
-	rtp_port = port & (~1);
-
-	/* Create expect for RTP */
-	if ((rtp_exp = ip_conntrack_expect_alloc(ct)) == NULL)
-		return -1;
-	rtp_exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	rtp_exp->tuple.src.u.udp.port = 0;
-	rtp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-	rtp_exp->tuple.dst.u.udp.port = htons(rtp_port);
-	rtp_exp->tuple.dst.protonum = IPPROTO_UDP;
-	rtp_exp->mask.src.ip = htonl(0xFFFFFFFF);
-	rtp_exp->mask.src.u.udp.port = 0;
-	rtp_exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	rtp_exp->mask.dst.u.udp.port = htons(0xFFFF);
-	rtp_exp->mask.dst.protonum = 0xFF;
-	rtp_exp->flags = 0;
-
-	/* Create expect for RTCP */
-	if ((rtcp_exp = ip_conntrack_expect_alloc(ct)) == NULL) {
-		ip_conntrack_expect_put(rtp_exp);
-		return -1;
-	}
-	rtcp_exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	rtcp_exp->tuple.src.u.udp.port = 0;
-	rtcp_exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-	rtcp_exp->tuple.dst.u.udp.port = htons(rtp_port + 1);
-	rtcp_exp->tuple.dst.protonum = IPPROTO_UDP;
-	rtcp_exp->mask.src.ip = htonl(0xFFFFFFFF);
-	rtcp_exp->mask.src.u.udp.port = 0;
-	rtcp_exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	rtcp_exp->mask.dst.u.udp.port = htons(0xFFFF);
-	rtcp_exp->mask.dst.protonum = 0xFF;
-	rtcp_exp->flags = 0;
-
-	if (ct->tuplehash[dir].tuple.src.ip !=
-	    ct->tuplehash[!dir].tuple.dst.ip &&
-	    (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook))) {
-		/* NAT needed */
-		ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
-				   addr, port, rtp_port, rtp_exp, rtcp_exp);
-	} else {		/* Conntrack only */
-		rtp_exp->expectfn = NULL;
-		rtcp_exp->expectfn = NULL;
-
-		if (ip_conntrack_expect_related(rtp_exp) == 0) {
-			if (ip_conntrack_expect_related(rtcp_exp) == 0) {
-				DEBUGP("ip_ct_h323: expect RTP "
-				       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-				       NIPQUAD(rtp_exp->tuple.src.ip),
-				       ntohs(rtp_exp->tuple.src.u.udp.port),
-				       NIPQUAD(rtp_exp->tuple.dst.ip),
-				       ntohs(rtp_exp->tuple.dst.u.udp.port));
-				DEBUGP("ip_ct_h323: expect RTCP "
-				       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-				       NIPQUAD(rtcp_exp->tuple.src.ip),
-				       ntohs(rtcp_exp->tuple.src.u.udp.port),
-				       NIPQUAD(rtcp_exp->tuple.dst.ip),
-				       ntohs(rtcp_exp->tuple.dst.u.udp.port));
-			} else {
-				ip_conntrack_unexpect_related(rtp_exp);
-				ret = -1;
-			}
-		} else
-			ret = -1;
-	}
-
-	ip_conntrack_expect_put(rtp_exp);
-	ip_conntrack_expect_put(rtcp_exp);
-
-	return ret;
-}
-
-/****************************************************************************/
-static int expect_t120(struct sk_buff **pskb,
-		       struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, int dataoff,
-		       H245_TransportAddress * addr)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int ret = 0;
-	__be32 ip;
-	u_int16_t port;
-	struct ip_conntrack_expect *exp = NULL;
-	typeof(nat_t120_hook) nat_t120;
-
-	/* Read T.120 address */
-	if (!get_h245_addr(*data, addr, &ip, &port) ||
-	    ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
-		return 0;
-
-	/* Create expect for T.120 connections */
-	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
-		return -1;
-	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	exp->tuple.src.u.tcp.port = 0;
-	exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-	exp->tuple.dst.u.tcp.port = htons(port);
-	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = htonl(0xFFFFFFFF);
-	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.tcp.port = htons(0xFFFF);
-	exp->mask.dst.protonum = 0xFF;
-	exp->flags = IP_CT_EXPECT_PERMANENT;	/* Accept multiple channels */
-
-	if (ct->tuplehash[dir].tuple.src.ip !=
-	    ct->tuplehash[!dir].tuple.dst.ip &&
-	    (nat_t120 = rcu_dereference(nat_t120_hook))) {
-		/* NAT needed */
-		ret = nat_t120(pskb, ct, ctinfo, data, dataoff, addr,
-			       port, exp);
-	} else {		/* Conntrack only */
-		exp->expectfn = NULL;
-		if (ip_conntrack_expect_related(exp) == 0) {
-			DEBUGP("ip_ct_h323: expect T.120 "
-			       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-			       NIPQUAD(exp->tuple.src.ip),
-			       ntohs(exp->tuple.src.u.tcp.port),
-			       NIPQUAD(exp->tuple.dst.ip),
-			       ntohs(exp->tuple.dst.u.tcp.port));
-		} else
-			ret = -1;
-	}
-
-	ip_conntrack_expect_put(exp);
-
-	return ret;
-}
-
-/****************************************************************************/
-static int process_h245_channel(struct sk_buff **pskb,
-				struct ip_conntrack *ct,
-				enum ip_conntrack_info ctinfo,
-				unsigned char **data, int dataoff,
-				H2250LogicalChannelParameters * channel)
-{
-	int ret;
-
-	if (channel->options & eH2250LogicalChannelParameters_mediaChannel) {
-		/* RTP */
-		ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
-				      &channel->mediaChannel);
-		if (ret < 0)
-			return -1;
-	}
-
-	if (channel->
-	    options & eH2250LogicalChannelParameters_mediaControlChannel) {
-		/* RTCP */
-		ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
-				      &channel->mediaControlChannel);
-		if (ret < 0)
-			return -1;
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_olc(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, int dataoff,
-		       OpenLogicalChannel * olc)
-{
-	int ret;
-
-	DEBUGP("ip_ct_h323: OpenLogicalChannel\n");
-
-	if (olc->forwardLogicalChannelParameters.multiplexParameters.choice ==
-	    eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)
-	{
-		ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
-					   &olc->
-					   forwardLogicalChannelParameters.
-					   multiplexParameters.
-					   h2250LogicalChannelParameters);
-		if (ret < 0)
-			return -1;
-	}
-
-	if ((olc->options &
-	     eOpenLogicalChannel_reverseLogicalChannelParameters) &&
-	    (olc->reverseLogicalChannelParameters.options &
-	     eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters)
-	    && (olc->reverseLogicalChannelParameters.multiplexParameters.
-		choice ==
-		eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
-	{
-		ret =
-		    process_h245_channel(pskb, ct, ctinfo, data, dataoff,
-					 &olc->
-					 reverseLogicalChannelParameters.
-					 multiplexParameters.
-					 h2250LogicalChannelParameters);
-		if (ret < 0)
-			return -1;
-	}
-
-	if ((olc->options & eOpenLogicalChannel_separateStack) &&
-	    olc->forwardLogicalChannelParameters.dataType.choice ==
-	    eDataType_data &&
-	    olc->forwardLogicalChannelParameters.dataType.data.application.
-	    choice == eDataApplicationCapability_application_t120 &&
-	    olc->forwardLogicalChannelParameters.dataType.data.application.
-	    t120.choice == eDataProtocolCapability_separateLANStack &&
-	    olc->separateStack.networkAddress.choice ==
-	    eNetworkAccessParameters_networkAddress_localAreaAddress) {
-		ret = expect_t120(pskb, ct, ctinfo, data, dataoff,
-				  &olc->separateStack.networkAddress.
-				  localAreaAddress);
-		if (ret < 0)
-			return -1;
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_olca(struct sk_buff **pskb, struct ip_conntrack *ct,
-			enum ip_conntrack_info ctinfo,
-			unsigned char **data, int dataoff,
-			OpenLogicalChannelAck * olca)
-{
-	H2250LogicalChannelAckParameters *ack;
-	int ret;
-
-	DEBUGP("ip_ct_h323: OpenLogicalChannelAck\n");
-
-	if ((olca->options &
-	     eOpenLogicalChannelAck_reverseLogicalChannelParameters) &&
-	    (olca->reverseLogicalChannelParameters.options &
-	     eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters)
-	    && (olca->reverseLogicalChannelParameters.multiplexParameters.
-		choice ==
-		eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
-	{
-		ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
-					   &olca->
-					   reverseLogicalChannelParameters.
-					   multiplexParameters.
-					   h2250LogicalChannelParameters);
-		if (ret < 0)
-			return -1;
-	}
-
-	if ((olca->options &
-	     eOpenLogicalChannelAck_forwardMultiplexAckParameters) &&
-	    (olca->forwardMultiplexAckParameters.choice ==
-	     eOpenLogicalChannelAck_forwardMultiplexAckParameters_h2250LogicalChannelAckParameters))
-	{
-		ack = &olca->forwardMultiplexAckParameters.
-		    h2250LogicalChannelAckParameters;
-		if (ack->options &
-		    eH2250LogicalChannelAckParameters_mediaChannel) {
-			/* RTP */
-			ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
-					      &ack->mediaChannel);
-			if (ret < 0)
-				return -1;
-		}
-
-		if (ack->options &
-		    eH2250LogicalChannelAckParameters_mediaControlChannel) {
-			/* RTCP */
-			ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
-					      &ack->mediaControlChannel);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
-			enum ip_conntrack_info ctinfo,
-			unsigned char **data, int dataoff,
-			MultimediaSystemControlMessage * mscm)
-{
-	switch (mscm->choice) {
-	case eMultimediaSystemControlMessage_request:
-		if (mscm->request.choice ==
-		    eRequestMessage_openLogicalChannel) {
-			return process_olc(pskb, ct, ctinfo, data, dataoff,
-					   &mscm->request.openLogicalChannel);
-		}
-		DEBUGP("ip_ct_h323: H.245 Request %d\n",
-		       mscm->request.choice);
-		break;
-	case eMultimediaSystemControlMessage_response:
-		if (mscm->response.choice ==
-		    eResponseMessage_openLogicalChannelAck) {
-			return process_olca(pskb, ct, ctinfo, data, dataoff,
-					    &mscm->response.
-					    openLogicalChannelAck);
-		}
-		DEBUGP("ip_ct_h323: H.245 Response %d\n",
-		       mscm->response.choice);
-		break;
-	default:
-		DEBUGP("ip_ct_h323: H.245 signal %d\n", mscm->choice);
-		break;
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int h245_help(struct sk_buff **pskb, struct ip_conntrack *ct,
-		     enum ip_conntrack_info ctinfo)
-{
-	static MultimediaSystemControlMessage mscm;
-	unsigned char *data = NULL;
-	int datalen;
-	int dataoff;
-	int ret;
-
-	/* Until there's been traffic both ways, don't look in packets. */
-	if (ctinfo != IP_CT_ESTABLISHED
-	    && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
-		return NF_ACCEPT;
-	}
-	DEBUGP("ip_ct_h245: skblen = %u\n", (*pskb)->len);
-
-	spin_lock_bh(&ip_h323_lock);
-
-	/* Process each TPKT */
-	while (get_tpkt_data(pskb, ct, ctinfo, &data, &datalen, &dataoff)) {
-		DEBUGP("ip_ct_h245: TPKT %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
-		       NIPQUAD(ip_hdr(*pskb)->saddr),
-		       NIPQUAD(ip_hdr(*pskb)->daddr), datalen);
-
-		/* Decode H.245 signal */
-		ret = DecodeMultimediaSystemControlMessage(data, datalen,
-							   &mscm);
-		if (ret < 0) {
-			if (net_ratelimit())
-				printk("ip_ct_h245: decoding error: %s\n",
-				       ret == H323_ERROR_BOUND ?
-				       "out of bound" : "out of range");
-			/* We don't drop when decoding error */
-			break;
-		}
-
-		/* Process H.245 signal */
-		if (process_h245(pskb, ct, ctinfo, &data, dataoff, &mscm) < 0)
-			goto drop;
-	}
-
-	spin_unlock_bh(&ip_h323_lock);
-	return NF_ACCEPT;
-
-      drop:
-	spin_unlock_bh(&ip_h323_lock);
-	if (net_ratelimit())
-		printk("ip_ct_h245: packet dropped\n");
-	return NF_DROP;
-}
-
-/****************************************************************************/
-static struct ip_conntrack_helper ip_conntrack_helper_h245 = {
-	.name = "H.245",
-	.me = THIS_MODULE,
-	.max_expected = H323_RTP_CHANNEL_MAX * 4 + 2 /* T.120 */ ,
-	.timeout = 240,
-	.tuple = {.dst = {.protonum = IPPROTO_TCP}},
-	.mask = {.src = {.u = {0xFFFF}},
-		 .dst = {.protonum = 0xFF}},
-	.help = h245_help
-};
-
-/****************************************************************************/
-void ip_conntrack_h245_expect(struct ip_conntrack *new,
-			      struct ip_conntrack_expect *this)
-{
-	write_lock_bh(&ip_conntrack_lock);
-	new->helper = &ip_conntrack_helper_h245;
-	write_unlock_bh(&ip_conntrack_lock);
-}
-
-/****************************************************************************/
-int get_h225_addr(unsigned char *data, TransportAddress * addr,
-		  __be32 * ip, u_int16_t * port)
-{
-	unsigned char *p;
-
-	if (addr->choice != eTransportAddress_ipAddress)
-		return 0;
-
-	p = data + addr->ipAddress.ip;
-	*ip = htonl((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | (p[3]));
-	*port = (p[4] << 8) | (p[5]);
-
-	return 1;
-}
-
-/****************************************************************************/
-static int expect_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, int dataoff,
-		       TransportAddress * addr)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int ret = 0;
-	__be32 ip;
-	u_int16_t port;
-	struct ip_conntrack_expect *exp = NULL;
-	typeof(nat_h245_hook) nat_h245;
-
-	/* Read h245Address */
-	if (!get_h225_addr(*data, addr, &ip, &port) ||
-	    ip != ct->tuplehash[dir].tuple.src.ip || port == 0)
-		return 0;
-
-	/* Create expect for h245 connection */
-	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
-		return -1;
-	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	exp->tuple.src.u.tcp.port = 0;
-	exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-	exp->tuple.dst.u.tcp.port = htons(port);
-	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = htonl(0xFFFFFFFF);
-	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.tcp.port = htons(0xFFFF);
-	exp->mask.dst.protonum = 0xFF;
-	exp->flags = 0;
-
-	if (ct->tuplehash[dir].tuple.src.ip !=
-	    ct->tuplehash[!dir].tuple.dst.ip &&
-	    (nat_h245 = rcu_dereference(nat_h245_hook))) {
-		/* NAT needed */
-		ret = nat_h245(pskb, ct, ctinfo, data, dataoff, addr,
-			       port, exp);
-	} else {		/* Conntrack only */
-		exp->expectfn = ip_conntrack_h245_expect;
-
-		if (ip_conntrack_expect_related(exp) == 0) {
-			DEBUGP("ip_ct_q931: expect H.245 "
-			       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-			       NIPQUAD(exp->tuple.src.ip),
-			       ntohs(exp->tuple.src.u.tcp.port),
-			       NIPQUAD(exp->tuple.dst.ip),
-			       ntohs(exp->tuple.dst.u.tcp.port));
-		} else
-			ret = -1;
-	}
-
-	ip_conntrack_expect_put(exp);
-
-	return ret;
-}
-
-/* Forwarding declaration */
-void ip_conntrack_q931_expect(struct ip_conntrack *new,
-			      struct ip_conntrack_expect *this);
-
-/****************************************************************************/
-static int expect_callforwarding(struct sk_buff **pskb,
-				 struct ip_conntrack *ct,
-				 enum ip_conntrack_info ctinfo,
-				 unsigned char **data, int dataoff,
-				 TransportAddress * addr)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int ret = 0;
-	__be32 ip;
-	u_int16_t port;
-	struct ip_conntrack_expect *exp = NULL;
-	typeof(nat_callforwarding_hook) nat_callforwarding;
-
-	/* Read alternativeAddress */
-	if (!get_h225_addr(*data, addr, &ip, &port) || port == 0)
-		return 0;
-
-	/* If the calling party is on the same side of the forward-to party,
-	 * we don't need to track the second call */
-	if (callforward_filter) {
-		struct rtable *rt1, *rt2;
-		struct flowi fl1 = {
-			.fl4_dst = ip,
-		};
-		struct flowi fl2 = {
-			.fl4_dst = ct->tuplehash[!dir].tuple.src.ip,
-		};
-
-		if (ip_route_output_key(&rt1, &fl1) == 0) {
-			if (ip_route_output_key(&rt2, &fl2) == 0) {
-				if (rt1->rt_gateway == rt2->rt_gateway &&
-				    rt1->u.dst.dev  == rt2->u.dst.dev)
-					ret = 1;
-				dst_release(&rt2->u.dst);
-			}
-			dst_release(&rt1->u.dst);
-		}
-		if (ret) {
-			DEBUGP("ip_ct_q931: Call Forwarding not tracked\n");
-			return 0;
-		}
-	}
-
-	/* Create expect for the second call leg */
-	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
-		return -1;
-	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	exp->tuple.src.u.tcp.port = 0;
-	exp->tuple.dst.ip = ip;
-	exp->tuple.dst.u.tcp.port = htons(port);
-	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = htonl(0xFFFFFFFF);
-	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.tcp.port = htons(0xFFFF);
-	exp->mask.dst.protonum = 0xFF;
-	exp->flags = 0;
-
-	if (ct->tuplehash[dir].tuple.src.ip !=
-	    ct->tuplehash[!dir].tuple.dst.ip &&
-	    (nat_callforwarding = rcu_dereference(nat_callforwarding_hook))) {
-		/* Need NAT */
-		ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff,
-					 addr, port, exp);
-	} else {		/* Conntrack only */
-		exp->expectfn = ip_conntrack_q931_expect;
-
-		if (ip_conntrack_expect_related(exp) == 0) {
-			DEBUGP("ip_ct_q931: expect Call Forwarding "
-			       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-			       NIPQUAD(exp->tuple.src.ip),
-			       ntohs(exp->tuple.src.u.tcp.port),
-			       NIPQUAD(exp->tuple.dst.ip),
-			       ntohs(exp->tuple.dst.u.tcp.port));
-		} else
-			ret = -1;
-	}
-
-	ip_conntrack_expect_put(exp);
-
-	return ret;
-}
-
-/****************************************************************************/
-static int process_setup(struct sk_buff **pskb, struct ip_conntrack *ct,
-			 enum ip_conntrack_info ctinfo,
-			 unsigned char **data, int dataoff,
-			 Setup_UUIE * setup)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int ret;
-	int i;
-	__be32 ip;
-	u_int16_t port;
-	typeof(set_h225_addr_hook) set_h225_addr;
-
-	DEBUGP("ip_ct_q931: Setup\n");
-
-	if (setup->options & eSetup_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
-				  &setup->h245Address);
-		if (ret < 0)
-			return -1;
-	}
-
-	set_h225_addr = rcu_dereference(set_h225_addr_hook);
-
-	if ((setup->options & eSetup_UUIE_destCallSignalAddress) &&
-	    (set_h225_addr) &&
-	    get_h225_addr(*data, &setup->destCallSignalAddress, &ip, &port) &&
-	    ip != ct->tuplehash[!dir].tuple.src.ip) {
-		DEBUGP("ip_ct_q931: set destCallSignalAddress "
-		       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-		       NIPQUAD(ip), port,
-		       NIPQUAD(ct->tuplehash[!dir].tuple.src.ip),
-		       ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
-		ret = set_h225_addr(pskb, data, dataoff,
-				    &setup->destCallSignalAddress,
-				    ct->tuplehash[!dir].tuple.src.ip,
-				    ntohs(ct->tuplehash[!dir].tuple.src.
-					  u.tcp.port));
-		if (ret < 0)
-			return -1;
-	}
-
-	if ((setup->options & eSetup_UUIE_sourceCallSignalAddress) &&
-	    (set_h225_addr) &&
-	    get_h225_addr(*data, &setup->sourceCallSignalAddress, &ip, &port)
-	    && ip != ct->tuplehash[!dir].tuple.dst.ip) {
-		DEBUGP("ip_ct_q931: set sourceCallSignalAddress "
-		       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-		       NIPQUAD(ip), port,
-		       NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip),
-		       ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
-		ret = set_h225_addr(pskb, data, dataoff,
-				    &setup->sourceCallSignalAddress,
-				    ct->tuplehash[!dir].tuple.dst.ip,
-				    ntohs(ct->tuplehash[!dir].tuple.dst.
-					  u.tcp.port));
-		if (ret < 0)
-			return -1;
-	}
-
-	if (setup->options & eSetup_UUIE_fastStart) {
-		for (i = 0; i < setup->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
-					  &setup->fastStart.item[i]);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_callproceeding(struct sk_buff **pskb,
-				  struct ip_conntrack *ct,
-				  enum ip_conntrack_info ctinfo,
-				  unsigned char **data, int dataoff,
-				  CallProceeding_UUIE * callproc)
-{
-	int ret;
-	int i;
-
-	DEBUGP("ip_ct_q931: CallProceeding\n");
-
-	if (callproc->options & eCallProceeding_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
-				  &callproc->h245Address);
-		if (ret < 0)
-			return -1;
-	}
-
-	if (callproc->options & eCallProceeding_UUIE_fastStart) {
-		for (i = 0; i < callproc->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
-					  &callproc->fastStart.item[i]);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_connect(struct sk_buff **pskb, struct ip_conntrack *ct,
-			   enum ip_conntrack_info ctinfo,
-			   unsigned char **data, int dataoff,
-			   Connect_UUIE * connect)
-{
-	int ret;
-	int i;
-
-	DEBUGP("ip_ct_q931: Connect\n");
-
-	if (connect->options & eConnect_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
-				  &connect->h245Address);
-		if (ret < 0)
-			return -1;
-	}
-
-	if (connect->options & eConnect_UUIE_fastStart) {
-		for (i = 0; i < connect->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
-					  &connect->fastStart.item[i]);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_alerting(struct sk_buff **pskb, struct ip_conntrack *ct,
-			    enum ip_conntrack_info ctinfo,
-			    unsigned char **data, int dataoff,
-			    Alerting_UUIE * alert)
-{
-	int ret;
-	int i;
-
-	DEBUGP("ip_ct_q931: Alerting\n");
-
-	if (alert->options & eAlerting_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
-				  &alert->h245Address);
-		if (ret < 0)
-			return -1;
-	}
-
-	if (alert->options & eAlerting_UUIE_fastStart) {
-		for (i = 0; i < alert->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
-					  &alert->fastStart.item[i]);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_information(struct sk_buff **pskb,
-			       struct ip_conntrack *ct,
-			       enum ip_conntrack_info ctinfo,
-			       unsigned char **data, int dataoff,
-			       Information_UUIE * info)
-{
-	int ret;
-	int i;
-
-	DEBUGP("ip_ct_q931: Information\n");
-
-	if (info->options & eInformation_UUIE_fastStart) {
-		for (i = 0; i < info->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
-					  &info->fastStart.item[i]);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_facility(struct sk_buff **pskb, struct ip_conntrack *ct,
-			    enum ip_conntrack_info ctinfo,
-			    unsigned char **data, int dataoff,
-			    Facility_UUIE * facility)
-{
-	int ret;
-	int i;
-
-	DEBUGP("ip_ct_q931: Facility\n");
-
-	if (facility->reason.choice == eFacilityReason_callForwarded) {
-		if (facility->options & eFacility_UUIE_alternativeAddress)
-			return expect_callforwarding(pskb, ct, ctinfo, data,
-						     dataoff,
-						     &facility->
-						     alternativeAddress);
-		return 0;
-	}
-
-	if (facility->options & eFacility_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
-				  &facility->h245Address);
-		if (ret < 0)
-			return -1;
-	}
-
-	if (facility->options & eFacility_UUIE_fastStart) {
-		for (i = 0; i < facility->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
-					  &facility->fastStart.item[i]);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_progress(struct sk_buff **pskb, struct ip_conntrack *ct,
-			    enum ip_conntrack_info ctinfo,
-			    unsigned char **data, int dataoff,
-			    Progress_UUIE * progress)
-{
-	int ret;
-	int i;
-
-	DEBUGP("ip_ct_q931: Progress\n");
-
-	if (progress->options & eProgress_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
-				  &progress->h245Address);
-		if (ret < 0)
-			return -1;
-	}
-
-	if (progress->options & eProgress_UUIE_fastStart) {
-		for (i = 0; i < progress->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
-					  &progress->fastStart.item[i]);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
-			enum ip_conntrack_info ctinfo,
-			unsigned char **data, int dataoff, Q931 * q931)
-{
-	H323_UU_PDU *pdu = &q931->UUIE.h323_uu_pdu;
-	int i;
-	int ret = 0;
-
-	switch (pdu->h323_message_body.choice) {
-	case eH323_UU_PDU_h323_message_body_setup:
-		ret = process_setup(pskb, ct, ctinfo, data, dataoff,
-				    &pdu->h323_message_body.setup);
-		break;
-	case eH323_UU_PDU_h323_message_body_callProceeding:
-		ret = process_callproceeding(pskb, ct, ctinfo, data, dataoff,
-					     &pdu->h323_message_body.
-					     callProceeding);
-		break;
-	case eH323_UU_PDU_h323_message_body_connect:
-		ret = process_connect(pskb, ct, ctinfo, data, dataoff,
-				      &pdu->h323_message_body.connect);
-		break;
-	case eH323_UU_PDU_h323_message_body_alerting:
-		ret = process_alerting(pskb, ct, ctinfo, data, dataoff,
-				       &pdu->h323_message_body.alerting);
-		break;
-	case eH323_UU_PDU_h323_message_body_information:
-		ret = process_information(pskb, ct, ctinfo, data, dataoff,
-					  &pdu->h323_message_body.
-					  information);
-		break;
-	case eH323_UU_PDU_h323_message_body_facility:
-		ret = process_facility(pskb, ct, ctinfo, data, dataoff,
-				       &pdu->h323_message_body.facility);
-		break;
-	case eH323_UU_PDU_h323_message_body_progress:
-		ret = process_progress(pskb, ct, ctinfo, data, dataoff,
-				       &pdu->h323_message_body.progress);
-		break;
-	default:
-		DEBUGP("ip_ct_q931: Q.931 signal %d\n",
-		       pdu->h323_message_body.choice);
-		break;
-	}
-
-	if (ret < 0)
-		return -1;
-
-	if (pdu->options & eH323_UU_PDU_h245Control) {
-		for (i = 0; i < pdu->h245Control.count; i++) {
-			ret = process_h245(pskb, ct, ctinfo, data, dataoff,
-					   &pdu->h245Control.item[i]);
-			if (ret < 0)
-				return -1;
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int q931_help(struct sk_buff **pskb, struct ip_conntrack *ct,
-		     enum ip_conntrack_info ctinfo)
-{
-	static Q931 q931;
-	unsigned char *data = NULL;
-	int datalen;
-	int dataoff;
-	int ret;
-
-	/* Until there's been traffic both ways, don't look in packets. */
-	if (ctinfo != IP_CT_ESTABLISHED
-	    && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
-		return NF_ACCEPT;
-	}
-	DEBUGP("ip_ct_q931: skblen = %u\n", (*pskb)->len);
-
-	spin_lock_bh(&ip_h323_lock);
-
-	/* Process each TPKT */
-	while (get_tpkt_data(pskb, ct, ctinfo, &data, &datalen, &dataoff)) {
-		DEBUGP("ip_ct_q931: TPKT %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
-		       NIPQUAD(ip_hdr(*pskb)->saddr),
-		       NIPQUAD(ip_hdr(*pskb)->daddr), datalen);
-
-		/* Decode Q.931 signal */
-		ret = DecodeQ931(data, datalen, &q931);
-		if (ret < 0) {
-			if (net_ratelimit())
-				printk("ip_ct_q931: decoding error: %s\n",
-				       ret == H323_ERROR_BOUND ?
-				       "out of bound" : "out of range");
-			/* We don't drop when decoding error */
-			break;
-		}
-
-		/* Process Q.931 signal */
-		if (process_q931(pskb, ct, ctinfo, &data, dataoff, &q931) < 0)
-			goto drop;
-	}
-
-	spin_unlock_bh(&ip_h323_lock);
-	return NF_ACCEPT;
-
-      drop:
-	spin_unlock_bh(&ip_h323_lock);
-	if (net_ratelimit())
-		printk("ip_ct_q931: packet dropped\n");
-	return NF_DROP;
-}
-
-/****************************************************************************/
-static struct ip_conntrack_helper ip_conntrack_helper_q931 = {
-	.name = "Q.931",
-	.me = THIS_MODULE,
-	.max_expected = H323_RTP_CHANNEL_MAX * 4 + 4 /* T.120 and H.245 */ ,
-	.timeout = 240,
-	.tuple = {.src = {.u = {.tcp = {.port = __constant_htons(Q931_PORT)}}},
-		  .dst = {.protonum = IPPROTO_TCP}},
-	.mask = {.src = {.u = {0xFFFF}},
-		 .dst = {.protonum = 0xFF}},
-	.help = q931_help
-};
-
-/****************************************************************************/
-void ip_conntrack_q931_expect(struct ip_conntrack *new,
-			      struct ip_conntrack_expect *this)
-{
-	write_lock_bh(&ip_conntrack_lock);
-	new->helper = &ip_conntrack_helper_q931;
-	write_unlock_bh(&ip_conntrack_lock);
-}
-
-/****************************************************************************/
-static unsigned char *get_udp_data(struct sk_buff **pskb, int *datalen)
-{
-	struct udphdr _uh, *uh;
-	int dataoff;
-
-	uh = skb_header_pointer(*pskb, ip_hdrlen(*pskb), sizeof(_uh), &_uh);
-	if (uh == NULL)
-		return NULL;
-	dataoff = ip_hdrlen(*pskb) + sizeof(_uh);
-	if (dataoff >= (*pskb)->len)
-		return NULL;
-	*datalen = (*pskb)->len - dataoff;
-	return skb_header_pointer(*pskb, dataoff, *datalen, h323_buffer);
-}
-
-/****************************************************************************/
-static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct,
-					       __be32 ip, u_int16_t port)
-{
-	struct ip_conntrack_expect *exp;
-	struct ip_conntrack_tuple tuple;
-
-	tuple.src.ip = 0;
-	tuple.src.u.tcp.port = 0;
-	tuple.dst.ip = ip;
-	tuple.dst.u.tcp.port = htons(port);
-	tuple.dst.protonum = IPPROTO_TCP;
-
-	exp = __ip_conntrack_expect_find(&tuple);
-	if (exp && exp->master == ct)
-		return exp;
-	return NULL;
-}
-
-/****************************************************************************/
-static int set_expect_timeout(struct ip_conntrack_expect *exp,
-			      unsigned timeout)
-{
-	if (!exp || !del_timer(&exp->timeout))
-		return 0;
-
-	exp->timeout.expires = jiffies + timeout * HZ;
-	add_timer(&exp->timeout);
-
-	return 1;
-}
-
-/****************************************************************************/
-static int expect_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data,
-		       TransportAddress * addr, int count)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	int ret = 0;
-	int i;
-	__be32 ip;
-	u_int16_t port;
-	struct ip_conntrack_expect *exp;
-	typeof(nat_q931_hook) nat_q931;
-
-	/* Look for the first related address */
-	for (i = 0; i < count; i++) {
-		if (get_h225_addr(*data, &addr[i], &ip, &port) &&
-		    ip == ct->tuplehash[dir].tuple.src.ip && port != 0)
-			break;
-	}
-
-	if (i >= count)		/* Not found */
-		return 0;
-
-	/* Create expect for Q.931 */
-	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
-		return -1;
-	exp->tuple.src.ip = gkrouted_only ?	/* only accept calls from GK? */
-	    ct->tuplehash[!dir].tuple.src.ip : 0;
-	exp->tuple.src.u.tcp.port = 0;
-	exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-	exp->tuple.dst.u.tcp.port = htons(port);
-	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = gkrouted_only ? htonl(0xFFFFFFFF) : 0;
-	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.tcp.port = htons(0xFFFF);
-	exp->mask.dst.protonum = 0xFF;
-	exp->flags = IP_CT_EXPECT_PERMANENT;	/* Accept multiple calls */
-
-	nat_q931 = rcu_dereference(nat_q931_hook);
-	if (nat_q931) {	/* Need NAT */
-		ret = nat_q931(pskb, ct, ctinfo, data, addr, i, port, exp);
-	} else {		/* Conntrack only */
-		exp->expectfn = ip_conntrack_q931_expect;
-
-		if (ip_conntrack_expect_related(exp) == 0) {
-			DEBUGP("ip_ct_ras: expect Q.931 "
-			       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-			       NIPQUAD(exp->tuple.src.ip),
-			       ntohs(exp->tuple.src.u.tcp.port),
-			       NIPQUAD(exp->tuple.dst.ip),
-			       ntohs(exp->tuple.dst.u.tcp.port));
-
-			/* Save port for looking up expect in processing RCF */
-			info->sig_port[dir] = port;
-		} else
-			ret = -1;
-	}
-
-	ip_conntrack_expect_put(exp);
-
-	return ret;
-}
-
-/****************************************************************************/
-static int process_grq(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, GatekeeperRequest * grq)
-{
-	typeof(set_ras_addr_hook) set_ras_addr;
-
-	DEBUGP("ip_ct_ras: GRQ\n");
-
-	set_ras_addr = rcu_dereference(set_ras_addr_hook);
-	if (set_ras_addr)	/* NATed */
-		return set_ras_addr(pskb, ct, ctinfo, data,
-				    &grq->rasAddress, 1);
-	return 0;
-}
-
-/* Declare before using */
-static void ip_conntrack_ras_expect(struct ip_conntrack *new,
-				    struct ip_conntrack_expect *this);
-
-/****************************************************************************/
-static int process_gcf(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, GatekeeperConfirm * gcf)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int ret = 0;
-	__be32 ip;
-	u_int16_t port;
-	struct ip_conntrack_expect *exp;
-
-	DEBUGP("ip_ct_ras: GCF\n");
-
-	if (!get_h225_addr(*data, &gcf->rasAddress, &ip, &port))
-		return 0;
-
-	/* Registration port is the same as discovery port */
-	if (ip == ct->tuplehash[dir].tuple.src.ip &&
-	    port == ntohs(ct->tuplehash[dir].tuple.src.u.udp.port))
-		return 0;
-
-	/* Avoid RAS expectation loops. A GCF is never expected. */
-	if (test_bit(IPS_EXPECTED_BIT, &ct->status))
-		return 0;
-
-	/* Need new expect */
-	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
-		return -1;
-	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	exp->tuple.src.u.tcp.port = 0;
-	exp->tuple.dst.ip = ip;
-	exp->tuple.dst.u.tcp.port = htons(port);
-	exp->tuple.dst.protonum = IPPROTO_UDP;
-	exp->mask.src.ip = htonl(0xFFFFFFFF);
-	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.tcp.port = htons(0xFFFF);
-	exp->mask.dst.protonum = 0xFF;
-	exp->flags = 0;
-	exp->expectfn = ip_conntrack_ras_expect;
-	if (ip_conntrack_expect_related(exp) == 0) {
-		DEBUGP("ip_ct_ras: expect RAS "
-		       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-		       NIPQUAD(exp->tuple.src.ip),
-		       ntohs(exp->tuple.src.u.tcp.port),
-		       NIPQUAD(exp->tuple.dst.ip),
-		       ntohs(exp->tuple.dst.u.tcp.port));
-	} else
-		ret = -1;
-
-	ip_conntrack_expect_put(exp);
-
-	return ret;
-}
-
-/****************************************************************************/
-static int process_rrq(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, RegistrationRequest * rrq)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int ret;
-	typeof(set_ras_addr_hook) set_ras_addr;
-
-	DEBUGP("ip_ct_ras: RRQ\n");
-
-	ret = expect_q931(pskb, ct, ctinfo, data,
-			  rrq->callSignalAddress.item,
-			  rrq->callSignalAddress.count);
-	if (ret < 0)
-		return -1;
-
-	set_ras_addr = rcu_dereference(set_ras_addr_hook);
-	if (set_ras_addr) {
-		ret = set_ras_addr(pskb, ct, ctinfo, data,
-				   rrq->rasAddress.item,
-				   rrq->rasAddress.count);
-		if (ret < 0)
-			return -1;
-	}
-
-	if (rrq->options & eRegistrationRequest_timeToLive) {
-		DEBUGP("ip_ct_ras: RRQ TTL = %u seconds\n", rrq->timeToLive);
-		info->timeout = rrq->timeToLive;
-	} else
-		info->timeout = default_rrq_ttl;
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_rcf(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, RegistrationConfirm * rcf)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	int ret;
-	struct ip_conntrack_expect *exp;
-	typeof(set_sig_addr_hook) set_sig_addr;
-
-	DEBUGP("ip_ct_ras: RCF\n");
-
-	set_sig_addr = rcu_dereference(set_sig_addr_hook);
-	if (set_sig_addr) {
-		ret = set_sig_addr(pskb, ct, ctinfo, data,
-				   rcf->callSignalAddress.item,
-				   rcf->callSignalAddress.count);
-		if (ret < 0)
-			return -1;
-	}
-
-	if (rcf->options & eRegistrationConfirm_timeToLive) {
-		DEBUGP("ip_ct_ras: RCF TTL = %u seconds\n", rcf->timeToLive);
-		info->timeout = rcf->timeToLive;
-	}
-
-	if (info->timeout > 0) {
-		DEBUGP
-		    ("ip_ct_ras: set RAS connection timeout to %u seconds\n",
-		     info->timeout);
-		ip_ct_refresh(ct, *pskb, info->timeout * HZ);
-
-		/* Set expect timeout */
-		read_lock_bh(&ip_conntrack_lock);
-		exp = find_expect(ct, ct->tuplehash[dir].tuple.dst.ip,
-				  info->sig_port[!dir]);
-		if (exp) {
-			DEBUGP("ip_ct_ras: set Q.931 expect "
-			       "(%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu) "
-			       "timeout to %u seconds\n",
-			       NIPQUAD(exp->tuple.src.ip),
-			       ntohs(exp->tuple.src.u.tcp.port),
-			       NIPQUAD(exp->tuple.dst.ip),
-			       ntohs(exp->tuple.dst.u.tcp.port),
-			       info->timeout);
-			set_expect_timeout(exp, info->timeout);
-		}
-		read_unlock_bh(&ip_conntrack_lock);
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_urq(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, UnregistrationRequest * urq)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	int ret;
-	typeof(set_sig_addr_hook) set_sig_addr;
-
-	DEBUGP("ip_ct_ras: URQ\n");
-
-	set_sig_addr = rcu_dereference(set_sig_addr_hook);
-	if (set_sig_addr) {
-		ret = set_sig_addr(pskb, ct, ctinfo, data,
-				   urq->callSignalAddress.item,
-				   urq->callSignalAddress.count);
-		if (ret < 0)
-			return -1;
-	}
-
-	/* Clear old expect */
-	ip_ct_remove_expectations(ct);
-	info->sig_port[dir] = 0;
-	info->sig_port[!dir] = 0;
-
-	/* Give it 30 seconds for UCF or URJ */
-	ip_ct_refresh(ct, *pskb, 30 * HZ);
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_arq(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, AdmissionRequest * arq)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	__be32 ip;
-	u_int16_t port;
-	typeof(set_h225_addr_hook) set_h225_addr;
-
-	DEBUGP("ip_ct_ras: ARQ\n");
-
-	set_h225_addr = rcu_dereference(set_h225_addr_hook);
-	if ((arq->options & eAdmissionRequest_destCallSignalAddress) &&
-	    get_h225_addr(*data, &arq->destCallSignalAddress, &ip, &port) &&
-	    ip == ct->tuplehash[dir].tuple.src.ip &&
-	    port == info->sig_port[dir] && set_h225_addr) {
-		/* Answering ARQ */
-		return set_h225_addr(pskb, data, 0,
-				     &arq->destCallSignalAddress,
-				     ct->tuplehash[!dir].tuple.dst.ip,
-				     info->sig_port[!dir]);
-	}
-
-	if ((arq->options & eAdmissionRequest_srcCallSignalAddress) &&
-	    get_h225_addr(*data, &arq->srcCallSignalAddress, &ip, &port) &&
-	    ip == ct->tuplehash[dir].tuple.src.ip && set_h225_addr) {
-		/* Calling ARQ */
-		return set_h225_addr(pskb, data, 0,
-				     &arq->srcCallSignalAddress,
-				     ct->tuplehash[!dir].tuple.dst.ip,
-				     port);
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_acf(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, AdmissionConfirm * acf)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int ret = 0;
-	__be32 ip;
-	u_int16_t port;
-	struct ip_conntrack_expect *exp;
-	typeof(set_sig_addr_hook) set_sig_addr;
-
-	DEBUGP("ip_ct_ras: ACF\n");
-
-	if (!get_h225_addr(*data, &acf->destCallSignalAddress, &ip, &port))
-		return 0;
-
-	if (ip == ct->tuplehash[dir].tuple.dst.ip) {	/* Answering ACF */
-		set_sig_addr = rcu_dereference(set_sig_addr_hook);
-		if (set_sig_addr)
-			return set_sig_addr(pskb, ct, ctinfo, data,
-					    &acf->destCallSignalAddress, 1);
-		return 0;
-	}
-
-	/* Need new expect */
-	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
-		return -1;
-	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	exp->tuple.src.u.tcp.port = 0;
-	exp->tuple.dst.ip = ip;
-	exp->tuple.dst.u.tcp.port = htons(port);
-	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = htonl(0xFFFFFFFF);
-	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.tcp.port = htons(0xFFFF);
-	exp->mask.dst.protonum = 0xFF;
-	exp->flags = IP_CT_EXPECT_PERMANENT;
-	exp->expectfn = ip_conntrack_q931_expect;
-
-	if (ip_conntrack_expect_related(exp) == 0) {
-		DEBUGP("ip_ct_ras: expect Q.931 "
-		       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-		       NIPQUAD(exp->tuple.src.ip),
-		       ntohs(exp->tuple.src.u.tcp.port),
-		       NIPQUAD(exp->tuple.dst.ip),
-		       ntohs(exp->tuple.dst.u.tcp.port));
-	} else
-		ret = -1;
-
-	ip_conntrack_expect_put(exp);
-
-	return ret;
-}
-
-/****************************************************************************/
-static int process_lrq(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, LocationRequest * lrq)
-{
-	typeof(set_ras_addr_hook) set_ras_addr;
-
-	DEBUGP("ip_ct_ras: LRQ\n");
-
-	set_ras_addr = rcu_dereference(set_ras_addr_hook);
-	if (set_ras_addr)
-		return set_ras_addr(pskb, ct, ctinfo, data,
-				    &lrq->replyAddress, 1);
-	return 0;
-}
-
-/****************************************************************************/
-static int process_lcf(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, LocationConfirm * lcf)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int ret = 0;
-	__be32 ip;
-	u_int16_t port;
-	struct ip_conntrack_expect *exp = NULL;
-
-	DEBUGP("ip_ct_ras: LCF\n");
-
-	if (!get_h225_addr(*data, &lcf->callSignalAddress, &ip, &port))
-		return 0;
-
-	/* Need new expect for call signal */
-	if ((exp = ip_conntrack_expect_alloc(ct)) == NULL)
-		return -1;
-	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	exp->tuple.src.u.tcp.port = 0;
-	exp->tuple.dst.ip = ip;
-	exp->tuple.dst.u.tcp.port = htons(port);
-	exp->tuple.dst.protonum = IPPROTO_TCP;
-	exp->mask.src.ip = htonl(0xFFFFFFFF);
-	exp->mask.src.u.tcp.port = 0;
-	exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.tcp.port = htons(0xFFFF);
-	exp->mask.dst.protonum = 0xFF;
-	exp->flags = IP_CT_EXPECT_PERMANENT;
-	exp->expectfn = ip_conntrack_q931_expect;
-
-	if (ip_conntrack_expect_related(exp) == 0) {
-		DEBUGP("ip_ct_ras: expect Q.931 "
-		       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-		       NIPQUAD(exp->tuple.src.ip),
-		       ntohs(exp->tuple.src.u.tcp.port),
-		       NIPQUAD(exp->tuple.dst.ip),
-		       ntohs(exp->tuple.dst.u.tcp.port));
-	} else
-		ret = -1;
-
-	ip_conntrack_expect_put(exp);
-
-	/* Ignore rasAddress */
-
-	return ret;
-}
-
-/****************************************************************************/
-static int process_irr(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, InfoRequestResponse * irr)
-{
-	int ret;
-	typeof(set_ras_addr_hook) set_ras_addr;
-	typeof(set_sig_addr_hook) set_sig_addr;
-
-	DEBUGP("ip_ct_ras: IRR\n");
-
-	set_ras_addr = rcu_dereference(set_ras_addr_hook);
-	if (set_ras_addr) {
-		ret = set_ras_addr(pskb, ct, ctinfo, data,
-				   &irr->rasAddress, 1);
-		if (ret < 0)
-			return -1;
-	}
-
-	set_sig_addr = rcu_dereference(set_sig_addr_hook);
-	if (set_sig_addr) {
-		ret = set_sig_addr(pskb, ct, ctinfo, data,
-				   irr->callSignalAddress.item,
-				   irr->callSignalAddress.count);
-		if (ret < 0)
-			return -1;
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int process_ras(struct sk_buff **pskb, struct ip_conntrack *ct,
-		       enum ip_conntrack_info ctinfo,
-		       unsigned char **data, RasMessage * ras)
-{
-	switch (ras->choice) {
-	case eRasMessage_gatekeeperRequest:
-		return process_grq(pskb, ct, ctinfo, data,
-				   &ras->gatekeeperRequest);
-	case eRasMessage_gatekeeperConfirm:
-		return process_gcf(pskb, ct, ctinfo, data,
-				   &ras->gatekeeperConfirm);
-	case eRasMessage_registrationRequest:
-		return process_rrq(pskb, ct, ctinfo, data,
-				   &ras->registrationRequest);
-	case eRasMessage_registrationConfirm:
-		return process_rcf(pskb, ct, ctinfo, data,
-				   &ras->registrationConfirm);
-	case eRasMessage_unregistrationRequest:
-		return process_urq(pskb, ct, ctinfo, data,
-				   &ras->unregistrationRequest);
-	case eRasMessage_admissionRequest:
-		return process_arq(pskb, ct, ctinfo, data,
-				   &ras->admissionRequest);
-	case eRasMessage_admissionConfirm:
-		return process_acf(pskb, ct, ctinfo, data,
-				   &ras->admissionConfirm);
-	case eRasMessage_locationRequest:
-		return process_lrq(pskb, ct, ctinfo, data,
-				   &ras->locationRequest);
-	case eRasMessage_locationConfirm:
-		return process_lcf(pskb, ct, ctinfo, data,
-				   &ras->locationConfirm);
-	case eRasMessage_infoRequestResponse:
-		return process_irr(pskb, ct, ctinfo, data,
-				   &ras->infoRequestResponse);
-	default:
-		DEBUGP("ip_ct_ras: RAS message %d\n", ras->choice);
-		break;
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int ras_help(struct sk_buff **pskb, struct ip_conntrack *ct,
-		    enum ip_conntrack_info ctinfo)
-{
-	static RasMessage ras;
-	unsigned char *data;
-	int datalen = 0;
-	int ret;
-
-	DEBUGP("ip_ct_ras: skblen = %u\n", (*pskb)->len);
-
-	spin_lock_bh(&ip_h323_lock);
-
-	/* Get UDP data */
-	data = get_udp_data(pskb, &datalen);
-	if (data == NULL)
-		goto accept;
-	DEBUGP("ip_ct_ras: RAS message %u.%u.%u.%u->%u.%u.%u.%u, len=%d\n",
-	       NIPQUAD(ip_hdr(*pskb)->saddr),
-	       NIPQUAD(ip_hdr(*pskb)->daddr), datalen);
-
-	/* Decode RAS message */
-	ret = DecodeRasMessage(data, datalen, &ras);
-	if (ret < 0) {
-		if (net_ratelimit())
-			printk("ip_ct_ras: decoding error: %s\n",
-			       ret == H323_ERROR_BOUND ?
-			       "out of bound" : "out of range");
-		goto accept;
-	}
-
-	/* Process RAS message */
-	if (process_ras(pskb, ct, ctinfo, &data, &ras) < 0)
-		goto drop;
-
-      accept:
-	spin_unlock_bh(&ip_h323_lock);
-	return NF_ACCEPT;
-
-      drop:
-	spin_unlock_bh(&ip_h323_lock);
-	if (net_ratelimit())
-		printk("ip_ct_ras: packet dropped\n");
-	return NF_DROP;
-}
-
-/****************************************************************************/
-static struct ip_conntrack_helper ip_conntrack_helper_ras = {
-	.name = "RAS",
-	.me = THIS_MODULE,
-	.max_expected = 32,
-	.timeout = 240,
-	.tuple = {.src = {.u = {.tcp = {.port = __constant_htons(RAS_PORT)}}},
-		  .dst = {.protonum = IPPROTO_UDP}},
-	.mask = {.src = {.u = {0xFFFE}},
-		 .dst = {.protonum = 0xFF}},
-	.help = ras_help,
-};
-
-/****************************************************************************/
-static void ip_conntrack_ras_expect(struct ip_conntrack *new,
-				    struct ip_conntrack_expect *this)
-{
-	write_lock_bh(&ip_conntrack_lock);
-	new->helper = &ip_conntrack_helper_ras;
-	write_unlock_bh(&ip_conntrack_lock);
-}
-
-/****************************************************************************/
-/* Not __exit - called from init() */
-static void fini(void)
-{
-	ip_conntrack_helper_unregister(&ip_conntrack_helper_ras);
-	ip_conntrack_helper_unregister(&ip_conntrack_helper_q931);
-	kfree(h323_buffer);
-	DEBUGP("ip_ct_h323: fini\n");
-}
-
-/****************************************************************************/
-static int __init init(void)
-{
-	int ret;
-
-	h323_buffer = kmalloc(65536, GFP_KERNEL);
-	if (!h323_buffer)
-		return -ENOMEM;
-	if ((ret = ip_conntrack_helper_register(&ip_conntrack_helper_q931)) ||
-	    (ret = ip_conntrack_helper_register(&ip_conntrack_helper_ras))) {
-		fini();
-		return ret;
-	}
-	DEBUGP("ip_ct_h323: init success\n");
-	return 0;
-}
-
-/****************************************************************************/
-module_init(init);
-module_exit(fini);
-
-EXPORT_SYMBOL_GPL(get_h225_addr);
-EXPORT_SYMBOL_GPL(ip_conntrack_h245_expect);
-EXPORT_SYMBOL_GPL(ip_conntrack_q931_expect);
-EXPORT_SYMBOL_GPL(set_h245_addr_hook);
-EXPORT_SYMBOL_GPL(set_h225_addr_hook);
-EXPORT_SYMBOL_GPL(set_sig_addr_hook);
-EXPORT_SYMBOL_GPL(set_ras_addr_hook);
-EXPORT_SYMBOL_GPL(nat_rtp_rtcp_hook);
-EXPORT_SYMBOL_GPL(nat_t120_hook);
-EXPORT_SYMBOL_GPL(nat_h245_hook);
-EXPORT_SYMBOL_GPL(nat_callforwarding_hook);
-EXPORT_SYMBOL_GPL(nat_q931_hook);
-
-MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
-MODULE_DESCRIPTION("H.323 connection tracking helper");
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
deleted file mode 100644
index f5ab8e4b97c..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ /dev/null
@@ -1,684 +0,0 @@
-/*
- * ip_conntrack_pptp.c	- Version 3.0
- *
- * Connection tracking support for PPTP (Point to Point Tunneling Protocol).
- * PPTP is a a protocol for creating virtual private networks.
- * It is a specification defined by Microsoft and some vendors
- * working with Microsoft.  PPTP is built on top of a modified
- * version of the Internet Generic Routing Encapsulation Protocol.
- * GRE is defined in RFC 1701 and RFC 1702.  Documentation of
- * PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- * Limitations:
- * 	 - We blindly assume that control connections are always
- * 	   established in PNS->PAC direction.  This is a violation
- * 	   of RFFC2673
- * 	 - We can only support one single call within each session
- *
- * TODO:
- *	 - testing of incoming PPTP calls
- *
- * Changes:
- * 	2002-02-05 - Version 1.3
- * 	  - Call ip_conntrack_unexpect_related() from
- * 	    pptp_destroy_siblings() to destroy expectations in case
- * 	    CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
- * 	    (Philip Craig <philipc@snapgear.com>)
- * 	  - Add Version information at module loadtime
- * 	2002-02-10 - Version 1.6
- * 	  - move to C99 style initializers
- * 	  - remove second expectation if first arrives
- * 	2004-10-22 - Version 2.0
- * 	  - merge Mandrake's 2.6.x port with recent 2.6.x API changes
- * 	  - fix lots of linear skb assumptions from Mandrake's port
- * 	2005-06-10 - Version 2.1
- * 	  - use ip_conntrack_expect_free() instead of kfree() on the
- * 	    expect's (which are from the slab for quite some time)
- * 	2005-06-10 - Version 3.0
- * 	  - port helper to post-2.6.11 API changes,
- * 	    funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
- * 	2005-07-30 - Version 3.1
- * 	  - port helper to 2.6.13 API changes
- *
- */
-
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-#define IP_CT_PPTP_VERSION "3.1"
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
-
-static DEFINE_SPINLOCK(ip_pptp_lock);
-
-int
-(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb,
-			  struct ip_conntrack *ct,
-			  enum ip_conntrack_info ctinfo,
-			  struct PptpControlHeader *ctlh,
-			  union pptp_ctrl_union *pptpReq);
-
-int
-(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb,
-			  struct ip_conntrack *ct,
-			  enum ip_conntrack_info ctinfo,
-			  struct PptpControlHeader *ctlh,
-			  union pptp_ctrl_union *pptpReq);
-
-void
-(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
-			    struct ip_conntrack_expect *expect_reply);
-
-void
-(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct,
-			     struct ip_conntrack_expect *exp);
-
-#if 0
-/* PptpControlMessageType names */
-const char *pptp_msg_name[] = {
-	"UNKNOWN_MESSAGE",
-	"START_SESSION_REQUEST",
-	"START_SESSION_REPLY",
-	"STOP_SESSION_REQUEST",
-	"STOP_SESSION_REPLY",
-	"ECHO_REQUEST",
-	"ECHO_REPLY",
-	"OUT_CALL_REQUEST",
-	"OUT_CALL_REPLY",
-	"IN_CALL_REQUEST",
-	"IN_CALL_REPLY",
-	"IN_CALL_CONNECT",
-	"CALL_CLEAR_REQUEST",
-	"CALL_DISCONNECT_NOTIFY",
-	"WAN_ERROR_NOTIFY",
-	"SET_LINK_INFO"
-};
-EXPORT_SYMBOL(pptp_msg_name);
-#define DEBUGP(format, args...)	printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#define SECS *HZ
-#define MINS * 60 SECS
-#define HOURS * 60 MINS
-
-#define PPTP_GRE_TIMEOUT 		(10 MINS)
-#define PPTP_GRE_STREAM_TIMEOUT 	(5 HOURS)
-
-static void pptp_expectfn(struct ip_conntrack *ct,
-			 struct ip_conntrack_expect *exp)
-{
-	typeof(ip_nat_pptp_hook_expectfn) ip_nat_pptp_expectfn;
-
-	DEBUGP("increasing timeouts\n");
-
-	/* increase timeout of GRE data channel conntrack entry */
-	ct->proto.gre.timeout = PPTP_GRE_TIMEOUT;
-	ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;
-
-	/* Can you see how rusty this code is, compared with the pre-2.6.11
-	 * one? That's what happened to my shiny newnat of 2002 ;( -HW */
-
-	rcu_read_lock();
-	ip_nat_pptp_expectfn = rcu_dereference(ip_nat_pptp_hook_expectfn);
-	if (!ip_nat_pptp_expectfn) {
-		struct ip_conntrack_tuple inv_t;
-		struct ip_conntrack_expect *exp_other;
-
-		/* obviously this tuple inversion only works until you do NAT */
-		invert_tuplepr(&inv_t, &exp->tuple);
-		DEBUGP("trying to unexpect other dir: ");
-		DUMP_TUPLE(&inv_t);
-
-		exp_other = ip_conntrack_expect_find_get(&inv_t);
-		if (exp_other) {
-			/* delete other expectation.  */
-			DEBUGP("found\n");
-			ip_conntrack_unexpect_related(exp_other);
-			ip_conntrack_expect_put(exp_other);
-		} else {
-			DEBUGP("not found\n");
-		}
-	} else {
-		/* we need more than simple inversion */
-		ip_nat_pptp_expectfn(ct, exp);
-	}
-	rcu_read_unlock();
-}
-
-static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t)
-{
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack_expect *exp;
-
-	DEBUGP("trying to timeout ct or exp for tuple ");
-	DUMP_TUPLE(t);
-
-	h = ip_conntrack_find_get(t, NULL);
-	if (h)  {
-		struct ip_conntrack *sibling = tuplehash_to_ctrack(h);
-		DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
-		sibling->proto.gre.timeout = 0;
-		sibling->proto.gre.stream_timeout = 0;
-		if (del_timer(&sibling->timeout))
-			sibling->timeout.function((unsigned long)sibling);
-		ip_conntrack_put(sibling);
-		return 1;
-	} else {
-		exp = ip_conntrack_expect_find_get(t);
-		if (exp) {
-			DEBUGP("unexpect_related of expect %p\n", exp);
-			ip_conntrack_unexpect_related(exp);
-			ip_conntrack_expect_put(exp);
-			return 1;
-		}
-	}
-
-	return 0;
-}
-
-
-/* timeout GRE data connections */
-static void pptp_destroy_siblings(struct ip_conntrack *ct)
-{
-	struct ip_conntrack_tuple t;
-
-	ip_ct_gre_keymap_destroy(ct);
-	/* Since ct->sibling_list has literally rusted away in 2.6.11,
-	 * we now need another way to find out about our sibling
-	 * contrack and expects... -HW */
-
-	/* try original (pns->pac) tuple */
-	memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
-	t.dst.protonum = IPPROTO_GRE;
-	t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
-	t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
-
-	if (!destroy_sibling_or_exp(&t))
-		DEBUGP("failed to timeout original pns->pac ct/exp\n");
-
-	/* try reply (pac->pns) tuple */
-	memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
-	t.dst.protonum = IPPROTO_GRE;
-	t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id;
-	t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id;
-
-	if (!destroy_sibling_or_exp(&t))
-		DEBUGP("failed to timeout reply pac->pns ct/exp\n");
-}
-
-/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
-static inline int
-exp_gre(struct ip_conntrack *ct,
-	__be16 callid,
-	__be16 peer_callid)
-{
-	struct ip_conntrack_expect *exp_orig, *exp_reply;
-	int ret = 1;
-	typeof(ip_nat_pptp_hook_exp_gre) ip_nat_pptp_exp_gre;
-
-	exp_orig = ip_conntrack_expect_alloc(ct);
-	if (exp_orig == NULL)
-		goto out;
-
-	exp_reply = ip_conntrack_expect_alloc(ct);
-	if (exp_reply == NULL)
-		goto out_put_orig;
-
-	/* original direction, PNS->PAC */
-	exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
-	exp_orig->tuple.src.u.gre.key = peer_callid;
-	exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
-	exp_orig->tuple.dst.u.gre.key = callid;
-	exp_orig->tuple.dst.protonum = IPPROTO_GRE;
-
-	exp_orig->mask.src.ip = htonl(0xffffffff);
-	exp_orig->mask.src.u.all = 0;
-	exp_orig->mask.dst.u.gre.key = htons(0xffff);
-	exp_orig->mask.dst.ip = htonl(0xffffffff);
-	exp_orig->mask.dst.protonum = 0xff;
-
-	exp_orig->master = ct;
-	exp_orig->expectfn = pptp_expectfn;
-	exp_orig->flags = 0;
-
-	/* both expectations are identical apart from tuple */
-	memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
-
-	/* reply direction, PAC->PNS */
-	exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
-	exp_reply->tuple.src.u.gre.key = callid;
-	exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
-	exp_reply->tuple.dst.u.gre.key = peer_callid;
-	exp_reply->tuple.dst.protonum = IPPROTO_GRE;
-
-	ip_nat_pptp_exp_gre = rcu_dereference(ip_nat_pptp_hook_exp_gre);
-	if (ip_nat_pptp_exp_gre)
-		ip_nat_pptp_exp_gre(exp_orig, exp_reply);
-	if (ip_conntrack_expect_related(exp_orig) != 0)
-		goto out_put_both;
-	if (ip_conntrack_expect_related(exp_reply) != 0)
-		goto out_unexpect_orig;
-
-	/* Add GRE keymap entries */
-	if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0)
-		goto out_unexpect_both;
-	if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) {
-		ip_ct_gre_keymap_destroy(ct);
-		goto out_unexpect_both;
-	}
-	ret = 0;
-
-out_put_both:
-	ip_conntrack_expect_put(exp_reply);
-out_put_orig:
-	ip_conntrack_expect_put(exp_orig);
-out:
-	return ret;
-
-out_unexpect_both:
-	ip_conntrack_unexpect_related(exp_reply);
-out_unexpect_orig:
-	ip_conntrack_unexpect_related(exp_orig);
-	goto out_put_both;
-}
-
-static inline int
-pptp_inbound_pkt(struct sk_buff **pskb,
-		 struct PptpControlHeader *ctlh,
-		 union pptp_ctrl_union *pptpReq,
-		 unsigned int reqlen,
-		 struct ip_conntrack *ct,
-		 enum ip_conntrack_info ctinfo)
-{
-	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
-	u_int16_t msg;
-	__be16 cid = 0, pcid = 0;
-	typeof(ip_nat_pptp_hook_inbound) ip_nat_pptp_inbound;
-
-	msg = ntohs(ctlh->messageType);
-	DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
-
-	switch (msg) {
-	case PPTP_START_SESSION_REPLY:
-		/* server confirms new control session */
-		if (info->sstate < PPTP_SESSION_REQUESTED)
-			goto invalid;
-		if (pptpReq->srep.resultCode == PPTP_START_OK)
-			info->sstate = PPTP_SESSION_CONFIRMED;
-		else
-			info->sstate = PPTP_SESSION_ERROR;
-		break;
-
-	case PPTP_STOP_SESSION_REPLY:
-		/* server confirms end of control session */
-		if (info->sstate > PPTP_SESSION_STOPREQ)
-			goto invalid;
-		if (pptpReq->strep.resultCode == PPTP_STOP_OK)
-			info->sstate = PPTP_SESSION_NONE;
-		else
-			info->sstate = PPTP_SESSION_ERROR;
-		break;
-
-	case PPTP_OUT_CALL_REPLY:
-		/* server accepted call, we now expect GRE frames */
-		if (info->sstate != PPTP_SESSION_CONFIRMED)
-			goto invalid;
-		if (info->cstate != PPTP_CALL_OUT_REQ &&
-		    info->cstate != PPTP_CALL_OUT_CONF)
-			goto invalid;
-
-		cid = pptpReq->ocack.callID;
-		pcid = pptpReq->ocack.peersCallID;
-		if (info->pns_call_id != pcid)
-			goto invalid;
-		DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
-			ntohs(cid), ntohs(pcid));
-
-		if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) {
-			info->cstate = PPTP_CALL_OUT_CONF;
-			info->pac_call_id = cid;
-			exp_gre(ct, cid, pcid);
-		} else
-			info->cstate = PPTP_CALL_NONE;
-		break;
-
-	case PPTP_IN_CALL_REQUEST:
-		/* server tells us about incoming call request */
-		if (info->sstate != PPTP_SESSION_CONFIRMED)
-			goto invalid;
-
-		cid = pptpReq->icreq.callID;
-		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
-		info->cstate = PPTP_CALL_IN_REQ;
-		info->pac_call_id = cid;
-		break;
-
-	case PPTP_IN_CALL_CONNECT:
-		/* server tells us about incoming call established */
-		if (info->sstate != PPTP_SESSION_CONFIRMED)
-			goto invalid;
-		if (info->cstate != PPTP_CALL_IN_REP &&
-		    info->cstate != PPTP_CALL_IN_CONF)
-			goto invalid;
-
-		pcid = pptpReq->iccon.peersCallID;
-		cid = info->pac_call_id;
-
-		if (info->pns_call_id != pcid)
-			goto invalid;
-
-		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid));
-		info->cstate = PPTP_CALL_IN_CONF;
-
-		/* we expect a GRE connection from PAC to PNS */
-		exp_gre(ct, cid, pcid);
-		break;
-
-	case PPTP_CALL_DISCONNECT_NOTIFY:
-		/* server confirms disconnect */
-		cid = pptpReq->disc.callID;
-		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
-		info->cstate = PPTP_CALL_NONE;
-
-		/* untrack this call id, unexpect GRE packets */
-		pptp_destroy_siblings(ct);
-		break;
-
-	case PPTP_WAN_ERROR_NOTIFY:
-	case PPTP_ECHO_REQUEST:
-	case PPTP_ECHO_REPLY:
-		/* I don't have to explain these ;) */
-		break;
-	default:
-		goto invalid;
-	}
-
-	ip_nat_pptp_inbound = rcu_dereference(ip_nat_pptp_hook_inbound);
-	if (ip_nat_pptp_inbound)
-		return ip_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq);
-	return NF_ACCEPT;
-
-invalid:
-	DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
-	       "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
-	       msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
-	       msg, ntohs(cid), ntohs(pcid),  info->cstate, info->sstate,
-	       ntohs(info->pns_call_id), ntohs(info->pac_call_id));
-	return NF_ACCEPT;
-}
-
-static inline int
-pptp_outbound_pkt(struct sk_buff **pskb,
-		  struct PptpControlHeader *ctlh,
-		  union pptp_ctrl_union *pptpReq,
-		  unsigned int reqlen,
-		  struct ip_conntrack *ct,
-		  enum ip_conntrack_info ctinfo)
-{
-	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
-	u_int16_t msg;
-	__be16 cid = 0, pcid = 0;
-	typeof(ip_nat_pptp_hook_outbound) ip_nat_pptp_outbound;
-
-	msg = ntohs(ctlh->messageType);
-	DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
-
-	switch (msg) {
-	case PPTP_START_SESSION_REQUEST:
-		/* client requests for new control session */
-		if (info->sstate != PPTP_SESSION_NONE)
-			goto invalid;
-		info->sstate = PPTP_SESSION_REQUESTED;
-		break;
-	case PPTP_STOP_SESSION_REQUEST:
-		/* client requests end of control session */
-		info->sstate = PPTP_SESSION_STOPREQ;
-		break;
-
-	case PPTP_OUT_CALL_REQUEST:
-		/* client initiating connection to server */
-		if (info->sstate != PPTP_SESSION_CONFIRMED)
-			goto invalid;
-		info->cstate = PPTP_CALL_OUT_REQ;
-		/* track PNS call id */
-		cid = pptpReq->ocreq.callID;
-		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid));
-		info->pns_call_id = cid;
-		break;
-	case PPTP_IN_CALL_REPLY:
-		/* client answers incoming call */
-		if (info->cstate != PPTP_CALL_IN_REQ &&
-		    info->cstate != PPTP_CALL_IN_REP)
-			goto invalid;
-
-		cid = pptpReq->icack.callID;
-		pcid = pptpReq->icack.peersCallID;
-		if (info->pac_call_id != pcid)
-			goto invalid;
-		DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg],
-		       ntohs(cid), ntohs(pcid));
-
-		if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) {
-			/* part two of the three-way handshake */
-			info->cstate = PPTP_CALL_IN_REP;
-			info->pns_call_id = cid;
-		} else
-			info->cstate = PPTP_CALL_NONE;
-		break;
-
-	case PPTP_CALL_CLEAR_REQUEST:
-		/* client requests hangup of call */
-		if (info->sstate != PPTP_SESSION_CONFIRMED)
-			goto invalid;
-		/* FUTURE: iterate over all calls and check if
-		 * call ID is valid.  We don't do this without newnat,
-		 * because we only know about last call */
-		info->cstate = PPTP_CALL_CLEAR_REQ;
-		break;
-	case PPTP_SET_LINK_INFO:
-	case PPTP_ECHO_REQUEST:
-	case PPTP_ECHO_REPLY:
-		/* I don't have to explain these ;) */
-		break;
-	default:
-		goto invalid;
-	}
-
-	ip_nat_pptp_outbound = rcu_dereference(ip_nat_pptp_hook_outbound);
-	if (ip_nat_pptp_outbound)
-		return ip_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq);
-	return NF_ACCEPT;
-
-invalid:
-	DEBUGP("invalid %s: type=%d cid=%u pcid=%u "
-	       "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n",
-	       msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0],
-	       msg, ntohs(cid), ntohs(pcid),  info->cstate, info->sstate,
-	       ntohs(info->pns_call_id), ntohs(info->pac_call_id));
-	return NF_ACCEPT;
-}
-
-static const unsigned int pptp_msg_size[] = {
-	[PPTP_START_SESSION_REQUEST]  = sizeof(struct PptpStartSessionRequest),
-	[PPTP_START_SESSION_REPLY]    = sizeof(struct PptpStartSessionReply),
-	[PPTP_STOP_SESSION_REQUEST]   = sizeof(struct PptpStopSessionRequest),
-	[PPTP_STOP_SESSION_REPLY]     = sizeof(struct PptpStopSessionReply),
-	[PPTP_OUT_CALL_REQUEST]       = sizeof(struct PptpOutCallRequest),
-	[PPTP_OUT_CALL_REPLY]	      = sizeof(struct PptpOutCallReply),
-	[PPTP_IN_CALL_REQUEST]	      = sizeof(struct PptpInCallRequest),
-	[PPTP_IN_CALL_REPLY]	      = sizeof(struct PptpInCallReply),
-	[PPTP_IN_CALL_CONNECT]	      = sizeof(struct PptpInCallConnected),
-	[PPTP_CALL_CLEAR_REQUEST]     = sizeof(struct PptpClearCallRequest),
-	[PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify),
-	[PPTP_WAN_ERROR_NOTIFY]	      = sizeof(struct PptpWanErrorNotify),
-	[PPTP_SET_LINK_INFO]	      = sizeof(struct PptpSetLinkInfo),
-};
-
-/* track caller id inside control connection, call expect_related */
-static int
-conntrack_pptp_help(struct sk_buff **pskb,
-		    struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-
-{
-	int dir = CTINFO2DIR(ctinfo);
-	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
-	struct tcphdr _tcph, *tcph;
-	struct pptp_pkt_hdr _pptph, *pptph;
-	struct PptpControlHeader _ctlh, *ctlh;
-	union pptp_ctrl_union _pptpReq, *pptpReq;
-	unsigned int tcplen = (*pskb)->len - ip_hdrlen(*pskb);
-	unsigned int datalen, reqlen, nexthdr_off;
-	int oldsstate, oldcstate;
-	int ret;
-	u_int16_t msg;
-
-	/* don't do any tracking before tcp handshake complete */
-	if (ctinfo != IP_CT_ESTABLISHED
-	    && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
-		DEBUGP("ctinfo = %u, skipping\n", ctinfo);
-		return NF_ACCEPT;
-	}
-
-	nexthdr_off = ip_hdrlen(*pskb);
-	tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
-	BUG_ON(!tcph);
-	nexthdr_off += tcph->doff * 4;
-	datalen = tcplen - tcph->doff * 4;
-
-	pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
-	if (!pptph) {
-		DEBUGP("no full PPTP header, can't track\n");
-		return NF_ACCEPT;
-	}
-	nexthdr_off += sizeof(_pptph);
-	datalen -= sizeof(_pptph);
-
-	/* if it's not a control message we can't do anything with it */
-	if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
-	    ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
-		DEBUGP("not a control packet\n");
-		return NF_ACCEPT;
-	}
-
-	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
-	if (!ctlh)
-		return NF_ACCEPT;
-	nexthdr_off += sizeof(_ctlh);
-	datalen -= sizeof(_ctlh);
-
-	reqlen = datalen;
-	msg = ntohs(ctlh->messageType);
-	if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg])
-		return NF_ACCEPT;
-	if (reqlen > sizeof(*pptpReq))
-		reqlen = sizeof(*pptpReq);
-
-	pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
-	if (!pptpReq)
-		return NF_ACCEPT;
-
-	oldsstate = info->sstate;
-	oldcstate = info->cstate;
-
-	spin_lock_bh(&ip_pptp_lock);
-
-	/* FIXME: We just blindly assume that the control connection is always
-	 * established from PNS->PAC.  However, RFC makes no guarantee */
-	if (dir == IP_CT_DIR_ORIGINAL)
-		/* client -> server (PNS -> PAC) */
-		ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
-					ctinfo);
-	else
-		/* server -> client (PAC -> PNS) */
-		ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
-				       ctinfo);
-	DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
-		oldsstate, info->sstate, oldcstate, info->cstate);
-	spin_unlock_bh(&ip_pptp_lock);
-
-	return ret;
-}
-
-/* control protocol helper */
-static struct ip_conntrack_helper pptp = {
-	.list = { NULL, NULL },
-	.name = "pptp",
-	.me = THIS_MODULE,
-	.max_expected = 2,
-	.timeout = 5 * 60,
-	.tuple = { .src = { .ip = 0,
-			    .u = { .tcp = { .port =
-				    __constant_htons(PPTP_CONTROL_PORT) } }
-			  },
-		   .dst = { .ip = 0,
-			    .u = { .all = 0 },
-			    .protonum = IPPROTO_TCP
-			  }
-		 },
-	.mask = { .src = { .ip = 0,
-			   .u = { .tcp = { .port = __constant_htons(0xffff) } }
-			 },
-		  .dst = { .ip = 0,
-			   .u = { .all = 0 },
-			   .protonum = 0xff
-			 }
-		},
-	.help = conntrack_pptp_help,
-	.destroy = pptp_destroy_siblings,
-};
-
-extern void ip_ct_proto_gre_fini(void);
-extern int __init ip_ct_proto_gre_init(void);
-
-/* ip_conntrack_pptp initialization */
-static int __init ip_conntrack_helper_pptp_init(void)
-{
-	int retcode;
-
-	retcode = ip_ct_proto_gre_init();
-	if (retcode < 0)
-		return retcode;
-
-	DEBUGP(" registering helper\n");
-	if ((retcode = ip_conntrack_helper_register(&pptp))) {
-		printk(KERN_ERR "Unable to register conntrack application "
-				"helper for pptp: %d\n", retcode);
-		ip_ct_proto_gre_fini();
-		return retcode;
-	}
-
-	printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION);
-	return 0;
-}
-
-static void __exit ip_conntrack_helper_pptp_fini(void)
-{
-	ip_conntrack_helper_unregister(&pptp);
-	ip_ct_proto_gre_fini();
-	printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION);
-}
-
-module_init(ip_conntrack_helper_pptp_init);
-module_exit(ip_conntrack_helper_pptp_fini);
-
-EXPORT_SYMBOL(ip_nat_pptp_hook_outbound);
-EXPORT_SYMBOL(ip_nat_pptp_hook_inbound);
-EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre);
-EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn);
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
deleted file mode 100644
index ee99abe482e..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/* IRC extension for IP connection tracking, Version 1.21
- * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
- * based on RR's ip_conntrack_ftp.c
- *
- * ip_conntrack_irc.c,v 1.21 2002/02/05 14:49:26 laforge Exp
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- **
- *	Module load syntax:
- * 	insmod ip_conntrack_irc.o ports=port1,port2,...port<MAX_PORTS>
- *			    max_dcc_channels=n dcc_timeout=secs
- *
- * 	please give the ports of all IRC servers You wish to connect to.
- *	If You don't specify ports, the default will be port 6667.
- *	With max_dcc_channels you can define the maximum number of not
- *	yet answered DCC channels per IRC session (default 8).
- *	With dcc_timeout you can specify how long the system waits for
- *	an expected DCC channel (default 300 seconds).
- *
- */
-
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
-#include <linux/moduleparam.h>
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-static unsigned int max_dcc_channels = 8;
-static unsigned int dcc_timeout = 300;
-/* This is slow, but it's simple. --RR */
-static char *irc_buffer;
-static DEFINE_SPINLOCK(irc_buffer_lock);
-
-unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
-				enum ip_conntrack_info ctinfo,
-				unsigned int matchoff,
-				unsigned int matchlen,
-				struct ip_conntrack_expect *exp);
-EXPORT_SYMBOL_GPL(ip_nat_irc_hook);
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
-MODULE_LICENSE("GPL");
-module_param_array(ports, ushort, &ports_c, 0400);
-MODULE_PARM_DESC(ports, "port numbers of IRC servers");
-module_param(max_dcc_channels, uint, 0400);
-MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
-module_param(dcc_timeout, uint, 0400);
-MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
-
-static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
-#define MINMATCHLEN	5
-
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \
-				       __FILE__, __FUNCTION__ , ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int parse_dcc(char *data, char *data_end, u_int32_t *ip,
-		     u_int16_t *port, char **ad_beg_p, char **ad_end_p)
-/* tries to get the ip_addr and port out of a dcc command
-   return value: -1 on failure, 0 on success
-	data		pointer to first byte of DCC command data
-	data_end	pointer to last byte of dcc command data
-	ip		returns parsed ip of dcc command
-	port		returns parsed port of dcc command
-	ad_beg_p	returns pointer to first byte of addr data
-	ad_end_p	returns pointer to last byte of addr data */
-{
-
-	/* at least 12: "AAAAAAAA P\1\n" */
-	while (*data++ != ' ')
-		if (data > data_end - 12)
-			return -1;
-
-	*ad_beg_p = data;
-	*ip = simple_strtoul(data, &data, 10);
-
-	/* skip blanks between ip and port */
-	while (*data == ' ') {
-		if (data >= data_end)
-			return -1;
-		data++;
-	}
-
-	*port = simple_strtoul(data, &data, 10);
-	*ad_end_p = data;
-
-	return 0;
-}
-
-static int help(struct sk_buff **pskb,
-		struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-{
-	unsigned int dataoff;
-	struct tcphdr _tcph, *th;
-	char *data, *data_limit, *ib_ptr;
-	int dir = CTINFO2DIR(ctinfo);
-	struct ip_conntrack_expect *exp;
-	u32 seq;
-	u_int32_t dcc_ip;
-	u_int16_t dcc_port;
-	int i, ret = NF_ACCEPT;
-	char *addr_beg_p, *addr_end_p;
-	typeof(ip_nat_irc_hook) ip_nat_irc;
-
-	DEBUGP("entered\n");
-
-	/* If packet is coming from IRC server */
-	if (dir == IP_CT_DIR_REPLY)
-		return NF_ACCEPT;
-
-	/* Until there's been traffic both ways, don't look in packets. */
-	if (ctinfo != IP_CT_ESTABLISHED
-	    && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
-		DEBUGP("Conntrackinfo = %u\n", ctinfo);
-		return NF_ACCEPT;
-	}
-
-	/* Not a full tcp header? */
-	th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
-				sizeof(_tcph), &_tcph);
-	if (th == NULL)
-		return NF_ACCEPT;
-
-	/* No data? */
-	dataoff = ip_hdrlen(*pskb) + th->doff * 4;
-	if (dataoff >= (*pskb)->len)
-		return NF_ACCEPT;
-
-	spin_lock_bh(&irc_buffer_lock);
-	ib_ptr = skb_header_pointer(*pskb, dataoff,
-				    (*pskb)->len - dataoff, irc_buffer);
-	BUG_ON(ib_ptr == NULL);
-
-	data = ib_ptr;
-	data_limit = ib_ptr + (*pskb)->len - dataoff;
-
-	/* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
-	 * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
-	while (data < (data_limit - (19 + MINMATCHLEN))) {
-		if (memcmp(data, "\1DCC ", 5)) {
-			data++;
-			continue;
-		}
-
-		data += 5;
-		/* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
-
-		DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n",
-			NIPQUAD(iph->saddr), ntohs(th->source),
-			NIPQUAD(iph->daddr), ntohs(th->dest));
-
-		for (i = 0; i < ARRAY_SIZE(dccprotos); i++) {
-			if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) {
-				/* no match */
-				continue;
-			}
-
-			DEBUGP("DCC %s detected\n", dccprotos[i]);
-			data += strlen(dccprotos[i]);
-			/* we have at least
-			 * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid
-			 * data left (== 14/13 bytes) */
-			if (parse_dcc((char *)data, data_limit, &dcc_ip,
-				       &dcc_port, &addr_beg_p, &addr_end_p)) {
-				/* unable to parse */
-				DEBUGP("unable to parse dcc command\n");
-				continue;
-			}
-			DEBUGP("DCC bound ip/port: %u.%u.%u.%u:%u\n",
-				HIPQUAD(dcc_ip), dcc_port);
-
-			/* dcc_ip can be the internal OR external (NAT'ed) IP
-			 * Tiago Sousa <mirage@kaotik.org> */
-			if (ct->tuplehash[dir].tuple.src.ip != htonl(dcc_ip)
-			    && ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip != htonl(dcc_ip)) {
-				if (net_ratelimit())
-					printk(KERN_WARNING
-						"Forged DCC command from "
-						"%u.%u.%u.%u: %u.%u.%u.%u:%u\n",
-				NIPQUAD(ct->tuplehash[dir].tuple.src.ip),
-						HIPQUAD(dcc_ip), dcc_port);
-
-				continue;
-			}
-
-			exp = ip_conntrack_expect_alloc(ct);
-			if (exp == NULL) {
-				ret = NF_DROP;
-				goto out;
-			}
-
-			/* save position of address in dcc string,
-			 * necessary for NAT */
-			DEBUGP("tcph->seq = %u\n", th->seq);
-			seq = ntohl(th->seq) + (addr_beg_p - ib_ptr);
-
-			/* We refer to the reverse direction ("!dir")
-			 * tuples here, because we're expecting
-			 * something in the other * direction.
-			 * Doesn't matter unless NAT is happening.  */
-			exp->tuple = ((struct ip_conntrack_tuple)
-				{ { 0, { 0 } },
-				  { ct->tuplehash[!dir].tuple.dst.ip,
-				    { .tcp = { htons(dcc_port) } },
-				    IPPROTO_TCP }});
-			exp->mask = ((struct ip_conntrack_tuple)
-				{ { 0, { 0 } },
-				  { htonl(0xFFFFFFFF),
-					{ .tcp = { htons(0xFFFF) } }, 0xFF }});
-			exp->expectfn = NULL;
-			exp->flags = 0;
-			ip_nat_irc = rcu_dereference(ip_nat_irc_hook);
-			if (ip_nat_irc)
-				ret = ip_nat_irc(pskb, ctinfo,
-						 addr_beg_p - ib_ptr,
-						 addr_end_p - addr_beg_p,
-						 exp);
-			else if (ip_conntrack_expect_related(exp) != 0)
-				ret = NF_DROP;
-			ip_conntrack_expect_put(exp);
-			goto out;
-		} /* for .. NUM_DCCPROTO */
-	} /* while data < ... */
-
- out:
-	spin_unlock_bh(&irc_buffer_lock);
-	return ret;
-}
-
-static struct ip_conntrack_helper irc_helpers[MAX_PORTS];
-static char irc_names[MAX_PORTS][sizeof("irc-65535")];
-
-static void ip_conntrack_irc_fini(void);
-
-static int __init ip_conntrack_irc_init(void)
-{
-	int i, ret;
-	struct ip_conntrack_helper *hlpr;
-	char *tmpname;
-
-	if (max_dcc_channels < 1) {
-		printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n");
-		return -EBUSY;
-	}
-
-	irc_buffer = kmalloc(65536, GFP_KERNEL);
-	if (!irc_buffer)
-		return -ENOMEM;
-
-	/* If no port given, default to standard irc port */
-	if (ports_c == 0)
-		ports[ports_c++] = IRC_PORT;
-
-	for (i = 0; i < ports_c; i++) {
-		hlpr = &irc_helpers[i];
-		hlpr->tuple.src.u.tcp.port = htons(ports[i]);
-		hlpr->tuple.dst.protonum = IPPROTO_TCP;
-		hlpr->mask.src.u.tcp.port = htons(0xFFFF);
-		hlpr->mask.dst.protonum = 0xFF;
-		hlpr->max_expected = max_dcc_channels;
-		hlpr->timeout = dcc_timeout;
-		hlpr->me = THIS_MODULE;
-		hlpr->help = help;
-
-		tmpname = &irc_names[i][0];
-		if (ports[i] == IRC_PORT)
-			sprintf(tmpname, "irc");
-		else
-			sprintf(tmpname, "irc-%d", i);
-		hlpr->name = tmpname;
-
-		DEBUGP("port #%d: %d\n", i, ports[i]);
-
-		ret = ip_conntrack_helper_register(hlpr);
-
-		if (ret) {
-			printk("ip_conntrack_irc: ERROR registering port %d\n",
-				ports[i]);
-			ip_conntrack_irc_fini();
-			return -EBUSY;
-		}
-	}
-	return 0;
-}
-
-/* This function is intentionally _NOT_ defined as __exit, because
- * it is needed by the init function */
-static void ip_conntrack_irc_fini(void)
-{
-	int i;
-	for (i = 0; i < ports_c; i++) {
-		DEBUGP("unregistering port %d\n",
-		       ports[i]);
-		ip_conntrack_helper_unregister(&irc_helpers[i]);
-	}
-	kfree(irc_buffer);
-}
-
-module_init(ip_conntrack_irc_init);
-module_exit(ip_conntrack_irc_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
deleted file mode 100644
index df07c5f1d87..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- *      NetBIOS name service broadcast connection tracking helper
- *
- *      (c) 2005 Patrick McHardy <kaber@trash.net>
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-/*
- *      This helper tracks locally originating NetBIOS name service
- *      requests by issuing permanent expectations (valid until
- *      timing out) matching all reply connections from the
- *      destination network. The only NetBIOS specific thing is
- *      actually the port number.
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/if_addr.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <net/route.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#define NMBD_PORT	137
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int timeout = 3;
-module_param(timeout, uint, 0400);
-MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
-
-static int help(struct sk_buff **pskb,
-		struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-{
-	struct ip_conntrack_expect *exp;
-	struct iphdr *iph = ip_hdr(*pskb);
-	struct rtable *rt = (struct rtable *)(*pskb)->dst;
-	struct in_device *in_dev;
-	__be32 mask = 0;
-
-	/* we're only interested in locally generated packets */
-	if ((*pskb)->sk == NULL)
-		goto out;
-	if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
-		goto out;
-	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
-		goto out;
-
-	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(rt->u.dst.dev);
-	if (in_dev != NULL) {
-		for_primary_ifa(in_dev) {
-			if (ifa->ifa_broadcast == iph->daddr) {
-				mask = ifa->ifa_mask;
-				break;
-			}
-		} endfor_ifa(in_dev);
-	}
-	rcu_read_unlock();
-
-	if (mask == 0)
-		goto out;
-
-	exp = ip_conntrack_expect_alloc(ct);
-	if (exp == NULL)
-		goto out;
-
-	exp->tuple                = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-	exp->tuple.src.u.udp.port = htons(NMBD_PORT);
-
-	exp->mask.src.ip          = mask;
-	exp->mask.src.u.udp.port  = htons(0xFFFF);
-	exp->mask.dst.ip          = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.udp.port  = htons(0xFFFF);
-	exp->mask.dst.protonum    = 0xFF;
-
-	exp->expectfn             = NULL;
-	exp->flags                = IP_CT_EXPECT_PERMANENT;
-
-	ip_conntrack_expect_related(exp);
-	ip_conntrack_expect_put(exp);
-
-	ip_ct_refresh(ct, *pskb, timeout * HZ);
-out:
-	return NF_ACCEPT;
-}
-
-static struct ip_conntrack_helper helper = {
-	.name			= "netbios-ns",
-	.tuple = {
-		.src = {
-			.u = {
-				.udp = {
-					.port	= __constant_htons(NMBD_PORT),
-				}
-			}
-		},
-		.dst = {
-			.protonum	= IPPROTO_UDP,
-		},
-	},
-	.mask = {
-		.src = {
-			.u = {
-				.udp = {
-					.port	= __constant_htons(0xFFFF),
-				}
-			}
-		},
-		.dst = {
-			.protonum	= 0xFF,
-		},
-	},
-	.max_expected		= 1,
-	.me			= THIS_MODULE,
-	.help			= help,
-};
-
-static int __init ip_conntrack_netbios_ns_init(void)
-{
-	helper.timeout = timeout;
-	return ip_conntrack_helper_register(&helper);
-}
-
-static void __exit ip_conntrack_netbios_ns_fini(void)
-{
-	ip_conntrack_helper_unregister(&helper);
-}
-
-module_init(ip_conntrack_netbios_ns_init);
-module_exit(ip_conntrack_netbios_ns_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
deleted file mode 100644
index 9228b76ccd9..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ /dev/null
@@ -1,1577 +0,0 @@
-/* Connection tracking via netlink socket. Allows for user space
- * protocol helpers and general trouble making from userspace.
- *
- * (C) 2001 by Jay Schulist <jschlst@samba.org>
- * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
- * (C) 2003 by Patrick Mchardy <kaber@trash.net>
- * (C) 2005-2006 by Pablo Neira Ayuso <pablo@eurodev.net>
- *
- * I've reworked this stuff to use attributes instead of conntrack
- * structures. 5.44 am. I need more tea. --pablo 05/07/11.
- *
- * Initial connection tracking via netlink development funded and
- * generally made possible by Network Robots, Inc. (www.networkrobots.com)
- *
- * Further development of this code funded by Astaro AG (http://www.astaro.com)
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/errno.h>
-#include <linux/netlink.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/notifier.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-MODULE_LICENSE("GPL");
-
-static char __initdata version[] = "0.90";
-
-static inline int
-ctnetlink_dump_tuples_proto(struct sk_buff *skb,
-			    const struct ip_conntrack_tuple *tuple,
-			    struct ip_conntrack_protocol *proto)
-{
-	int ret = 0;
-	struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
-
-	NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);
-
-	if (likely(proto->tuple_to_nfattr))
-		ret = proto->tuple_to_nfattr(skb, tuple);
-
-	NFA_NEST_END(skb, nest_parms);
-
-	return ret;
-
-nfattr_failure:
-	return -1;
-}
-
-static inline int
-ctnetlink_dump_tuples_ip(struct sk_buff *skb,
-			 const struct ip_conntrack_tuple *tuple)
-{
-	struct nfattr *nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
-
-	NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(__be32), &tuple->src.ip);
-	NFA_PUT(skb, CTA_IP_V4_DST, sizeof(__be32), &tuple->dst.ip);
-
-	NFA_NEST_END(skb, nest_parms);
-
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-static inline int
-ctnetlink_dump_tuples(struct sk_buff *skb,
-		      const struct ip_conntrack_tuple *tuple)
-{
-	int ret;
-	struct ip_conntrack_protocol *proto;
-
-	ret = ctnetlink_dump_tuples_ip(skb, tuple);
-	if (unlikely(ret < 0))
-		return ret;
-
-	proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
-	ret = ctnetlink_dump_tuples_proto(skb, tuple, proto);
-	ip_conntrack_proto_put(proto);
-
-	return ret;
-}
-
-static inline int
-ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
-	__be32 status = htonl((u_int32_t) ct->status);
-	NFA_PUT(skb, CTA_STATUS, sizeof(status), &status);
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-static inline int
-ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
-	long timeout_l = ct->timeout.expires - jiffies;
-	__be32 timeout;
-
-	if (timeout_l < 0)
-		timeout = 0;
-	else
-		timeout = htonl(timeout_l / HZ);
-
-	NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout);
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-static inline int
-ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
-	struct ip_conntrack_protocol *proto = ip_conntrack_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
-
-	struct nfattr *nest_proto;
-	int ret;
-
-	if (!proto->to_nfattr) {
-		ip_conntrack_proto_put(proto);
-		return 0;
-	}
-
-	nest_proto = NFA_NEST(skb, CTA_PROTOINFO);
-
-	ret = proto->to_nfattr(skb, nest_proto, ct);
-
-	ip_conntrack_proto_put(proto);
-
-	NFA_NEST_END(skb, nest_proto);
-
-	return ret;
-
-nfattr_failure:
-	ip_conntrack_proto_put(proto);
-	return -1;
-}
-
-static inline int
-ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
-	struct nfattr *nest_helper;
-
-	if (!ct->helper)
-		return 0;
-
-	nest_helper = NFA_NEST(skb, CTA_HELP);
-	NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name);
-
-	if (ct->helper->to_nfattr)
-		ct->helper->to_nfattr(skb, ct);
-
-	NFA_NEST_END(skb, nest_helper);
-
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-#ifdef CONFIG_IP_NF_CT_ACCT
-static inline int
-ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct,
-			enum ip_conntrack_dir dir)
-{
-	enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
-	struct nfattr *nest_count = NFA_NEST(skb, type);
-	__be32 tmp;
-
-	tmp = htonl(ct->counters[dir].packets);
-	NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(__be32), &tmp);
-
-	tmp = htonl(ct->counters[dir].bytes);
-	NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(__be32), &tmp);
-
-	NFA_NEST_END(skb, nest_count);
-
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-#else
-#define ctnetlink_dump_counters(a, b, c) (0)
-#endif
-
-#ifdef CONFIG_IP_NF_CONNTRACK_MARK
-static inline int
-ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
-	__be32 mark = htonl(ct->mark);
-
-	NFA_PUT(skb, CTA_MARK, sizeof(__be32), &mark);
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-#else
-#define ctnetlink_dump_mark(a, b) (0)
-#endif
-
-static inline int
-ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
-	__be32 id = htonl(ct->id);
-	NFA_PUT(skb, CTA_ID, sizeof(__be32), &id);
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-static inline int
-ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
-	__be32 use = htonl(atomic_read(&ct->ct_general.use));
-
-	NFA_PUT(skb, CTA_USE, sizeof(__be32), &use);
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
-
-static int
-ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
-		    int event, int nowait,
-		    const struct ip_conntrack *ct)
-{
-	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
-	struct nfattr *nest_parms;
-	unsigned char *b;
-
-	b = skb->tail;
-
-	event |= NFNL_SUBSYS_CTNETLINK << 8;
-	nlh    = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
-	nfmsg  = NLMSG_DATA(nlh);
-
-	nlh->nlmsg_flags    = (nowait && pid) ? NLM_F_MULTI : 0;
-	nfmsg->nfgen_family = AF_INET;
-	nfmsg->version      = NFNETLINK_V0;
-	nfmsg->res_id	    = 0;
-
-	nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
-	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
-		goto nfattr_failure;
-	NFA_NEST_END(skb, nest_parms);
-
-	nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
-	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
-		goto nfattr_failure;
-	NFA_NEST_END(skb, nest_parms);
-
-	if (ctnetlink_dump_status(skb, ct) < 0 ||
-	    ctnetlink_dump_timeout(skb, ct) < 0 ||
-	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
-	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
-	    ctnetlink_dump_protoinfo(skb, ct) < 0 ||
-	    ctnetlink_dump_helpinfo(skb, ct) < 0 ||
-	    ctnetlink_dump_mark(skb, ct) < 0 ||
-	    ctnetlink_dump_id(skb, ct) < 0 ||
-	    ctnetlink_dump_use(skb, ct) < 0)
-		goto nfattr_failure;
-
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
-
-nlmsg_failure:
-nfattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-static int ctnetlink_conntrack_event(struct notifier_block *this,
-				     unsigned long events, void *ptr)
-{
-	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
-	struct nfattr *nest_parms;
-	struct ip_conntrack *ct = (struct ip_conntrack *)ptr;
-	struct sk_buff *skb;
-	unsigned int type;
-	unsigned char *b;
-	unsigned int flags = 0, group;
-
-	/* ignore our fake conntrack entry */
-	if (ct == &ip_conntrack_untracked)
-		return NOTIFY_DONE;
-
-	if (events & IPCT_DESTROY) {
-		type = IPCTNL_MSG_CT_DELETE;
-		group = NFNLGRP_CONNTRACK_DESTROY;
-	} else if (events & (IPCT_NEW | IPCT_RELATED)) {
-		type = IPCTNL_MSG_CT_NEW;
-		flags = NLM_F_CREATE|NLM_F_EXCL;
-		group = NFNLGRP_CONNTRACK_NEW;
-	} else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
-		type = IPCTNL_MSG_CT_NEW;
-		group = NFNLGRP_CONNTRACK_UPDATE;
-	} else
-		return NOTIFY_DONE;
-
-	if (!nfnetlink_has_listeners(group))
-		return NOTIFY_DONE;
-
-	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
-	if (!skb)
-		return NOTIFY_DONE;
-
-	b = skb->tail;
-
-	type |= NFNL_SUBSYS_CTNETLINK << 8;
-	nlh   = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
-	nfmsg = NLMSG_DATA(nlh);
-
-	nlh->nlmsg_flags    = flags;
-	nfmsg->nfgen_family = AF_INET;
-	nfmsg->version	= NFNETLINK_V0;
-	nfmsg->res_id	= 0;
-
-	nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
-	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
-		goto nfattr_failure;
-	NFA_NEST_END(skb, nest_parms);
-
-	nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
-	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
-		goto nfattr_failure;
-	NFA_NEST_END(skb, nest_parms);
-
-	if (events & IPCT_DESTROY) {
-		if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
-		    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
-			goto nfattr_failure;
-	} else {
-		if (ctnetlink_dump_status(skb, ct) < 0)
-			goto nfattr_failure;
-
-		if (ctnetlink_dump_timeout(skb, ct) < 0)
-			goto nfattr_failure;
-
-		if (events & IPCT_PROTOINFO
-		    && ctnetlink_dump_protoinfo(skb, ct) < 0)
-			goto nfattr_failure;
-
-		if ((events & IPCT_HELPER || ct->helper)
-		    && ctnetlink_dump_helpinfo(skb, ct) < 0)
-			goto nfattr_failure;
-
-#ifdef CONFIG_IP_NF_CONNTRACK_MARK
-		if ((events & IPCT_MARK || ct->mark)
-		    && ctnetlink_dump_mark(skb, ct) < 0)
-			goto nfattr_failure;
-#endif
-
-		if (events & IPCT_COUNTER_FILLING &&
-		    (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
-		     ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0))
-			goto nfattr_failure;
-	}
-
-	nlh->nlmsg_len = skb->tail - b;
-	nfnetlink_send(skb, 0, group, 0);
-	return NOTIFY_DONE;
-
-nlmsg_failure:
-nfattr_failure:
-	kfree_skb(skb);
-	return NOTIFY_DONE;
-}
-#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
-
-static int ctnetlink_done(struct netlink_callback *cb)
-{
-	if (cb->args[1])
-		ip_conntrack_put((struct ip_conntrack *)cb->args[1]);
-	return 0;
-}
-
-static int
-ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct ip_conntrack *ct, *last;
-	struct ip_conntrack_tuple_hash *h;
-	struct list_head *i;
-
-	read_lock_bh(&ip_conntrack_lock);
-	last = (struct ip_conntrack *)cb->args[1];
-	for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++) {
-restart:
-		list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
-			h = (struct ip_conntrack_tuple_hash *) i;
-			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
-				continue;
-			ct = tuplehash_to_ctrack(h);
-			if (cb->args[1]) {
-				if (ct != last)
-					continue;
-				cb->args[1] = 0;
-			}
-			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
-						cb->nlh->nlmsg_seq,
-						IPCTNL_MSG_CT_NEW,
-						1, ct) < 0) {
-				nf_conntrack_get(&ct->ct_general);
-				cb->args[1] = (unsigned long)ct;
-				goto out;
-			}
-#ifdef CONFIG_NF_CT_ACCT
-			if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) ==
-						IPCTNL_MSG_CT_GET_CTRZERO)
-				memset(&ct->counters, 0, sizeof(ct->counters));
-#endif
-		}
-		if (cb->args[1]) {
-			cb->args[1] = 0;
-			goto restart;
-		}
-	}
-out:
-	read_unlock_bh(&ip_conntrack_lock);
-	if (last)
-		ip_conntrack_put(last);
-
-	return skb->len;
-}
-
-static const size_t cta_min_ip[CTA_IP_MAX] = {
-	[CTA_IP_V4_SRC-1]	= sizeof(__be32),
-	[CTA_IP_V4_DST-1]	= sizeof(__be32),
-};
-
-static inline int
-ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
-{
-	struct nfattr *tb[CTA_IP_MAX];
-
-	nfattr_parse_nested(tb, CTA_IP_MAX, attr);
-
-	if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
-		return -EINVAL;
-
-	if (!tb[CTA_IP_V4_SRC-1])
-		return -EINVAL;
-	tuple->src.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
-
-	if (!tb[CTA_IP_V4_DST-1])
-		return -EINVAL;
-	tuple->dst.ip = *(__be32 *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
-
-	return 0;
-}
-
-static const size_t cta_min_proto[CTA_PROTO_MAX] = {
-	[CTA_PROTO_NUM-1]	= sizeof(u_int8_t),
-	[CTA_PROTO_SRC_PORT-1]	= sizeof(u_int16_t),
-	[CTA_PROTO_DST_PORT-1]	= sizeof(u_int16_t),
-	[CTA_PROTO_ICMP_TYPE-1]	= sizeof(u_int8_t),
-	[CTA_PROTO_ICMP_CODE-1]	= sizeof(u_int8_t),
-	[CTA_PROTO_ICMP_ID-1]	= sizeof(u_int16_t),
-};
-
-static inline int
-ctnetlink_parse_tuple_proto(struct nfattr *attr,
-			    struct ip_conntrack_tuple *tuple)
-{
-	struct nfattr *tb[CTA_PROTO_MAX];
-	struct ip_conntrack_protocol *proto;
-	int ret = 0;
-
-	nfattr_parse_nested(tb, CTA_PROTO_MAX, attr);
-
-	if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
-		return -EINVAL;
-
-	if (!tb[CTA_PROTO_NUM-1])
-		return -EINVAL;
-	tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
-
-	proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
-
-	if (likely(proto->nfattr_to_tuple))
-		ret = proto->nfattr_to_tuple(tb, tuple);
-
-	ip_conntrack_proto_put(proto);
-
-	return ret;
-}
-
-static inline int
-ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple,
-		      enum ctattr_tuple type)
-{
-	struct nfattr *tb[CTA_TUPLE_MAX];
-	int err;
-
-	memset(tuple, 0, sizeof(*tuple));
-
-	nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]);
-
-	if (!tb[CTA_TUPLE_IP-1])
-		return -EINVAL;
-
-	err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple);
-	if (err < 0)
-		return err;
-
-	if (!tb[CTA_TUPLE_PROTO-1])
-		return -EINVAL;
-
-	err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple);
-	if (err < 0)
-		return err;
-
-	/* orig and expect tuples get DIR_ORIGINAL */
-	if (type == CTA_TUPLE_REPLY)
-		tuple->dst.dir = IP_CT_DIR_REPLY;
-	else
-		tuple->dst.dir = IP_CT_DIR_ORIGINAL;
-
-	return 0;
-}
-
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = {
-	[CTA_PROTONAT_PORT_MIN-1]	= sizeof(u_int16_t),
-	[CTA_PROTONAT_PORT_MAX-1]	= sizeof(u_int16_t),
-};
-
-static int ctnetlink_parse_nat_proto(struct nfattr *attr,
-				     const struct ip_conntrack *ct,
-				     struct ip_nat_range *range)
-{
-	struct nfattr *tb[CTA_PROTONAT_MAX];
-	struct ip_nat_protocol *npt;
-
-	nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr);
-
-	if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat))
-		return -EINVAL;
-
-	npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
-
-	if (!npt->nfattr_to_range) {
-		ip_nat_proto_put(npt);
-		return 0;
-	}
-
-	/* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */
-	if (npt->nfattr_to_range(tb, range) > 0)
-		range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
-
-	ip_nat_proto_put(npt);
-
-	return 0;
-}
-
-static const size_t cta_min_nat[CTA_NAT_MAX] = {
-	[CTA_NAT_MINIP-1]       = sizeof(__be32),
-	[CTA_NAT_MAXIP-1]       = sizeof(__be32),
-};
-
-static inline int
-ctnetlink_parse_nat(struct nfattr *nat,
-		    const struct ip_conntrack *ct, struct ip_nat_range *range)
-{
-	struct nfattr *tb[CTA_NAT_MAX];
-	int err;
-
-	memset(range, 0, sizeof(*range));
-
-	nfattr_parse_nested(tb, CTA_NAT_MAX, nat);
-
-	if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat))
-		return -EINVAL;
-
-	if (tb[CTA_NAT_MINIP-1])
-		range->min_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
-
-	if (!tb[CTA_NAT_MAXIP-1])
-		range->max_ip = range->min_ip;
-	else
-		range->max_ip = *(__be32 *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
-
-	if (range->min_ip)
-		range->flags |= IP_NAT_RANGE_MAP_IPS;
-
-	if (!tb[CTA_NAT_PROTO-1])
-		return 0;
-
-	err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range);
-	if (err < 0)
-		return err;
-
-	return 0;
-}
-#endif
-
-static inline int
-ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
-{
-	struct nfattr *tb[CTA_HELP_MAX];
-
-	nfattr_parse_nested(tb, CTA_HELP_MAX, attr);
-
-	if (!tb[CTA_HELP_NAME-1])
-		return -EINVAL;
-
-	*helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]);
-
-	return 0;
-}
-
-static const size_t cta_min[CTA_MAX] = {
-	[CTA_STATUS-1] 		= sizeof(__be32),
-	[CTA_TIMEOUT-1] 	= sizeof(__be32),
-	[CTA_MARK-1]		= sizeof(__be32),
-	[CTA_USE-1]		= sizeof(__be32),
-	[CTA_ID-1]		= sizeof(__be32)
-};
-
-static int
-ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
-			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack_tuple tuple;
-	struct ip_conntrack *ct;
-	int err = 0;
-
-	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
-		return -EINVAL;
-
-	if (cda[CTA_TUPLE_ORIG-1])
-		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
-	else if (cda[CTA_TUPLE_REPLY-1])
-		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
-	else {
-		/* Flush the whole table */
-		ip_conntrack_flush();
-		return 0;
-	}
-
-	if (err < 0)
-		return err;
-
-	h = ip_conntrack_find_get(&tuple, NULL);
-	if (!h)
-		return -ENOENT;
-
-	ct = tuplehash_to_ctrack(h);
-
-	if (cda[CTA_ID-1]) {
-		u_int32_t id = ntohl(*(__be32 *)NFA_DATA(cda[CTA_ID-1]));
-		if (ct->id != id) {
-			ip_conntrack_put(ct);
-			return -ENOENT;
-		}
-	}
-	if (del_timer(&ct->timeout))
-		ct->timeout.function((unsigned long)ct);
-
-	ip_conntrack_put(ct);
-
-	return 0;
-}
-
-static int
-ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
-			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
-	struct ip_conntrack_tuple_hash *h;
-	struct ip_conntrack_tuple tuple;
-	struct ip_conntrack *ct;
-	struct sk_buff *skb2 = NULL;
-	int err = 0;
-
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-		struct nfgenmsg *msg = NLMSG_DATA(nlh);
-		u32 rlen;
-
-		if (msg->nfgen_family != AF_INET)
-			return -EAFNOSUPPORT;
-
-#ifndef CONFIG_IP_NF_CT_ACCT
-		if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
-			return -ENOTSUPP;
-#endif
-		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-						ctnetlink_dump_table,
-						ctnetlink_done)) != 0)
-			return -EINVAL;
-
-		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-		if (rlen > skb->len)
-			rlen = skb->len;
-		skb_pull(skb, rlen);
-		return 0;
-	}
-
-	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
-		return -EINVAL;
-
-	if (cda[CTA_TUPLE_ORIG-1])
-		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
-	else if (cda[CTA_TUPLE_REPLY-1])
-		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
-	else
-		return -EINVAL;
-
-	if (err < 0)
-		return err;
-
-	h = ip_conntrack_find_get(&tuple, NULL);
-	if (!h)
-		return -ENOENT;
-
-	ct = tuplehash_to_ctrack(h);
-
-	err = -ENOMEM;
-	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (!skb2) {
-		ip_conntrack_put(ct);
-		return -ENOMEM;
-	}
-
-	err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
-				  IPCTNL_MSG_CT_NEW, 1, ct);
-	ip_conntrack_put(ct);
-	if (err <= 0)
-		goto free;
-
-	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
-	if (err < 0)
-		goto out;
-
-	return 0;
-
-free:
-	kfree_skb(skb2);
-out:
-	return err;
-}
-
-static inline int
-ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
-{
-	unsigned long d;
-	unsigned status = ntohl(*(__be32 *)NFA_DATA(cda[CTA_STATUS-1]));
-	d = ct->status ^ status;
-
-	if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
-		/* unchangeable */
-		return -EINVAL;
-
-	if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
-		/* SEEN_REPLY bit can only be set */
-		return -EINVAL;
-
-
-	if (d & IPS_ASSURED && !(status & IPS_ASSURED))
-		/* ASSURED bit can only be set */
-		return -EINVAL;
-
-	if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
-#ifndef CONFIG_IP_NF_NAT_NEEDED
-		return -EINVAL;
-#else
-		struct ip_nat_range range;
-
-		if (cda[CTA_NAT_DST-1]) {
-			if (ctnetlink_parse_nat(cda[CTA_NAT_DST-1], ct,
-						&range) < 0)
-				return -EINVAL;
-			if (ip_nat_initialized(ct,
-					       HOOK2MANIP(NF_IP_PRE_ROUTING)))
-				return -EEXIST;
-			ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
-		}
-		if (cda[CTA_NAT_SRC-1]) {
-			if (ctnetlink_parse_nat(cda[CTA_NAT_SRC-1], ct,
-						&range) < 0)
-				return -EINVAL;
-			if (ip_nat_initialized(ct,
-					       HOOK2MANIP(NF_IP_POST_ROUTING)))
-				return -EEXIST;
-			ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
-		}
-#endif
-	}
-
-	/* Be careful here, modifying NAT bits can screw up things,
-	 * so don't let users modify them directly if they don't pass
-	 * ip_nat_range. */
-	ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
-	return 0;
-}
-
-
-static inline int
-ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[])
-{
-	struct ip_conntrack_helper *helper;
-	char *helpname;
-	int err;
-
-	/* don't change helper of sibling connections */
-	if (ct->master)
-		return -EINVAL;
-
-	err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname);
-	if (err < 0)
-		return err;
-
-	helper = __ip_conntrack_helper_find_byname(helpname);
-	if (!helper) {
-		if (!strcmp(helpname, ""))
-			helper = NULL;
-		else
-			return -EINVAL;
-	}
-
-	if (ct->helper) {
-		if (!helper) {
-			/* we had a helper before ... */
-			ip_ct_remove_expectations(ct);
-			ct->helper = NULL;
-		} else {
-			/* need to zero data of old helper */
-			memset(&ct->help, 0, sizeof(ct->help));
-		}
-	}
-
-	ct->helper = helper;
-
-	return 0;
-}
-
-static inline int
-ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[])
-{
-	u_int32_t timeout = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1]));
-
-	if (!del_timer(&ct->timeout))
-		return -ETIME;
-
-	ct->timeout.expires = jiffies + timeout * HZ;
-	add_timer(&ct->timeout);
-
-	return 0;
-}
-
-static inline int
-ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[])
-{
-	struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1];
-	struct ip_conntrack_protocol *proto;
-	u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
-	int err = 0;
-
-	nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr);
-
-	proto = ip_conntrack_proto_find_get(npt);
-
-	if (proto->from_nfattr)
-		err = proto->from_nfattr(tb, ct);
-	ip_conntrack_proto_put(proto);
-
-	return err;
-}
-
-static int
-ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
-{
-	int err;
-
-	if (cda[CTA_HELP-1]) {
-		err = ctnetlink_change_helper(ct, cda);
-		if (err < 0)
-			return err;
-	}
-
-	if (cda[CTA_TIMEOUT-1]) {
-		err = ctnetlink_change_timeout(ct, cda);
-		if (err < 0)
-			return err;
-	}
-
-	if (cda[CTA_STATUS-1]) {
-		err = ctnetlink_change_status(ct, cda);
-		if (err < 0)
-			return err;
-	}
-
-	if (cda[CTA_PROTOINFO-1]) {
-		err = ctnetlink_change_protoinfo(ct, cda);
-		if (err < 0)
-			return err;
-	}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
-	if (cda[CTA_MARK-1])
-		ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
-#endif
-
-	return 0;
-}
-
-static int
-ctnetlink_create_conntrack(struct nfattr *cda[],
-			   struct ip_conntrack_tuple *otuple,
-			   struct ip_conntrack_tuple *rtuple)
-{
-	struct ip_conntrack *ct;
-	int err = -EINVAL;
-
-	ct = ip_conntrack_alloc(otuple, rtuple);
-	if (ct == NULL || IS_ERR(ct))
-		return -ENOMEM;
-
-	if (!cda[CTA_TIMEOUT-1])
-		goto err;
-	ct->timeout.expires = ntohl(*(__be32 *)NFA_DATA(cda[CTA_TIMEOUT-1]));
-
-	ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
-	ct->status |= IPS_CONFIRMED;
-
-	if (cda[CTA_STATUS-1]) {
-		err = ctnetlink_change_status(ct, cda);
-		if (err < 0)
-			goto err;
-	}
-
-	if (cda[CTA_PROTOINFO-1]) {
-		err = ctnetlink_change_protoinfo(ct, cda);
-		if (err < 0)
-			goto err;
-	}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
-	if (cda[CTA_MARK-1])
-		ct->mark = ntohl(*(__be32 *)NFA_DATA(cda[CTA_MARK-1]));
-#endif
-
-	ct->helper = ip_conntrack_helper_find_get(rtuple);
-
-	add_timer(&ct->timeout);
-	ip_conntrack_hash_insert(ct);
-
-	if (ct->helper)
-		ip_conntrack_helper_put(ct->helper);
-
-	return 0;
-
-err:
-	ip_conntrack_free(ct);
-	return err;
-}
-
-static int
-ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
-			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
-	struct ip_conntrack_tuple otuple, rtuple;
-	struct ip_conntrack_tuple_hash *h = NULL;
-	int err = 0;
-
-	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
-		return -EINVAL;
-
-	if (cda[CTA_TUPLE_ORIG-1]) {
-		err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG);
-		if (err < 0)
-			return err;
-	}
-
-	if (cda[CTA_TUPLE_REPLY-1]) {
-		err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY);
-		if (err < 0)
-			return err;
-	}
-
-	write_lock_bh(&ip_conntrack_lock);
-	if (cda[CTA_TUPLE_ORIG-1])
-		h = __ip_conntrack_find(&otuple, NULL);
-	else if (cda[CTA_TUPLE_REPLY-1])
-		h = __ip_conntrack_find(&rtuple, NULL);
-
-	if (h == NULL) {
-		write_unlock_bh(&ip_conntrack_lock);
-		err = -ENOENT;
-		if (nlh->nlmsg_flags & NLM_F_CREATE)
-			err = ctnetlink_create_conntrack(cda, &otuple, &rtuple);
-		return err;
-	}
-	/* implicit 'else' */
-
-	/* we only allow nat config for new conntracks */
-	if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
-		err = -EINVAL;
-		goto out_unlock;
-	}
-
-	/* We manipulate the conntrack inside the global conntrack table lock,
-	 * so there's no need to increase the refcount */
-	err = -EEXIST;
-	if (!(nlh->nlmsg_flags & NLM_F_EXCL))
-		err = ctnetlink_change_conntrack(tuplehash_to_ctrack(h), cda);
-
-out_unlock:
-	write_unlock_bh(&ip_conntrack_lock);
-	return err;
-}
-
-/***********************************************************************
- * EXPECT
- ***********************************************************************/
-
-static inline int
-ctnetlink_exp_dump_tuple(struct sk_buff *skb,
-			 const struct ip_conntrack_tuple *tuple,
-			 enum ctattr_expect type)
-{
-	struct nfattr *nest_parms = NFA_NEST(skb, type);
-
-	if (ctnetlink_dump_tuples(skb, tuple) < 0)
-		goto nfattr_failure;
-
-	NFA_NEST_END(skb, nest_parms);
-
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-static inline int
-ctnetlink_exp_dump_mask(struct sk_buff *skb,
-			const struct ip_conntrack_tuple *tuple,
-			const struct ip_conntrack_tuple *mask)
-{
-	int ret;
-	struct ip_conntrack_protocol *proto;
-	struct nfattr *nest_parms = NFA_NEST(skb, CTA_EXPECT_MASK);
-
-	ret = ctnetlink_dump_tuples_ip(skb, mask);
-	if (unlikely(ret < 0))
-		goto nfattr_failure;
-
-	proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
-	ret = ctnetlink_dump_tuples_proto(skb, mask, proto);
-	ip_conntrack_proto_put(proto);
-	if (unlikely(ret < 0))
-		goto nfattr_failure;
-
-	NFA_NEST_END(skb, nest_parms);
-
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-static inline int
-ctnetlink_exp_dump_expect(struct sk_buff *skb,
-			  const struct ip_conntrack_expect *exp)
-{
-	struct ip_conntrack *master = exp->master;
-	__be32 timeout = htonl((exp->timeout.expires - jiffies) / HZ);
-	__be32 id = htonl(exp->id);
-
-	if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
-		goto nfattr_failure;
-	if (ctnetlink_exp_dump_mask(skb, &exp->tuple, &exp->mask) < 0)
-		goto nfattr_failure;
-	if (ctnetlink_exp_dump_tuple(skb,
-				 &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-				 CTA_EXPECT_MASTER) < 0)
-		goto nfattr_failure;
-
-	NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(__be32), &timeout);
-	NFA_PUT(skb, CTA_EXPECT_ID, sizeof(__be32), &id);
-
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-static int
-ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
-		    int event,
-		    int nowait,
-		    const struct ip_conntrack_expect *exp)
-{
-	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
-	unsigned char *b;
-
-	b = skb->tail;
-
-	event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
-	nlh    = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
-	nfmsg  = NLMSG_DATA(nlh);
-
-	nlh->nlmsg_flags    = (nowait && pid) ? NLM_F_MULTI : 0;
-	nfmsg->nfgen_family = AF_INET;
-	nfmsg->version	    = NFNETLINK_V0;
-	nfmsg->res_id	    = 0;
-
-	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
-		goto nfattr_failure;
-
-	nlh->nlmsg_len = skb->tail - b;
-	return skb->len;
-
-nlmsg_failure:
-nfattr_failure:
-	skb_trim(skb, b - skb->data);
-	return -1;
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-static int ctnetlink_expect_event(struct notifier_block *this,
-				  unsigned long events, void *ptr)
-{
-	struct nlmsghdr *nlh;
-	struct nfgenmsg *nfmsg;
-	struct ip_conntrack_expect *exp = (struct ip_conntrack_expect *)ptr;
-	struct sk_buff *skb;
-	unsigned int type;
-	unsigned char *b;
-	int flags = 0;
-
-	if (events & IPEXP_NEW) {
-		type = IPCTNL_MSG_EXP_NEW;
-		flags = NLM_F_CREATE|NLM_F_EXCL;
-	} else
-		return NOTIFY_DONE;
-
-	if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
-		return NOTIFY_DONE;
-
-	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
-	if (!skb)
-		return NOTIFY_DONE;
-
-	b = skb->tail;
-
-	type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
-	nlh   = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
-	nfmsg = NLMSG_DATA(nlh);
-
-	nlh->nlmsg_flags    = flags;
-	nfmsg->nfgen_family = AF_INET;
-	nfmsg->version	    = NFNETLINK_V0;
-	nfmsg->res_id	    = 0;
-
-	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
-		goto nfattr_failure;
-
-	nlh->nlmsg_len = skb->tail - b;
-	nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
-	return NOTIFY_DONE;
-
-nlmsg_failure:
-nfattr_failure:
-	kfree_skb(skb);
-	return NOTIFY_DONE;
-}
-#endif
-
-static int
-ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct ip_conntrack_expect *exp = NULL;
-	struct list_head *i;
-	u_int32_t *id = (u_int32_t *) &cb->args[0];
-
-	read_lock_bh(&ip_conntrack_lock);
-	list_for_each_prev(i, &ip_conntrack_expect_list) {
-		exp = (struct ip_conntrack_expect *) i;
-		if (exp->id <= *id)
-			continue;
-		if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
-					    cb->nlh->nlmsg_seq,
-					    IPCTNL_MSG_EXP_NEW,
-					    1, exp) < 0)
-			goto out;
-		*id = exp->id;
-	}
-out:
-	read_unlock_bh(&ip_conntrack_lock);
-
-	return skb->len;
-}
-
-static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
-	[CTA_EXPECT_TIMEOUT-1]          = sizeof(__be32),
-	[CTA_EXPECT_ID-1]               = sizeof(__be32)
-};
-
-static int
-ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
-		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
-	struct ip_conntrack_tuple tuple;
-	struct ip_conntrack_expect *exp;
-	struct sk_buff *skb2;
-	int err = 0;
-
-	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
-		return -EINVAL;
-
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-		struct nfgenmsg *msg = NLMSG_DATA(nlh);
-		u32 rlen;
-
-		if (msg->nfgen_family != AF_INET)
-			return -EAFNOSUPPORT;
-
-		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-						ctnetlink_exp_dump_table,
-						ctnetlink_done)) != 0)
-			return -EINVAL;
-		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-		if (rlen > skb->len)
-			rlen = skb->len;
-		skb_pull(skb, rlen);
-		return 0;
-	}
-
-	if (cda[CTA_EXPECT_MASTER-1])
-		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER);
-	else
-		return -EINVAL;
-
-	if (err < 0)
-		return err;
-
-	exp = ip_conntrack_expect_find_get(&tuple);
-	if (!exp)
-		return -ENOENT;
-
-	if (cda[CTA_EXPECT_ID-1]) {
-		__be32 id = *(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
-		if (exp->id != ntohl(id)) {
-			ip_conntrack_expect_put(exp);
-			return -ENOENT;
-		}
-	}
-
-	err = -ENOMEM;
-	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
-	if (!skb2)
-		goto out;
-
-	err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
-				      nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
-				      1, exp);
-	if (err <= 0)
-		goto free;
-
-	ip_conntrack_expect_put(exp);
-
-	return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
-
-free:
-	kfree_skb(skb2);
-out:
-	ip_conntrack_expect_put(exp);
-	return err;
-}
-
-static int
-ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
-		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
-	struct ip_conntrack_expect *exp, *tmp;
-	struct ip_conntrack_tuple tuple;
-	struct ip_conntrack_helper *h;
-	int err;
-
-	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
-		return -EINVAL;
-
-	if (cda[CTA_EXPECT_TUPLE-1]) {
-		/* delete a single expect by tuple */
-		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
-		if (err < 0)
-			return err;
-
-		/* bump usage count to 2 */
-		exp = ip_conntrack_expect_find_get(&tuple);
-		if (!exp)
-			return -ENOENT;
-
-		if (cda[CTA_EXPECT_ID-1]) {
-			__be32 id =
-				*(__be32 *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
-			if (exp->id != ntohl(id)) {
-				ip_conntrack_expect_put(exp);
-				return -ENOENT;
-			}
-		}
-
-		/* after list removal, usage count == 1 */
-		ip_conntrack_unexpect_related(exp);
-		/* have to put what we 'get' above.
-		 * after this line usage count == 0 */
-		ip_conntrack_expect_put(exp);
-	} else if (cda[CTA_EXPECT_HELP_NAME-1]) {
-		char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]);
-
-		/* delete all expectations for this helper */
-		write_lock_bh(&ip_conntrack_lock);
-		h = __ip_conntrack_helper_find_byname(name);
-		if (!h) {
-			write_unlock_bh(&ip_conntrack_lock);
-			return -EINVAL;
-		}
-		list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
-					 list) {
-			if (exp->master->helper == h
-			    && del_timer(&exp->timeout)) {
-				ip_ct_unlink_expect(exp);
-				ip_conntrack_expect_put(exp);
-			}
-		}
-		write_unlock_bh(&ip_conntrack_lock);
-	} else {
-		/* This basically means we have to flush everything*/
-		write_lock_bh(&ip_conntrack_lock);
-		list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
-					 list) {
-			if (del_timer(&exp->timeout)) {
-				ip_ct_unlink_expect(exp);
-				ip_conntrack_expect_put(exp);
-			}
-		}
-		write_unlock_bh(&ip_conntrack_lock);
-	}
-
-	return 0;
-}
-static int
-ctnetlink_change_expect(struct ip_conntrack_expect *x, struct nfattr *cda[])
-{
-	return -EOPNOTSUPP;
-}
-
-static int
-ctnetlink_create_expect(struct nfattr *cda[])
-{
-	struct ip_conntrack_tuple tuple, mask, master_tuple;
-	struct ip_conntrack_tuple_hash *h = NULL;
-	struct ip_conntrack_expect *exp;
-	struct ip_conntrack *ct;
-	int err = 0;
-
-	/* caller guarantees that those three CTA_EXPECT_* exist */
-	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
-	if (err < 0)
-		return err;
-	err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK);
-	if (err < 0)
-		return err;
-	err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER);
-	if (err < 0)
-		return err;
-
-	/* Look for master conntrack of this expectation */
-	h = ip_conntrack_find_get(&master_tuple, NULL);
-	if (!h)
-		return -ENOENT;
-	ct = tuplehash_to_ctrack(h);
-
-	if (!ct->helper) {
-		/* such conntrack hasn't got any helper, abort */
-		err = -EINVAL;
-		goto out;
-	}
-
-	exp = ip_conntrack_expect_alloc(ct);
-	if (!exp) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	exp->expectfn = NULL;
-	exp->flags = 0;
-	exp->master = ct;
-	memcpy(&exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple));
-	memcpy(&exp->mask, &mask, sizeof(struct ip_conntrack_tuple));
-
-	err = ip_conntrack_expect_related(exp);
-	ip_conntrack_expect_put(exp);
-
-out:
-	ip_conntrack_put(tuplehash_to_ctrack(h));
-	return err;
-}
-
-static int
-ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
-		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
-	struct ip_conntrack_tuple tuple;
-	struct ip_conntrack_expect *exp;
-	int err = 0;
-
-	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
-		return -EINVAL;
-
-	if (!cda[CTA_EXPECT_TUPLE-1]
-	    || !cda[CTA_EXPECT_MASK-1]
-	    || !cda[CTA_EXPECT_MASTER-1])
-		return -EINVAL;
-
-	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
-	if (err < 0)
-		return err;
-
-	write_lock_bh(&ip_conntrack_lock);
-	exp = __ip_conntrack_expect_find(&tuple);
-
-	if (!exp) {
-		write_unlock_bh(&ip_conntrack_lock);
-		err = -ENOENT;
-		if (nlh->nlmsg_flags & NLM_F_CREATE)
-			err = ctnetlink_create_expect(cda);
-		return err;
-	}
-
-	err = -EEXIST;
-	if (!(nlh->nlmsg_flags & NLM_F_EXCL))
-		err = ctnetlink_change_expect(exp, cda);
-	write_unlock_bh(&ip_conntrack_lock);
-
-	return err;
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-static struct notifier_block ctnl_notifier = {
-	.notifier_call	= ctnetlink_conntrack_event,
-};
-
-static struct notifier_block ctnl_notifier_exp = {
-	.notifier_call	= ctnetlink_expect_event,
-};
-#endif
-
-static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
-	[IPCTNL_MSG_CT_NEW]		= { .call = ctnetlink_new_conntrack,
-					    .attr_count = CTA_MAX, },
-	[IPCTNL_MSG_CT_GET] 		= { .call = ctnetlink_get_conntrack,
-					    .attr_count = CTA_MAX, },
-	[IPCTNL_MSG_CT_DELETE]  	= { .call = ctnetlink_del_conntrack,
-					    .attr_count = CTA_MAX, },
-	[IPCTNL_MSG_CT_GET_CTRZERO] 	= { .call = ctnetlink_get_conntrack,
-					    .attr_count = CTA_MAX, },
-};
-
-static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
-	[IPCTNL_MSG_EXP_GET]		= { .call = ctnetlink_get_expect,
-					    .attr_count = CTA_EXPECT_MAX, },
-	[IPCTNL_MSG_EXP_NEW]		= { .call = ctnetlink_new_expect,
-					    .attr_count = CTA_EXPECT_MAX, },
-	[IPCTNL_MSG_EXP_DELETE]		= { .call = ctnetlink_del_expect,
-					    .attr_count = CTA_EXPECT_MAX, },
-};
-
-static struct nfnetlink_subsystem ctnl_subsys = {
-	.name				= "conntrack",
-	.subsys_id			= NFNL_SUBSYS_CTNETLINK,
-	.cb_count			= IPCTNL_MSG_MAX,
-	.cb				= ctnl_cb,
-};
-
-static struct nfnetlink_subsystem ctnl_exp_subsys = {
-	.name				= "conntrack_expect",
-	.subsys_id			= NFNL_SUBSYS_CTNETLINK_EXP,
-	.cb_count			= IPCTNL_MSG_EXP_MAX,
-	.cb				= ctnl_exp_cb,
-};
-
-MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
-MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
-
-static int __init ctnetlink_init(void)
-{
-	int ret;
-
-	printk("ctnetlink v%s: registering with nfnetlink.\n", version);
-	ret = nfnetlink_subsys_register(&ctnl_subsys);
-	if (ret < 0) {
-		printk("ctnetlink_init: cannot register with nfnetlink.\n");
-		goto err_out;
-	}
-
-	ret = nfnetlink_subsys_register(&ctnl_exp_subsys);
-	if (ret < 0) {
-		printk("ctnetlink_init: cannot register exp with nfnetlink.\n");
-		goto err_unreg_subsys;
-	}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-	ret = ip_conntrack_register_notifier(&ctnl_notifier);
-	if (ret < 0) {
-		printk("ctnetlink_init: cannot register notifier.\n");
-		goto err_unreg_exp_subsys;
-	}
-
-	ret = ip_conntrack_expect_register_notifier(&ctnl_notifier_exp);
-	if (ret < 0) {
-		printk("ctnetlink_init: cannot expect register notifier.\n");
-		goto err_unreg_notifier;
-	}
-#endif
-
-	return 0;
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-err_unreg_notifier:
-	ip_conntrack_unregister_notifier(&ctnl_notifier);
-err_unreg_exp_subsys:
-	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
-#endif
-err_unreg_subsys:
-	nfnetlink_subsys_unregister(&ctnl_subsys);
-err_out:
-	return ret;
-}
-
-static void __exit ctnetlink_exit(void)
-{
-	printk("ctnetlink: unregistering from nfnetlink.\n");
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-	ip_conntrack_expect_unregister_notifier(&ctnl_notifier_exp);
-	ip_conntrack_unregister_notifier(&ctnl_notifier);
-#endif
-
-	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
-	nfnetlink_subsys_unregister(&ctnl_subsys);
-	return;
-}
-
-module_init(ctnetlink_init);
-module_exit(ctnetlink_exit);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
deleted file mode 100644
index 88af82e9865..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ;
-
-static int generic_pkt_to_tuple(const struct sk_buff *skb,
-				unsigned int dataoff,
-				struct ip_conntrack_tuple *tuple)
-{
-	tuple->src.u.all = 0;
-	tuple->dst.u.all = 0;
-
-	return 1;
-}
-
-static int generic_invert_tuple(struct ip_conntrack_tuple *tuple,
-				const struct ip_conntrack_tuple *orig)
-{
-	tuple->src.u.all = 0;
-	tuple->dst.u.all = 0;
-
-	return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int generic_print_tuple(struct seq_file *s,
-			       const struct ip_conntrack_tuple *tuple)
-{
-	return 0;
-}
-
-/* Print out the private part of the conntrack. */
-static int generic_print_conntrack(struct seq_file *s,
-				   const struct ip_conntrack *state)
-{
-	return 0;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int packet(struct ip_conntrack *conntrack,
-		  const struct sk_buff *skb,
-		  enum ip_conntrack_info ctinfo)
-{
-	ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
-	return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
-{
-	return 1;
-}
-
-struct ip_conntrack_protocol ip_conntrack_generic_protocol =
-{
-	.proto			= 0,
-	.name			= "unknown",
-	.pkt_to_tuple		= generic_pkt_to_tuple,
-	.invert_tuple		= generic_invert_tuple,
-	.print_tuple		= generic_print_tuple,
-	.print_conntrack	= generic_print_conntrack,
-	.packet			= packet,
-	.new			= new,
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
deleted file mode 100644
index ac1c49ef36a..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * ip_conntrack_proto_gre.c - Version 3.0
- *
- * Connection tracking protocol helper module for GRE.
- *
- * GRE is a generic encapsulation protocol, which is generally not very
- * suited for NAT, as it has no protocol-specific part as port numbers.
- *
- * It has an optional key field, which may help us distinguishing two
- * connections between the same two hosts.
- *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
- *
- * PPTP is built on top of a modified version of GRE, and has a mandatory
- * field called "CallID", which serves us for the same purpose as the key
- * field in plain GRE.
- *
- * Documentation about PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/in.h>
-#include <linux/list.h>
-#include <linux/seq_file.h>
-#include <linux/interrupt.h>
-
-static DEFINE_RWLOCK(ip_ct_gre_lock);
-
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
-
-/* shamelessly stolen from ip_conntrack_proto_udp.c */
-#define GRE_TIMEOUT		(30*HZ)
-#define GRE_STREAM_TIMEOUT	(180*HZ)
-
-#if 0
-#define DEBUGP(format, args...)	printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
-#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \
-			NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \
-			NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key))
-#else
-#define DEBUGP(x, args...)
-#define DUMP_TUPLE_GRE(x)
-#endif
-
-/* GRE KEYMAP HANDLING FUNCTIONS */
-static LIST_HEAD(gre_keymap_list);
-
-static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km,
-				const struct ip_conntrack_tuple *t)
-{
-	return ((km->tuple.src.ip == t->src.ip) &&
-		(km->tuple.dst.ip == t->dst.ip) &&
-		(km->tuple.dst.protonum == t->dst.protonum) &&
-		(km->tuple.dst.u.all == t->dst.u.all));
-}
-
-/* look up the source key for a given tuple */
-static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t)
-{
-	struct ip_ct_gre_keymap *km;
-	__be16 key = 0;
-
-	read_lock_bh(&ip_ct_gre_lock);
-	list_for_each_entry(km, &gre_keymap_list, list) {
-		if (gre_key_cmpfn(km, t)) {
-			key = km->tuple.src.u.gre.key;
-			break;
-		}
-	}
-	read_unlock_bh(&ip_ct_gre_lock);
-
-	DEBUGP("lookup src key 0x%x up key for ", key);
-	DUMP_TUPLE_GRE(t);
-
-	return key;
-}
-
-/* add a single keymap entry, associate with specified master ct */
-int
-ip_ct_gre_keymap_add(struct ip_conntrack *ct,
-		     struct ip_conntrack_tuple *t, int reply)
-{
-	struct ip_ct_gre_keymap **exist_km, *km;
-
-	if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
-		DEBUGP("refusing to add GRE keymap to non-pptp session\n");
-		return -1;
-	}
-
-	if (!reply)
-		exist_km = &ct->help.ct_pptp_info.keymap_orig;
-	else
-		exist_km = &ct->help.ct_pptp_info.keymap_reply;
-
-	if (*exist_km) {
-		/* check whether it's a retransmission */
-		list_for_each_entry(km, &gre_keymap_list, list) {
-			if (gre_key_cmpfn(km, t) && km == *exist_km)
-				return 0;
-		}
-		DEBUGP("trying to override keymap_%s for ct %p\n",
-			reply? "reply":"orig", ct);
-		return -EEXIST;
-	}
-
-	km = kmalloc(sizeof(*km), GFP_ATOMIC);
-	if (!km)
-		return -ENOMEM;
-
-	memcpy(&km->tuple, t, sizeof(*t));
-	*exist_km = km;
-
-	DEBUGP("adding new entry %p: ", km);
-	DUMP_TUPLE_GRE(&km->tuple);
-
-	write_lock_bh(&ip_ct_gre_lock);
-	list_add_tail(&km->list, &gre_keymap_list);
-	write_unlock_bh(&ip_ct_gre_lock);
-
-	return 0;
-}
-
-/* destroy the keymap entries associated with specified master ct */
-void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct)
-{
-	DEBUGP("entering for ct %p\n", ct);
-
-	if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
-		DEBUGP("refusing to destroy GRE keymap to non-pptp session\n");
-		return;
-	}
-
-	write_lock_bh(&ip_ct_gre_lock);
-	if (ct->help.ct_pptp_info.keymap_orig) {
-		DEBUGP("removing %p from list\n",
-			ct->help.ct_pptp_info.keymap_orig);
-		list_del(&ct->help.ct_pptp_info.keymap_orig->list);
-		kfree(ct->help.ct_pptp_info.keymap_orig);
-		ct->help.ct_pptp_info.keymap_orig = NULL;
-	}
-	if (ct->help.ct_pptp_info.keymap_reply) {
-		DEBUGP("removing %p from list\n",
-			ct->help.ct_pptp_info.keymap_reply);
-		list_del(&ct->help.ct_pptp_info.keymap_reply->list);
-		kfree(ct->help.ct_pptp_info.keymap_reply);
-		ct->help.ct_pptp_info.keymap_reply = NULL;
-	}
-	write_unlock_bh(&ip_ct_gre_lock);
-}
-
-
-/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
-
-/* invert gre part of tuple */
-static int gre_invert_tuple(struct ip_conntrack_tuple *tuple,
-			    const struct ip_conntrack_tuple *orig)
-{
-	tuple->dst.u.gre.key = orig->src.u.gre.key;
-	tuple->src.u.gre.key = orig->dst.u.gre.key;
-
-	return 1;
-}
-
-/* gre hdr info to tuple */
-static int gre_pkt_to_tuple(const struct sk_buff *skb,
-			   unsigned int dataoff,
-			   struct ip_conntrack_tuple *tuple)
-{
-	struct gre_hdr_pptp _pgrehdr, *pgrehdr;
-	__be16 srckey;
-	struct gre_hdr _grehdr, *grehdr;
-
-	/* first only delinearize old RFC1701 GRE header */
-	grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
-	if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
-		/* try to behave like "ip_conntrack_proto_generic" */
-		tuple->src.u.all = 0;
-		tuple->dst.u.all = 0;
-		return 1;
-	}
-
-	/* PPTP header is variable length, only need up to the call_id field */
-	pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr);
-	if (!pgrehdr)
-		return 1;
-
-	if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
-		DEBUGP("GRE_VERSION_PPTP but unknown proto\n");
-		return 0;
-	}
-
-	tuple->dst.u.gre.key = pgrehdr->call_id;
-	srckey = gre_keymap_lookup(tuple);
-	tuple->src.u.gre.key = srckey;
-
-	return 1;
-}
-
-/* print gre part of tuple */
-static int gre_print_tuple(struct seq_file *s,
-			   const struct ip_conntrack_tuple *tuple)
-{
-	return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
-			  ntohs(tuple->src.u.gre.key),
-			  ntohs(tuple->dst.u.gre.key));
-}
-
-/* print private data for conntrack */
-static int gre_print_conntrack(struct seq_file *s,
-			       const struct ip_conntrack *ct)
-{
-	return seq_printf(s, "timeout=%u, stream_timeout=%u ",
-			  (ct->proto.gre.timeout / HZ),
-			  (ct->proto.gre.stream_timeout / HZ));
-}
-
-/* Returns verdict for packet, and may modify conntrack */
-static int gre_packet(struct ip_conntrack *ct,
-		      const struct sk_buff *skb,
-		      enum ip_conntrack_info conntrackinfo)
-{
-	/* If we've seen traffic both ways, this is a GRE connection.
-	 * Extend timeout. */
-	if (ct->status & IPS_SEEN_REPLY) {
-		ip_ct_refresh_acct(ct, conntrackinfo, skb,
-				   ct->proto.gre.stream_timeout);
-		/* Also, more likely to be important, and not a probe. */
-		set_bit(IPS_ASSURED_BIT, &ct->status);
-		ip_conntrack_event_cache(IPCT_STATUS, skb);
-	} else
-		ip_ct_refresh_acct(ct, conntrackinfo, skb,
-				   ct->proto.gre.timeout);
-
-	return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int gre_new(struct ip_conntrack *ct,
-		   const struct sk_buff *skb)
-{
-	DEBUGP(": ");
-	DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-
-	/* initialize to sane value.  Ideally a conntrack helper
-	 * (e.g. in case of pptp) is increasing them */
-	ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT;
-	ct->proto.gre.timeout = GRE_TIMEOUT;
-
-	return 1;
-}
-
-/* Called when a conntrack entry has already been removed from the hashes
- * and is about to be deleted from memory */
-static void gre_destroy(struct ip_conntrack *ct)
-{
-	struct ip_conntrack *master = ct->master;
-	DEBUGP(" entering\n");
-
-	if (!master)
-		DEBUGP("no master !?!\n");
-	else
-		ip_ct_gre_keymap_destroy(master);
-}
-
-/* protocol helper struct */
-static struct ip_conntrack_protocol gre = {
-	.proto		 = IPPROTO_GRE,
-	.name		 = "gre",
-	.pkt_to_tuple	 = gre_pkt_to_tuple,
-	.invert_tuple	 = gre_invert_tuple,
-	.print_tuple	 = gre_print_tuple,
-	.print_conntrack = gre_print_conntrack,
-	.packet		 = gre_packet,
-	.new		 = gre_new,
-	.destroy	 = gre_destroy,
-	.me 		 = THIS_MODULE,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
-	.nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
-#endif
-};
-
-/* ip_conntrack_proto_gre initialization */
-int __init ip_ct_proto_gre_init(void)
-{
-	return ip_conntrack_protocol_register(&gre);
-}
-
-/* This cannot be __exit, as it is invoked from ip_conntrack_helper_pptp.c's
- * init() code on errors.
- */
-void ip_ct_proto_gre_fini(void)
-{
-	struct list_head *pos, *n;
-
-	/* delete all keymap entries */
-	write_lock_bh(&ip_ct_gre_lock);
-	list_for_each_safe(pos, n, &gre_keymap_list) {
-		DEBUGP("deleting keymap %p at module unload time\n", pos);
-		list_del(pos);
-		kfree(pos);
-	}
-	write_unlock_bh(&ip_ct_gre_lock);
-
-	ip_conntrack_protocol_unregister(&gre);
-}
-
-EXPORT_SYMBOL(ip_ct_gre_keymap_add);
-EXPORT_SYMBOL(ip_ct_gre_keymap_destroy);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
deleted file mode 100644
index e253f3ee52d..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ /dev/null
@@ -1,315 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/in.h>
-#include <linux/icmp.h>
-#include <linux/seq_file.h>
-#include <linux/skbuff.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ;
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int icmp_pkt_to_tuple(const struct sk_buff *skb,
-			     unsigned int dataoff,
-			     struct ip_conntrack_tuple *tuple)
-{
-	struct icmphdr _hdr, *hp;
-
-	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
-	if (hp == NULL)
-		return 0;
-
-	tuple->dst.u.icmp.type = hp->type;
-	tuple->src.u.icmp.id = hp->un.echo.id;
-	tuple->dst.u.icmp.code = hp->code;
-
-	return 1;
-}
-
-/* Add 1; spaces filled with 0. */
-static const u_int8_t invmap[] = {
-	[ICMP_ECHO] = ICMP_ECHOREPLY + 1,
-	[ICMP_ECHOREPLY] = ICMP_ECHO + 1,
-	[ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
-	[ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
-	[ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
-	[ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
-	[ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
-	[ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
-};
-
-static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple,
-			     const struct ip_conntrack_tuple *orig)
-{
-	if (orig->dst.u.icmp.type >= sizeof(invmap)
-	    || !invmap[orig->dst.u.icmp.type])
-		return 0;
-
-	tuple->src.u.icmp.id = orig->src.u.icmp.id;
-	tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
-	tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
-	return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int icmp_print_tuple(struct seq_file *s,
-			    const struct ip_conntrack_tuple *tuple)
-{
-	return seq_printf(s, "type=%u code=%u id=%u ",
-			  tuple->dst.u.icmp.type,
-			  tuple->dst.u.icmp.code,
-			  ntohs(tuple->src.u.icmp.id));
-}
-
-/* Print out the private part of the conntrack. */
-static int icmp_print_conntrack(struct seq_file *s,
-				const struct ip_conntrack *conntrack)
-{
-	return 0;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int icmp_packet(struct ip_conntrack *ct,
-		       const struct sk_buff *skb,
-		       enum ip_conntrack_info ctinfo)
-{
-	/* Try to delete connection immediately after all replies:
-	   won't actually vanish as we still have skb, and del_timer
-	   means this will only run once even if count hits zero twice
-	   (theoretically possible with SMP) */
-	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
-		if (atomic_dec_and_test(&ct->proto.icmp.count)
-		    && del_timer(&ct->timeout))
-			ct->timeout.function((unsigned long)ct);
-	} else {
-		atomic_inc(&ct->proto.icmp.count);
-		ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
-		ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
-	}
-
-	return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int icmp_new(struct ip_conntrack *conntrack,
-		    const struct sk_buff *skb)
-{
-	static const u_int8_t valid_new[] = {
-		[ICMP_ECHO] = 1,
-		[ICMP_TIMESTAMP] = 1,
-		[ICMP_INFO_REQUEST] = 1,
-		[ICMP_ADDRESS] = 1
-	};
-
-	if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
-	    || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
-		/* Can't create a new ICMP `conn' with this. */
-		DEBUGP("icmp: can't create new conn with type %u\n",
-		       conntrack->tuplehash[0].tuple.dst.u.icmp.type);
-		DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
-		return 0;
-	}
-	atomic_set(&conntrack->proto.icmp.count, 0);
-	return 1;
-}
-
-static int
-icmp_error_message(struct sk_buff *skb,
-		   enum ip_conntrack_info *ctinfo,
-		   unsigned int hooknum)
-{
-	struct ip_conntrack_tuple innertuple, origtuple;
-	struct {
-		struct icmphdr icmp;
-		struct iphdr ip;
-	} _in, *inside;
-	struct ip_conntrack_protocol *innerproto;
-	struct ip_conntrack_tuple_hash *h;
-	int dataoff;
-
-	IP_NF_ASSERT(skb->nfct == NULL);
-
-	/* Not enough header? */
-	inside = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_in), &_in);
-	if (inside == NULL)
-		return -NF_ACCEPT;
-
-	/* Ignore ICMP's containing fragments (shouldn't happen) */
-	if (inside->ip.frag_off & htons(IP_OFFSET)) {
-		DEBUGP("icmp_error_track: fragment of proto %u\n",
-		       inside->ip.protocol);
-		return -NF_ACCEPT;
-	}
-
-	innerproto = ip_conntrack_proto_find_get(inside->ip.protocol);
-	dataoff = ip_hdrlen(skb) + sizeof(inside->icmp) + inside->ip.ihl * 4;
-	/* Are they talking about one of our connections? */
-	if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) {
-		DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol);
-		ip_conntrack_proto_put(innerproto);
-		return -NF_ACCEPT;
-	}
-
-	/* Ordinarily, we'd expect the inverted tupleproto, but it's
-	   been preserved inside the ICMP. */
-	if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
-		DEBUGP("icmp_error_track: Can't invert tuple\n");
-		ip_conntrack_proto_put(innerproto);
-		return -NF_ACCEPT;
-	}
-	ip_conntrack_proto_put(innerproto);
-
-	*ctinfo = IP_CT_RELATED;
-
-	h = ip_conntrack_find_get(&innertuple, NULL);
-	if (!h) {
-		/* Locally generated ICMPs will match inverted if they
-		   haven't been SNAT'ed yet */
-		/* FIXME: NAT code has to handle half-done double NAT --RR */
-		if (hooknum == NF_IP_LOCAL_OUT)
-			h = ip_conntrack_find_get(&origtuple, NULL);
-
-		if (!h) {
-			DEBUGP("icmp_error_track: no match\n");
-			return -NF_ACCEPT;
-		}
-		/* Reverse direction from that found */
-		if (DIRECTION(h) != IP_CT_DIR_REPLY)
-			*ctinfo += IP_CT_IS_REPLY;
-	} else {
-		if (DIRECTION(h) == IP_CT_DIR_REPLY)
-			*ctinfo += IP_CT_IS_REPLY;
-	}
-
-	/* Update skb to refer to this connection */
-	skb->nfct = &tuplehash_to_ctrack(h)->ct_general;
-	skb->nfctinfo = *ctinfo;
-	return -NF_ACCEPT;
-}
-
-/* Small and modified version of icmp_rcv */
-static int
-icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
-	   unsigned int hooknum)
-{
-	struct icmphdr _ih, *icmph;
-
-	/* Not enough header? */
-	icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
-	if (icmph == NULL) {
-		if (LOG_INVALID(IPPROTO_ICMP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				      "ip_ct_icmp: short packet ");
-		return -NF_ACCEPT;
-	}
-
-	/* See ip_conntrack_proto_tcp.c */
-	if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
-	    nf_ip_checksum(skb, hooknum, ip_hdrlen(skb), 0)) {
-		if (LOG_INVALID(IPPROTO_ICMP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				      "ip_ct_icmp: bad ICMP checksum ");
-		return -NF_ACCEPT;
-	}
-
-	/*
-	 *	18 is the highest 'known' ICMP type. Anything else is a mystery
-	 *
-	 *	RFC 1122: 3.2.2  Unknown ICMP messages types MUST be silently
-	 *		  discarded.
-	 */
-	if (icmph->type > NR_ICMP_TYPES) {
-		if (LOG_INVALID(IPPROTO_ICMP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				      "ip_ct_icmp: invalid ICMP type ");
-		return -NF_ACCEPT;
-	}
-
-	/* Need to track icmp error message? */
-	if (icmph->type != ICMP_DEST_UNREACH
-	    && icmph->type != ICMP_SOURCE_QUENCH
-	    && icmph->type != ICMP_TIME_EXCEEDED
-	    && icmph->type != ICMP_PARAMETERPROB
-	    && icmph->type != ICMP_REDIRECT)
-		return NF_ACCEPT;
-
-	return icmp_error_message(skb, ctinfo, hooknum);
-}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-static int icmp_tuple_to_nfattr(struct sk_buff *skb,
-				const struct ip_conntrack_tuple *t)
-{
-	NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(__be16),
-		&t->src.u.icmp.id);
-	NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
-		&t->dst.u.icmp.type);
-	NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
-		&t->dst.u.icmp.code);
-
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-static int icmp_nfattr_to_tuple(struct nfattr *tb[],
-				struct ip_conntrack_tuple *tuple)
-{
-	if (!tb[CTA_PROTO_ICMP_TYPE-1]
-	    || !tb[CTA_PROTO_ICMP_CODE-1]
-	    || !tb[CTA_PROTO_ICMP_ID-1])
-		return -EINVAL;
-
-	tuple->dst.u.icmp.type =
-			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
-	tuple->dst.u.icmp.code =
-			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
-	tuple->src.u.icmp.id =
-			*(__be16 *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
-
-	if (tuple->dst.u.icmp.type >= sizeof(invmap)
-	    || !invmap[tuple->dst.u.icmp.type])
-		return -EINVAL;
-
-	return 0;
-}
-#endif
-
-struct ip_conntrack_protocol ip_conntrack_protocol_icmp =
-{
-	.proto 			= IPPROTO_ICMP,
-	.name 			= "icmp",
-	.pkt_to_tuple		= icmp_pkt_to_tuple,
-	.invert_tuple		= icmp_invert_tuple,
-	.print_tuple		= icmp_print_tuple,
-	.print_conntrack	= icmp_print_conntrack,
-	.packet			= icmp_packet,
-	.new			= icmp_new,
-	.error			= icmp_error,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.tuple_to_nfattr	= icmp_tuple_to_nfattr,
-	.nfattr_to_tuple	= icmp_nfattr_to_tuple,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
deleted file mode 100644
index 91d0c05c8e8..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ /dev/null
@@ -1,659 +0,0 @@
-/*
- * Connection tracking protocol helper module for SCTP.
- *
- * SCTP is defined in RFC 2960. References to various sections in this code
- * are to this RFC.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * Added support for proc manipulation of timeouts.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/interrupt.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/sctp.h>
-#include <linux/string.h>
-#include <linux/seq_file.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-#if 0
-#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Protects conntrack->proto.sctp */
-static DEFINE_RWLOCK(sctp_lock);
-
-/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
-   closely.  They're more complex. --RR
-
-   And so for me for SCTP :D -Kiran */
-
-static const char *sctp_conntrack_names[] = {
-	"NONE",
-	"CLOSED",
-	"COOKIE_WAIT",
-	"COOKIE_ECHOED",
-	"ESTABLISHED",
-	"SHUTDOWN_SENT",
-	"SHUTDOWN_RECD",
-	"SHUTDOWN_ACK_SENT",
-};
-
-#define SECS  * HZ
-#define MINS  * 60 SECS
-#define HOURS * 60 MINS
-#define DAYS  * 24 HOURS
-
-static unsigned int ip_ct_sctp_timeout_closed __read_mostly           = 10 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly      =  3 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly    =  3 SECS;
-static unsigned int ip_ct_sctp_timeout_established __read_mostly      =  5 DAYS;
-static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly    = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly    = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS;
-
-static const unsigned int * sctp_timeouts[]
-= { NULL,                                  /* SCTP_CONNTRACK_NONE  */
-    &ip_ct_sctp_timeout_closed,	           /* SCTP_CONNTRACK_CLOSED */
-    &ip_ct_sctp_timeout_cookie_wait,       /* SCTP_CONNTRACK_COOKIE_WAIT */
-    &ip_ct_sctp_timeout_cookie_echoed,     /* SCTP_CONNTRACK_COOKIE_ECHOED */
-    &ip_ct_sctp_timeout_established,       /* SCTP_CONNTRACK_ESTABLISHED */
-    &ip_ct_sctp_timeout_shutdown_sent,     /* SCTP_CONNTRACK_SHUTDOWN_SENT */
-    &ip_ct_sctp_timeout_shutdown_recd,     /* SCTP_CONNTRACK_SHUTDOWN_RECD */
-    &ip_ct_sctp_timeout_shutdown_ack_sent  /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
- };
-
-#define sNO SCTP_CONNTRACK_NONE
-#define	sCL SCTP_CONNTRACK_CLOSED
-#define	sCW SCTP_CONNTRACK_COOKIE_WAIT
-#define	sCE SCTP_CONNTRACK_COOKIE_ECHOED
-#define	sES SCTP_CONNTRACK_ESTABLISHED
-#define	sSS SCTP_CONNTRACK_SHUTDOWN_SENT
-#define	sSR SCTP_CONNTRACK_SHUTDOWN_RECD
-#define	sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
-#define	sIV SCTP_CONNTRACK_MAX
-
-/*
-	These are the descriptions of the states:
-
-NOTE: These state names are tantalizingly similar to the states of an
-SCTP endpoint. But the interpretation of the states is a little different,
-considering that these are the states of the connection and not of an end
-point. Please note the subtleties. -Kiran
-
-NONE              - Nothing so far.
-COOKIE WAIT       - We have seen an INIT chunk in the original direction, or also
-		    an INIT_ACK chunk in the reply direction.
-COOKIE ECHOED     - We have seen a COOKIE_ECHO chunk in the original direction.
-ESTABLISHED       - We have seen a COOKIE_ACK in the reply direction.
-SHUTDOWN_SENT     - We have seen a SHUTDOWN chunk in the original direction.
-SHUTDOWN_RECD     - We have seen a SHUTDOWN chunk in the reply directoin.
-SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
-		    to that of the SHUTDOWN chunk.
-CLOSED            - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
-		    the SHUTDOWN chunk. Connection is closed.
-*/
-
-/* TODO
- - I have assumed that the first INIT is in the original direction.
- This messes things when an INIT comes in the reply direction in CLOSED
- state.
- - Check the error type in the reply dir before transitioning from
-cookie echoed to closed.
- - Sec 5.2.4 of RFC 2960
- - Multi Homing support.
-*/
-
-/* SCTP conntrack state transitions */
-static const enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
-	{
-/*	ORIGINAL	*/
-/*                  sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
-/* init         */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
-/* init_ack     */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
-/* abort        */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown     */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
-/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error        */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/
-/* cookie_echo  */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
-/* cookie_ack   */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */
-/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
-	},
-	{
-/*	REPLY	*/
-/*                  sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
-/* init         */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
-/* init_ack     */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
-/* abort        */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown     */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
-/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error        */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
-/* cookie_echo  */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */
-/* cookie_ack   */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
-/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
-	}
-};
-
-static int sctp_pkt_to_tuple(const struct sk_buff *skb,
-			     unsigned int dataoff,
-			     struct ip_conntrack_tuple *tuple)
-{
-	sctp_sctphdr_t _hdr, *hp;
-
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	/* Actually only need first 8 bytes. */
-	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
-	if (hp == NULL)
-		return 0;
-
-	tuple->src.u.sctp.port = hp->source;
-	tuple->dst.u.sctp.port = hp->dest;
-	return 1;
-}
-
-static int sctp_invert_tuple(struct ip_conntrack_tuple *tuple,
-			     const struct ip_conntrack_tuple *orig)
-{
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	tuple->src.u.sctp.port = orig->dst.u.sctp.port;
-	tuple->dst.u.sctp.port = orig->src.u.sctp.port;
-	return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int sctp_print_tuple(struct seq_file *s,
-			    const struct ip_conntrack_tuple *tuple)
-{
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	return seq_printf(s, "sport=%hu dport=%hu ",
-			  ntohs(tuple->src.u.sctp.port),
-			  ntohs(tuple->dst.u.sctp.port));
-}
-
-/* Print out the private part of the conntrack. */
-static int sctp_print_conntrack(struct seq_file *s,
-				const struct ip_conntrack *conntrack)
-{
-	enum sctp_conntrack state;
-
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	read_lock_bh(&sctp_lock);
-	state = conntrack->proto.sctp.state;
-	read_unlock_bh(&sctp_lock);
-
-	return seq_printf(s, "%s ", sctp_conntrack_names[state]);
-}
-
-#define for_each_sctp_chunk(skb, sch, _sch, offset, count)		\
-for (offset = ip_hdrlen(skb) + sizeof(sctp_sctphdr_t), count = 0;	\
-	offset < skb->len &&						\
-	(sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch));	\
-	offset += (ntohs(sch->length) + 3) & ~3, count++)
-
-/* Some validity checks to make sure the chunks are fine */
-static int do_basic_checks(struct ip_conntrack *conntrack,
-			   const struct sk_buff *skb,
-			   char *map)
-{
-	u_int32_t offset, count;
-	sctp_chunkhdr_t _sch, *sch;
-	int flag;
-
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	flag = 0;
-
-	for_each_sctp_chunk (skb, sch, _sch, offset, count) {
-		DEBUGP("Chunk Num: %d  Type: %d\n", count, sch->type);
-
-		if (sch->type == SCTP_CID_INIT
-			|| sch->type == SCTP_CID_INIT_ACK
-			|| sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
-			flag = 1;
-		}
-
-		/*
-		 * Cookie Ack/Echo chunks not the first OR
-		 * Init / Init Ack / Shutdown compl chunks not the only chunks
-		 * OR zero-length.
-		 */
-		if (((sch->type == SCTP_CID_COOKIE_ACK
-			|| sch->type == SCTP_CID_COOKIE_ECHO
-			|| flag)
-		      && count !=0) || !sch->length) {
-			DEBUGP("Basic checks failed\n");
-			return 1;
-		}
-
-		if (map) {
-			set_bit(sch->type, (void *)map);
-		}
-	}
-
-	DEBUGP("Basic checks passed\n");
-	return count == 0;
-}
-
-static int new_state(enum ip_conntrack_dir dir,
-		     enum sctp_conntrack cur_state,
-		     int chunk_type)
-{
-	int i;
-
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	DEBUGP("Chunk type: %d\n", chunk_type);
-
-	switch (chunk_type) {
-		case SCTP_CID_INIT:
-			DEBUGP("SCTP_CID_INIT\n");
-			i = 0; break;
-		case SCTP_CID_INIT_ACK:
-			DEBUGP("SCTP_CID_INIT_ACK\n");
-			i = 1; break;
-		case SCTP_CID_ABORT:
-			DEBUGP("SCTP_CID_ABORT\n");
-			i = 2; break;
-		case SCTP_CID_SHUTDOWN:
-			DEBUGP("SCTP_CID_SHUTDOWN\n");
-			i = 3; break;
-		case SCTP_CID_SHUTDOWN_ACK:
-			DEBUGP("SCTP_CID_SHUTDOWN_ACK\n");
-			i = 4; break;
-		case SCTP_CID_ERROR:
-			DEBUGP("SCTP_CID_ERROR\n");
-			i = 5; break;
-		case SCTP_CID_COOKIE_ECHO:
-			DEBUGP("SCTP_CID_COOKIE_ECHO\n");
-			i = 6; break;
-		case SCTP_CID_COOKIE_ACK:
-			DEBUGP("SCTP_CID_COOKIE_ACK\n");
-			i = 7; break;
-		case SCTP_CID_SHUTDOWN_COMPLETE:
-			DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n");
-			i = 8; break;
-		default:
-			/* Other chunks like DATA, SACK, HEARTBEAT and
-			its ACK do not cause a change in state */
-			DEBUGP("Unknown chunk type, Will stay in %s\n",
-						sctp_conntrack_names[cur_state]);
-			return cur_state;
-	}
-
-	DEBUGP("dir: %d   cur_state: %s  chunk_type: %d  new_state: %s\n",
-			dir, sctp_conntrack_names[cur_state], chunk_type,
-			sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
-
-	return sctp_conntracks[dir][i][cur_state];
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int sctp_packet(struct ip_conntrack *conntrack,
-		       const struct sk_buff *skb,
-		       enum ip_conntrack_info ctinfo)
-{
-	enum sctp_conntrack newconntrack, oldsctpstate;
-	struct iphdr *iph = ip_hdr(skb);
-	sctp_sctphdr_t _sctph, *sh;
-	sctp_chunkhdr_t _sch, *sch;
-	u_int32_t offset, count;
-	char map[256 / sizeof (char)] = {0};
-
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
-	if (sh == NULL)
-		return -1;
-
-	if (do_basic_checks(conntrack, skb, map) != 0)
-		return -1;
-
-	/* Check the verification tag (Sec 8.5) */
-	if (!test_bit(SCTP_CID_INIT, (void *)map)
-		&& !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)
-		&& !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map)
-		&& !test_bit(SCTP_CID_ABORT, (void *)map)
-		&& !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map)
-		&& (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
-		DEBUGP("Verification tag check failed\n");
-		return -1;
-	}
-
-	oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
-	for_each_sctp_chunk (skb, sch, _sch, offset, count) {
-		write_lock_bh(&sctp_lock);
-
-		/* Special cases of Verification tag check (Sec 8.5.1) */
-		if (sch->type == SCTP_CID_INIT) {
-			/* Sec 8.5.1 (A) */
-			if (sh->vtag != 0) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
-		} else if (sch->type == SCTP_CID_ABORT) {
-			/* Sec 8.5.1 (B) */
-			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
-				&& !(sh->vtag == conntrack->proto.sctp.vtag
-							[1 - CTINFO2DIR(ctinfo)])) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
-		} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
-			/* Sec 8.5.1 (C) */
-			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
-				&& !(sh->vtag == conntrack->proto.sctp.vtag
-							[1 - CTINFO2DIR(ctinfo)]
-					&& (sch->flags & 1))) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
-		} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
-			/* Sec 8.5.1 (D) */
-			if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
-				write_unlock_bh(&sctp_lock);
-				return -1;
-			}
-		}
-
-		oldsctpstate = conntrack->proto.sctp.state;
-		newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type);
-
-		/* Invalid */
-		if (newconntrack == SCTP_CONNTRACK_MAX) {
-			DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
-			       CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
-			write_unlock_bh(&sctp_lock);
-			return -1;
-		}
-
-		/* If it is an INIT or an INIT ACK note down the vtag */
-		if (sch->type == SCTP_CID_INIT
-			|| sch->type == SCTP_CID_INIT_ACK) {
-			sctp_inithdr_t _inithdr, *ih;
-
-			ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
-						sizeof(_inithdr), &_inithdr);
-			if (ih == NULL) {
-					write_unlock_bh(&sctp_lock);
-					return -1;
-			}
-			DEBUGP("Setting vtag %x for dir %d\n",
-					ih->init_tag, !CTINFO2DIR(ctinfo));
-			conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
-		}
-
-		conntrack->proto.sctp.state = newconntrack;
-		if (oldsctpstate != newconntrack)
-			ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
-		write_unlock_bh(&sctp_lock);
-	}
-
-	ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
-
-	if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
-		&& CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
-		&& newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
-		DEBUGP("Setting assured bit\n");
-		set_bit(IPS_ASSURED_BIT, &conntrack->status);
-		ip_conntrack_event_cache(IPCT_STATUS, skb);
-	}
-
-	return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int sctp_new(struct ip_conntrack *conntrack,
-		    const struct sk_buff *skb)
-{
-	enum sctp_conntrack newconntrack;
-	struct iphdr *iph = ip_hdr(skb);
-	sctp_sctphdr_t _sctph, *sh;
-	sctp_chunkhdr_t _sch, *sch;
-	u_int32_t offset, count;
-	char map[256 / sizeof (char)] = {0};
-
-	DEBUGP(__FUNCTION__);
-	DEBUGP("\n");
-
-	sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
-	if (sh == NULL)
-		return 0;
-
-	if (do_basic_checks(conntrack, skb, map) != 0)
-		return 0;
-
-	/* If an OOTB packet has any of these chunks discard (Sec 8.4) */
-	if ((test_bit (SCTP_CID_ABORT, (void *)map))
-		|| (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map))
-		|| (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) {
-		return 0;
-	}
-
-	newconntrack = SCTP_CONNTRACK_MAX;
-	for_each_sctp_chunk (skb, sch, _sch, offset, count) {
-		/* Don't need lock here: this conntrack not in circulation yet */
-		newconntrack = new_state (IP_CT_DIR_ORIGINAL,
-						SCTP_CONNTRACK_NONE, sch->type);
-
-		/* Invalid: delete conntrack */
-		if (newconntrack == SCTP_CONNTRACK_MAX) {
-			DEBUGP("ip_conntrack_sctp: invalid new deleting.\n");
-			return 0;
-		}
-
-		/* Copy the vtag into the state info */
-		if (sch->type == SCTP_CID_INIT) {
-			if (sh->vtag == 0) {
-				sctp_inithdr_t _inithdr, *ih;
-
-				ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
-							sizeof(_inithdr), &_inithdr);
-				if (ih == NULL)
-					return 0;
-
-				DEBUGP("Setting vtag %x for new conn\n",
-					ih->init_tag);
-
-				conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] =
-								ih->init_tag;
-			} else {
-				/* Sec 8.5.1 (A) */
-				return 0;
-			}
-		}
-		/* If it is a shutdown ack OOTB packet, we expect a return
-		   shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
-		else {
-			DEBUGP("Setting vtag %x for new conn OOTB\n",
-				sh->vtag);
-			conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
-		}
-
-		conntrack->proto.sctp.state = newconntrack;
-	}
-
-	return 1;
-}
-
-static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = {
-	.proto 		 = IPPROTO_SCTP,
-	.name 		 = "sctp",
-	.pkt_to_tuple 	 = sctp_pkt_to_tuple,
-	.invert_tuple 	 = sctp_invert_tuple,
-	.print_tuple 	 = sctp_print_tuple,
-	.print_conntrack = sctp_print_conntrack,
-	.packet 	 = sctp_packet,
-	.new 		 = sctp_new,
-	.destroy 	 = NULL,
-	.me 		 = THIS_MODULE,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
-	.nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
-#endif
-};
-
-#ifdef CONFIG_SYSCTL
-static ctl_table ip_ct_sysctl_table[] = {
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
-		.procname	= "ip_conntrack_sctp_timeout_closed",
-		.data		= &ip_ct_sctp_timeout_closed,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT,
-		.procname	= "ip_conntrack_sctp_timeout_cookie_wait",
-		.data		= &ip_ct_sctp_timeout_cookie_wait,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED,
-		.procname	= "ip_conntrack_sctp_timeout_cookie_echoed",
-		.data		= &ip_ct_sctp_timeout_cookie_echoed,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED,
-		.procname	= "ip_conntrack_sctp_timeout_established",
-		.data		= &ip_ct_sctp_timeout_established,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT,
-		.procname	= "ip_conntrack_sctp_timeout_shutdown_sent",
-		.data		= &ip_ct_sctp_timeout_shutdown_sent,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD,
-		.procname	= "ip_conntrack_sctp_timeout_shutdown_recd",
-		.data		= &ip_ct_sctp_timeout_shutdown_recd,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT,
-		.procname	= "ip_conntrack_sctp_timeout_shutdown_ack_sent",
-		.data		= &ip_ct_sctp_timeout_shutdown_ack_sent,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_netfilter_table[] = {
-	{
-		.ctl_name	= NET_IPV4_NETFILTER,
-		.procname	= "netfilter",
-		.mode		= 0555,
-		.child		= ip_ct_sysctl_table,
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_ipv4_table[] = {
-	{
-		.ctl_name	= NET_IPV4,
-		.procname	= "ipv4",
-		.mode		= 0555,
-		.child		= ip_ct_netfilter_table,
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_net_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= ip_ct_ipv4_table,
-	},
-	{ .ctl_name = 0 }
-};
-
-static struct ctl_table_header *ip_ct_sysctl_header;
-#endif
-
-static int __init ip_conntrack_proto_sctp_init(void)
-{
-	int ret;
-
-	ret = ip_conntrack_protocol_register(&ip_conntrack_protocol_sctp);
-	if (ret) {
-		printk("ip_conntrack_proto_sctp: protocol register failed\n");
-		goto out;
-	}
-
-#ifdef CONFIG_SYSCTL
-	ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table);
-	if (ip_ct_sysctl_header == NULL) {
-		ret = -ENOMEM;
-		printk("ip_conntrack_proto_sctp: can't register to sysctl.\n");
-		goto cleanup;
-	}
-#endif
-
-	return ret;
-
-#ifdef CONFIG_SYSCTL
- cleanup:
-	ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
-#endif
- out:
-	DEBUGP("SCTP conntrack module loading %s\n",
-					ret ? "failed": "succeeded");
-	return ret;
-}
-
-static void __exit ip_conntrack_proto_sctp_fini(void)
-{
-	ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
-#ifdef CONFIG_SYSCTL
-	unregister_sysctl_table(ip_ct_sysctl_header);
-#endif
-	DEBUGP("SCTP conntrack module unloaded\n");
-}
-
-module_init(ip_conntrack_proto_sctp_init);
-module_exit(ip_conntrack_proto_sctp_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Kiran Kumar Immidi");
-MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
deleted file mode 100644
index d03436edfd9..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ /dev/null
@@ -1,1163 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
- *	- Real stateful connection tracking
- *	- Modified state transitions table
- *	- Window scaling support added
- *	- SACK support added
- *
- * Willy Tarreau:
- *	- State table bugfixes
- *	- More robust state changes
- *	- Tuning timer parameters
- *
- * version 2.2
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/spinlock.h>
-
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-#if 0
-#define DEBUGP printk
-#define DEBUGP_VARS
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Protects conntrack->proto.tcp */
-static DEFINE_RWLOCK(tcp_lock);
-
-/* "Be conservative in what you do,
-    be liberal in what you accept from others."
-    If it's non-zero, we mark only out of window RST segments as INVALID. */
-int ip_ct_tcp_be_liberal __read_mostly = 0;
-
-/* If it is set to zero, we disable picking up already established
-   connections. */
-int ip_ct_tcp_loose __read_mostly = 1;
-
-/* Max number of the retransmitted packets without receiving an (acceptable)
-   ACK from the destination. If this number is reached, a shorter timer
-   will be started. */
-int ip_ct_tcp_max_retrans __read_mostly = 3;
-
-  /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
-     closely.  They're more complex. --RR */
-
-static const char *tcp_conntrack_names[] = {
-	"NONE",
-	"SYN_SENT",
-	"SYN_RECV",
-	"ESTABLISHED",
-	"FIN_WAIT",
-	"CLOSE_WAIT",
-	"LAST_ACK",
-	"TIME_WAIT",
-	"CLOSE",
-	"LISTEN"
-};
-
-#define SECS * HZ
-#define MINS * 60 SECS
-#define HOURS * 60 MINS
-#define DAYS * 24 HOURS
-
-unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly =      2 MINS;
-unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly =     60 SECS;
-unsigned int ip_ct_tcp_timeout_established __read_mostly =   5 DAYS;
-unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly =      2 MINS;
-unsigned int ip_ct_tcp_timeout_close_wait __read_mostly =   60 SECS;
-unsigned int ip_ct_tcp_timeout_last_ack __read_mostly =     30 SECS;
-unsigned int ip_ct_tcp_timeout_time_wait __read_mostly =     2 MINS;
-unsigned int ip_ct_tcp_timeout_close __read_mostly =        10 SECS;
-
-/* RFC1122 says the R2 limit should be at least 100 seconds.
-   Linux uses 15 packets as limit, which corresponds
-   to ~13-30min depending on RTO. */
-unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly =   5 MINS;
-
-static const unsigned int * tcp_timeouts[]
-= { NULL,                              /*      TCP_CONNTRACK_NONE */
-    &ip_ct_tcp_timeout_syn_sent,       /*      TCP_CONNTRACK_SYN_SENT, */
-    &ip_ct_tcp_timeout_syn_recv,       /*      TCP_CONNTRACK_SYN_RECV, */
-    &ip_ct_tcp_timeout_established,    /*      TCP_CONNTRACK_ESTABLISHED,      */
-    &ip_ct_tcp_timeout_fin_wait,       /*      TCP_CONNTRACK_FIN_WAIT, */
-    &ip_ct_tcp_timeout_close_wait,     /*      TCP_CONNTRACK_CLOSE_WAIT,       */
-    &ip_ct_tcp_timeout_last_ack,       /*      TCP_CONNTRACK_LAST_ACK, */
-    &ip_ct_tcp_timeout_time_wait,      /*      TCP_CONNTRACK_TIME_WAIT,        */
-    &ip_ct_tcp_timeout_close,          /*      TCP_CONNTRACK_CLOSE,    */
-    NULL,                              /*      TCP_CONNTRACK_LISTEN */
- };
-
-#define sNO TCP_CONNTRACK_NONE
-#define sSS TCP_CONNTRACK_SYN_SENT
-#define sSR TCP_CONNTRACK_SYN_RECV
-#define sES TCP_CONNTRACK_ESTABLISHED
-#define sFW TCP_CONNTRACK_FIN_WAIT
-#define sCW TCP_CONNTRACK_CLOSE_WAIT
-#define sLA TCP_CONNTRACK_LAST_ACK
-#define sTW TCP_CONNTRACK_TIME_WAIT
-#define sCL TCP_CONNTRACK_CLOSE
-#define sLI TCP_CONNTRACK_LISTEN
-#define sIV TCP_CONNTRACK_MAX
-#define sIG TCP_CONNTRACK_IGNORE
-
-/* What TCP flags are set from RST/SYN/FIN/ACK. */
-enum tcp_bit_set {
-	TCP_SYN_SET,
-	TCP_SYNACK_SET,
-	TCP_FIN_SET,
-	TCP_ACK_SET,
-	TCP_RST_SET,
-	TCP_NONE_SET,
-};
-
-/*
- * The TCP state transition table needs a few words...
- *
- * We are the man in the middle. All the packets go through us
- * but might get lost in transit to the destination.
- * It is assumed that the destinations can't receive segments
- * we haven't seen.
- *
- * The checked segment is in window, but our windows are *not*
- * equivalent with the ones of the sender/receiver. We always
- * try to guess the state of the current sender.
- *
- * The meaning of the states are:
- *
- * NONE:	initial state
- * SYN_SENT:	SYN-only packet seen
- * SYN_RECV:	SYN-ACK packet seen
- * ESTABLISHED:	ACK packet seen
- * FIN_WAIT:	FIN packet seen
- * CLOSE_WAIT:	ACK seen (after FIN)
- * LAST_ACK:	FIN seen (after FIN)
- * TIME_WAIT:	last ACK seen
- * CLOSE:	closed connection
- *
- * LISTEN state is not used.
- *
- * Packets marked as IGNORED (sIG):
- *	if they may be either invalid or valid
- *	and the receiver may send back a connection
- *	closing RST or a SYN/ACK.
- *
- * Packets marked as INVALID (sIV):
- *	if they are invalid
- *	or we do not support the request (simultaneous open)
- */
-static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
-	{
-/* ORIGINAL */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
-/*
- *	sNO -> sSS	Initialize a new connection
- *	sSS -> sSS	Retransmitted SYN
- *	sSR -> sIG	Late retransmitted SYN?
- *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
- *			are errors. Receiver will reply with RST
- *			and close the connection.
- *			Or we are not in sync and hold a dead connection.
- *	sFW -> sIG
- *	sCW -> sIG
- *	sLA -> sIG
- *	sTW -> sSS	Reopened connection (RFC 1122).
- *	sCL -> sSS
- */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
-/*
- * A SYN/ACK from the client is always invalid:
- *	- either it tries to set up a simultaneous open, which is
- *	  not supported;
- *	- or the firewall has just been inserted between the two hosts
- *	  during the session set-up. The SYN will be retransmitted
- *	  by the true client (or it'll time out).
- */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
-/*
- *	sNO -> sIV	Too late and no reason to do anything...
- *	sSS -> sIV	Client migth not send FIN in this state:
- *			we enforce waiting for a SYN/ACK reply first.
- *	sSR -> sFW	Close started.
- *	sES -> sFW
- *	sFW -> sLA	FIN seen in both directions, waiting for
- *			the last ACK.
- *			Migth be a retransmitted FIN as well...
- *	sCW -> sLA
- *	sLA -> sLA	Retransmitted FIN. Remain in the same state.
- *	sTW -> sTW
- *	sCL -> sCL
- */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
-/*
- *	sNO -> sES	Assumed.
- *	sSS -> sIV	ACK is invalid: we haven't seen a SYN/ACK yet.
- *	sSR -> sES	Established state is reached.
- *	sES -> sES	:-)
- *	sFW -> sCW	Normal close request answered by ACK.
- *	sCW -> sCW
- *	sLA -> sTW	Last ACK detected.
- *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
- *	sCL -> sCL
- */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
-/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
-	},
-	{
-/* REPLY */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*syn*/	   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
-/*
- *	sNO -> sIV	Never reached.
- *	sSS -> sIV	Simultaneous open, not supported
- *	sSR -> sIV	Simultaneous open, not supported.
- *	sES -> sIV	Server may not initiate a connection.
- *	sFW -> sIV
- *	sCW -> sIV
- *	sLA -> sIV
- *	sTW -> sIV	Reopened connection, but server may not do it.
- *	sCL -> sIV
- */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
-/*
- *	sSS -> sSR	Standard open.
- *	sSR -> sSR	Retransmitted SYN/ACK.
- *	sES -> sIG	Late retransmitted SYN/ACK?
- *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
- *	sCW -> sIG
- *	sLA -> sIG
- *	sTW -> sIG
- *	sCL -> sIG
- */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
-/*
- *	sSS -> sIV	Server might not send FIN in this state.
- *	sSR -> sFW	Close started.
- *	sES -> sFW
- *	sFW -> sLA	FIN seen in both directions.
- *	sCW -> sLA
- *	sLA -> sLA	Retransmitted FIN.
- *	sTW -> sTW
- *	sCL -> sCL
- */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
-/*
- *	sSS -> sIG	Might be a half-open connection.
- *	sSR -> sSR	Might answer late resent SYN.
- *	sES -> sES	:-)
- *	sFW -> sCW	Normal close request answered by ACK.
- *	sCW -> sCW
- *	sLA -> sTW	Last ACK detected.
- *	sTW -> sTW	Retransmitted last ACK.
- *	sCL -> sCL
- */
-/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI	*/
-/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
-/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
-	}
-};
-
-static int tcp_pkt_to_tuple(const struct sk_buff *skb,
-			    unsigned int dataoff,
-			    struct ip_conntrack_tuple *tuple)
-{
-	struct tcphdr _hdr, *hp;
-
-	/* Actually only need first 8 bytes. */
-	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
-	if (hp == NULL)
-		return 0;
-
-	tuple->src.u.tcp.port = hp->source;
-	tuple->dst.u.tcp.port = hp->dest;
-
-	return 1;
-}
-
-static int tcp_invert_tuple(struct ip_conntrack_tuple *tuple,
-			    const struct ip_conntrack_tuple *orig)
-{
-	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
-	tuple->dst.u.tcp.port = orig->src.u.tcp.port;
-	return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int tcp_print_tuple(struct seq_file *s,
-			   const struct ip_conntrack_tuple *tuple)
-{
-	return seq_printf(s, "sport=%hu dport=%hu ",
-			  ntohs(tuple->src.u.tcp.port),
-			  ntohs(tuple->dst.u.tcp.port));
-}
-
-/* Print out the private part of the conntrack. */
-static int tcp_print_conntrack(struct seq_file *s,
-			       const struct ip_conntrack *conntrack)
-{
-	enum tcp_conntrack state;
-
-	read_lock_bh(&tcp_lock);
-	state = conntrack->proto.tcp.state;
-	read_unlock_bh(&tcp_lock);
-
-	return seq_printf(s, "%s ", tcp_conntrack_names[state]);
-}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
-			 const struct ip_conntrack *ct)
-{
-	struct nfattr *nest_parms;
-
-	read_lock_bh(&tcp_lock);
-	nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
-	NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
-		&ct->proto.tcp.state);
-	read_unlock_bh(&tcp_lock);
-
-	NFA_NEST_END(skb, nest_parms);
-
-	return 0;
-
-nfattr_failure:
-	read_unlock_bh(&tcp_lock);
-	return -1;
-}
-
-static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
-	[CTA_PROTOINFO_TCP_STATE-1]	= sizeof(u_int8_t),
-};
-
-static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
-{
-	struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
-	struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
-
-	/* updates could not contain anything about the private
-	 * protocol info, in that case skip the parsing */
-	if (!attr)
-		return 0;
-
-	nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
-
-	if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp))
-		return -EINVAL;
-
-	if (!tb[CTA_PROTOINFO_TCP_STATE-1])
-		return -EINVAL;
-
-	write_lock_bh(&tcp_lock);
-	ct->proto.tcp.state =
-		*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
-	write_unlock_bh(&tcp_lock);
-
-	return 0;
-}
-#endif
-
-static unsigned int get_conntrack_index(const struct tcphdr *tcph)
-{
-	if (tcph->rst) return TCP_RST_SET;
-	else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
-	else if (tcph->fin) return TCP_FIN_SET;
-	else if (tcph->ack) return TCP_ACK_SET;
-	else return TCP_NONE_SET;
-}
-
-/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
-   in IP Filter' by Guido van Rooij.
-
-   http://www.nluug.nl/events/sane2000/papers.html
-   http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
-
-   The boundaries and the conditions are changed according to RFC793:
-   the packet must intersect the window (i.e. segments may be
-   after the right or before the left edge) and thus receivers may ACK
-   segments after the right edge of the window.
-
-	td_maxend = max(sack + max(win,1)) seen in reply packets
-	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
-	td_maxwin += seq + len - sender.td_maxend
-			if seq + len > sender.td_maxend
-	td_end    = max(seq + len) seen in sent packets
-
-   I.   Upper bound for valid data:	seq <= sender.td_maxend
-   II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
-   III.	Upper bound for valid ack:      sack <= receiver.td_end
-   IV.	Lower bound for valid ack:	ack >= receiver.td_end - MAXACKWINDOW
-
-   where sack is the highest right edge of sack block found in the packet.
-
-   The upper bound limit for a valid ack is not ignored -
-   we doesn't have to deal with fragments.
-*/
-
-static inline __u32 segment_seq_plus_len(__u32 seq,
-					 size_t len,
-					 struct iphdr *iph,
-					 struct tcphdr *tcph)
-{
-	return (seq + len - (iph->ihl + tcph->doff)*4
-		+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
-}
-
-/* Fixme: what about big packets? */
-#define MAXACKWINCONST			66000
-#define MAXACKWINDOW(sender)						\
-	((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin	\
-					      : MAXACKWINCONST)
-
-/*
- * Simplified tcp_parse_options routine from tcp_input.c
- */
-static void tcp_options(const struct sk_buff *skb,
-			struct iphdr *iph,
-			struct tcphdr *tcph,
-			struct ip_ct_tcp_state *state)
-{
-	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
-	unsigned char *ptr;
-	int length = (tcph->doff*4) - sizeof(struct tcphdr);
-
-	if (!length)
-		return;
-
-	ptr = skb_header_pointer(skb,
-				 (iph->ihl * 4) + sizeof(struct tcphdr),
-				 length, buff);
-	BUG_ON(ptr == NULL);
-
-	state->td_scale =
-	state->flags = 0;
-
-	while (length > 0) {
-		int opcode=*ptr++;
-		int opsize;
-
-		switch (opcode) {
-		case TCPOPT_EOL:
-			return;
-		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
-			length--;
-			continue;
-		default:
-			opsize=*ptr++;
-			if (opsize < 2) /* "silly options" */
-				return;
-			if (opsize > length)
-				break;	/* don't parse partial options */
-
-			if (opcode == TCPOPT_SACK_PERM
-			    && opsize == TCPOLEN_SACK_PERM)
-				state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
-			else if (opcode == TCPOPT_WINDOW
-				 && opsize == TCPOLEN_WINDOW) {
-				state->td_scale = *(u_int8_t *)ptr;
-
-				if (state->td_scale > 14) {
-					/* See RFC1323 */
-					state->td_scale = 14;
-				}
-				state->flags |=
-					IP_CT_TCP_FLAG_WINDOW_SCALE;
-			}
-			ptr += opsize - 2;
-			length -= opsize;
-		}
-	}
-}
-
-static void tcp_sack(const struct sk_buff *skb,
-		     struct iphdr *iph,
-		     struct tcphdr *tcph,
-		     __u32 *sack)
-{
-	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
-	unsigned char *ptr;
-	int length = (tcph->doff*4) - sizeof(struct tcphdr);
-	__u32 tmp;
-
-	if (!length)
-		return;
-
-	ptr = skb_header_pointer(skb,
-				 (iph->ihl * 4) + sizeof(struct tcphdr),
-				 length, buff);
-	BUG_ON(ptr == NULL);
-
-	/* Fast path for timestamp-only option */
-	if (length == TCPOLEN_TSTAMP_ALIGNED*4
-	    && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
-				       | (TCPOPT_NOP << 16)
-				       | (TCPOPT_TIMESTAMP << 8)
-				       | TCPOLEN_TIMESTAMP))
-		return;
-
-	while (length > 0) {
-		int opcode=*ptr++;
-		int opsize, i;
-
-		switch (opcode) {
-		case TCPOPT_EOL:
-			return;
-		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
-			length--;
-			continue;
-		default:
-			opsize=*ptr++;
-			if (opsize < 2) /* "silly options" */
-				return;
-			if (opsize > length)
-				break;	/* don't parse partial options */
-
-			if (opcode == TCPOPT_SACK
-			    && opsize >= (TCPOLEN_SACK_BASE
-					  + TCPOLEN_SACK_PERBLOCK)
-			    && !((opsize - TCPOLEN_SACK_BASE)
-				 % TCPOLEN_SACK_PERBLOCK)) {
-				for (i = 0;
-				     i < (opsize - TCPOLEN_SACK_BASE);
-				     i += TCPOLEN_SACK_PERBLOCK) {
-					tmp = ntohl(*((__be32 *)(ptr+i)+1));
-
-					if (after(tmp, *sack))
-						*sack = tmp;
-				}
-				return;
-			}
-			ptr += opsize - 2;
-			length -= opsize;
-		}
-	}
-}
-
-static int tcp_in_window(struct ip_ct_tcp *state,
-			 enum ip_conntrack_dir dir,
-			 unsigned int index,
-			 const struct sk_buff *skb,
-			 struct iphdr *iph,
-			 struct tcphdr *tcph)
-{
-	struct ip_ct_tcp_state *sender = &state->seen[dir];
-	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
-	__u32 seq, ack, sack, end, win, swin;
-	int res;
-
-	/*
-	 * Get the required data from the packet.
-	 */
-	seq = ntohl(tcph->seq);
-	ack = sack = ntohl(tcph->ack_seq);
-	win = ntohs(tcph->window);
-	end = segment_seq_plus_len(seq, skb->len, iph, tcph);
-
-	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
-		tcp_sack(skb, iph, tcph, &sack);
-
-	DEBUGP("tcp_in_window: START\n");
-	DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
-	       "seq=%u ack=%u sack=%u win=%u end=%u\n",
-		NIPQUAD(iph->saddr), ntohs(tcph->source),
-		NIPQUAD(iph->daddr), ntohs(tcph->dest),
-		seq, ack, sack, win, end);
-	DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
-	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-		sender->td_end, sender->td_maxend, sender->td_maxwin,
-		sender->td_scale,
-		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-		receiver->td_scale);
-
-	if (sender->td_end == 0) {
-		/*
-		 * Initialize sender data.
-		 */
-		if (tcph->syn && tcph->ack) {
-			/*
-			 * Outgoing SYN-ACK in reply to a SYN.
-			 */
-			sender->td_end =
-			sender->td_maxend = end;
-			sender->td_maxwin = (win == 0 ? 1 : win);
-
-			tcp_options(skb, iph, tcph, sender);
-			/*
-			 * RFC 1323:
-			 * Both sides must send the Window Scale option
-			 * to enable window scaling in either direction.
-			 */
-			if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
-			      && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
-				sender->td_scale =
-				receiver->td_scale = 0;
-		} else {
-			/*
-			 * We are in the middle of a connection,
-			 * its history is lost for us.
-			 * Let's try to use the data from the packet.
-			 */
-			sender->td_end = end;
-			sender->td_maxwin = (win == 0 ? 1 : win);
-			sender->td_maxend = end + sender->td_maxwin;
-		}
-	} else if (((state->state == TCP_CONNTRACK_SYN_SENT
-		     && dir == IP_CT_DIR_ORIGINAL)
-		    || (state->state == TCP_CONNTRACK_SYN_RECV
-			&& dir == IP_CT_DIR_REPLY))
-		    && after(end, sender->td_end)) {
-		/*
-		 * RFC 793: "if a TCP is reinitialized ... then it need
-		 * not wait at all; it must only be sure to use sequence
-		 * numbers larger than those recently used."
-		 */
-		sender->td_end =
-		sender->td_maxend = end;
-		sender->td_maxwin = (win == 0 ? 1 : win);
-
-		tcp_options(skb, iph, tcph, sender);
-	}
-
-	if (!(tcph->ack)) {
-		/*
-		 * If there is no ACK, just pretend it was set and OK.
-		 */
-		ack = sack = receiver->td_end;
-	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
-		    (TCP_FLAG_ACK|TCP_FLAG_RST))
-		   && (ack == 0)) {
-		/*
-		 * Broken TCP stacks, that set ACK in RST packets as well
-		 * with zero ack value.
-		 */
-		ack = sack = receiver->td_end;
-	}
-
-	if (seq == end
-	    && (!tcph->rst
-		|| (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
-		/*
-		 * Packets contains no data: we assume it is valid
-		 * and check the ack value only.
-		 * However RST segments are always validated by their
-		 * SEQ number, except when seq == 0 (reset sent answering
-		 * SYN.
-		 */
-		seq = end = sender->td_end;
-
-	DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
-	       "seq=%u ack=%u sack =%u win=%u end=%u\n",
-		NIPQUAD(iph->saddr), ntohs(tcph->source),
-		NIPQUAD(iph->daddr), ntohs(tcph->dest),
-		seq, ack, sack, win, end);
-	DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
-	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-		sender->td_end, sender->td_maxend, sender->td_maxwin,
-		sender->td_scale,
-		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-		receiver->td_scale);
-
-	DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
-		before(seq, sender->td_maxend + 1),
-		after(end, sender->td_end - receiver->td_maxwin - 1),
-		before(sack, receiver->td_end + 1),
-		after(ack, receiver->td_end - MAXACKWINDOW(sender)));
-
-	if (before(seq, sender->td_maxend + 1) &&
-	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
-	    before(sack, receiver->td_end + 1) &&
-	    after(ack, receiver->td_end - MAXACKWINDOW(sender))) {
-		/*
-		 * Take into account window scaling (RFC 1323).
-		 */
-		if (!tcph->syn)
-			win <<= sender->td_scale;
-
-		/*
-		 * Update sender data.
-		 */
-		swin = win + (sack - ack);
-		if (sender->td_maxwin < swin)
-			sender->td_maxwin = swin;
-		if (after(end, sender->td_end))
-			sender->td_end = end;
-		/*
-		 * Update receiver data.
-		 */
-		if (after(end, sender->td_maxend))
-			receiver->td_maxwin += end - sender->td_maxend;
-		if (after(sack + win, receiver->td_maxend - 1)) {
-			receiver->td_maxend = sack + win;
-			if (win == 0)
-				receiver->td_maxend++;
-		}
-
-		/*
-		 * Check retransmissions.
-		 */
-		if (index == TCP_ACK_SET) {
-			if (state->last_dir == dir
-			    && state->last_seq == seq
-			    && state->last_ack == ack
-			    && state->last_end == end
-			    && state->last_win == win)
-				state->retrans++;
-			else {
-				state->last_dir = dir;
-				state->last_seq = seq;
-				state->last_ack = ack;
-				state->last_end = end;
-				state->last_win = win;
-				state->retrans = 0;
-			}
-		}
-		res = 1;
-	} else {
-		res = 0;
-		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
-		    ip_ct_tcp_be_liberal)
-			res = 1;
-		if (!res && LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-			"ip_ct_tcp: %s ",
-			before(seq, sender->td_maxend + 1) ?
-			after(end, sender->td_end - receiver->td_maxwin - 1) ?
-			before(sack, receiver->td_end + 1) ?
-			after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
-			: "ACK is under the lower bound (possible overly delayed ACK)"
-			: "ACK is over the upper bound (ACKed data not seen yet)"
-			: "SEQ is under the lower bound (already ACKed data retransmitted)"
-			: "SEQ is over the upper bound (over the window of the receiver)");
-	}
-
-	DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
-	       "receiver end=%u maxend=%u maxwin=%u\n",
-		res, sender->td_end, sender->td_maxend, sender->td_maxwin,
-		receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
-
-	return res;
-}
-
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-/* Update sender->td_end after NAT successfully mangled the packet */
-void ip_conntrack_tcp_update(struct sk_buff *skb,
-			     struct ip_conntrack *conntrack,
-			     enum ip_conntrack_dir dir)
-{
-	struct iphdr *iph = ip_hdr(skb);
-	struct tcphdr *tcph = (void *)iph + ip_hdrlen(skb);
-	__u32 end;
-#ifdef DEBUGP_VARS
-	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
-	struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
-#endif
-
-	end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);
-
-	write_lock_bh(&tcp_lock);
-	/*
-	 * We have to worry for the ack in the reply packet only...
-	 */
-	if (after(end, conntrack->proto.tcp.seen[dir].td_end))
-		conntrack->proto.tcp.seen[dir].td_end = end;
-	conntrack->proto.tcp.last_end = end;
-	write_unlock_bh(&tcp_lock);
-	DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
-	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-		sender->td_end, sender->td_maxend, sender->td_maxwin,
-		sender->td_scale,
-		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-		receiver->td_scale);
-}
-
-#endif
-
-#define	TH_FIN	0x01
-#define	TH_SYN	0x02
-#define	TH_RST	0x04
-#define	TH_PUSH	0x08
-#define	TH_ACK	0x10
-#define	TH_URG	0x20
-#define	TH_ECE	0x40
-#define	TH_CWR	0x80
-
-/* table of valid flag combinations - ECE and CWR are always valid */
-static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
-{
-	[TH_SYN]			= 1,
-	[TH_SYN|TH_PUSH]		= 1,
-	[TH_SYN|TH_URG]			= 1,
-	[TH_SYN|TH_PUSH|TH_URG]		= 1,
-	[TH_SYN|TH_ACK]			= 1,
-	[TH_SYN|TH_ACK|TH_PUSH]		= 1,
-	[TH_RST]			= 1,
-	[TH_RST|TH_ACK]			= 1,
-	[TH_RST|TH_ACK|TH_PUSH]		= 1,
-	[TH_FIN|TH_ACK]			= 1,
-	[TH_ACK]			= 1,
-	[TH_ACK|TH_PUSH]		= 1,
-	[TH_ACK|TH_URG]			= 1,
-	[TH_ACK|TH_URG|TH_PUSH]		= 1,
-	[TH_FIN|TH_ACK|TH_PUSH]		= 1,
-	[TH_FIN|TH_ACK|TH_URG]		= 1,
-	[TH_FIN|TH_ACK|TH_URG|TH_PUSH]	= 1,
-};
-
-/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
-static int tcp_error(struct sk_buff *skb,
-		     enum ip_conntrack_info *ctinfo,
-		     unsigned int hooknum)
-{
-	const unsigned int hdrlen = ip_hdrlen(skb);
-	struct tcphdr _tcph, *th;
-	unsigned int tcplen = skb->len - hdrlen;
-	u_int8_t tcpflags;
-
-	/* Smaller that minimal TCP header? */
-	th = skb_header_pointer(skb, hdrlen,
-				sizeof(_tcph), &_tcph);
-	if (th == NULL) {
-		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				"ip_ct_tcp: short packet ");
-		return -NF_ACCEPT;
-	}
-
-	/* Not whole TCP header or malformed packet */
-	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
-		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				"ip_ct_tcp: truncated/malformed packet ");
-		return -NF_ACCEPT;
-	}
-
-	/* Checksum invalid? Ignore.
-	 * We skip checking packets on the outgoing path
-	 * because it is assumed to be correct.
-	 */
-	/* FIXME: Source route IP option packets --RR */
-	if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
-	    nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_TCP)) {
-		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				  "ip_ct_tcp: bad TCP checksum ");
-		return -NF_ACCEPT;
-	}
-
-	/* Check TCP flags. */
-	tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
-	if (!tcp_valid_flags[tcpflags]) {
-		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				  "ip_ct_tcp: invalid TCP flag combination ");
-		return -NF_ACCEPT;
-	}
-
-	return NF_ACCEPT;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int tcp_packet(struct ip_conntrack *conntrack,
-		      const struct sk_buff *skb,
-		      enum ip_conntrack_info ctinfo)
-{
-	enum tcp_conntrack new_state, old_state;
-	enum ip_conntrack_dir dir;
-	struct iphdr *iph = ip_hdr(skb);
-	struct tcphdr *th, _tcph;
-	unsigned long timeout;
-	unsigned int index;
-
-	th = skb_header_pointer(skb, iph->ihl * 4,
-				sizeof(_tcph), &_tcph);
-	BUG_ON(th == NULL);
-
-	write_lock_bh(&tcp_lock);
-	old_state = conntrack->proto.tcp.state;
-	dir = CTINFO2DIR(ctinfo);
-	index = get_conntrack_index(th);
-	new_state = tcp_conntracks[dir][index][old_state];
-
-	switch (new_state) {
-	case TCP_CONNTRACK_IGNORE:
-		/* Ignored packets:
-		 *
-		 * a) SYN in ORIGINAL
-		 * b) SYN/ACK in REPLY
-		 * c) ACK in reply direction after initial SYN in original.
-		 */
-		if (index == TCP_SYNACK_SET
-		    && conntrack->proto.tcp.last_index == TCP_SYN_SET
-		    && conntrack->proto.tcp.last_dir != dir
-		    && ntohl(th->ack_seq) ==
-			     conntrack->proto.tcp.last_end) {
-			/* This SYN/ACK acknowledges a SYN that we earlier
-			 * ignored as invalid. This means that the client and
-			 * the server are both in sync, while the firewall is
-			 * not. We kill this session and block the SYN/ACK so
-			 * that the client cannot but retransmit its SYN and
-			 * thus initiate a clean new session.
-			 */
-			write_unlock_bh(&tcp_lock);
-			if (LOG_INVALID(IPPROTO_TCP))
-				nf_log_packet(PF_INET, 0, skb, NULL, NULL,
-					      NULL, "ip_ct_tcp: "
-					      "killing out of sync session ");
-			if (del_timer(&conntrack->timeout))
-				conntrack->timeout.function((unsigned long)
-							    conntrack);
-			return -NF_DROP;
-		}
-		conntrack->proto.tcp.last_index = index;
-		conntrack->proto.tcp.last_dir = dir;
-		conntrack->proto.tcp.last_seq = ntohl(th->seq);
-		conntrack->proto.tcp.last_end =
-		    segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
-
-		write_unlock_bh(&tcp_lock);
-		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				  "ip_ct_tcp: invalid packet ignored ");
-		return NF_ACCEPT;
-	case TCP_CONNTRACK_MAX:
-		/* Invalid packet */
-		DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
-		       dir, get_conntrack_index(th),
-		       old_state);
-		write_unlock_bh(&tcp_lock);
-		if (LOG_INVALID(IPPROTO_TCP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				  "ip_ct_tcp: invalid state ");
-		return -NF_ACCEPT;
-	case TCP_CONNTRACK_SYN_SENT:
-		if (old_state < TCP_CONNTRACK_TIME_WAIT)
-			break;
-		if ((conntrack->proto.tcp.seen[dir].flags &
-			 IP_CT_TCP_FLAG_CLOSE_INIT)
-		    || after(ntohl(th->seq),
-			     conntrack->proto.tcp.seen[dir].td_end)) {
-			/* Attempt to reopen a closed connection.
-			* Delete this connection and look up again. */
-			write_unlock_bh(&tcp_lock);
-			if (del_timer(&conntrack->timeout))
-				conntrack->timeout.function((unsigned long)
-							    conntrack);
-			return -NF_REPEAT;
-		} else {
-			write_unlock_bh(&tcp_lock);
-			if (LOG_INVALID(IPPROTO_TCP))
-				nf_log_packet(PF_INET, 0, skb, NULL, NULL,
-					      NULL, "ip_ct_tcp: invalid SYN");
-			return -NF_ACCEPT;
-		}
-	case TCP_CONNTRACK_CLOSE:
-		if (index == TCP_RST_SET
-		    && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
-			 && conntrack->proto.tcp.last_index == TCP_SYN_SET)
-			|| (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
-			    && conntrack->proto.tcp.last_index == TCP_ACK_SET))
-		    && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
-			/* RST sent to invalid SYN or ACK we had let through
-			 * at a) and c) above:
-			 *
-			 * a) SYN was in window then
-			 * c) we hold a half-open connection.
-			 *
-			 * Delete our connection entry.
-			 * We skip window checking, because packet might ACK
-			 * segments we ignored. */
-			goto in_window;
-		}
-		/* Just fall through */
-	default:
-		/* Keep compilers happy. */
-		break;
-	}
-
-	if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
-			   skb, iph, th)) {
-		write_unlock_bh(&tcp_lock);
-		return -NF_ACCEPT;
-	}
-    in_window:
-	/* From now on we have got in-window packets */
-	conntrack->proto.tcp.last_index = index;
-
-	DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
-	       "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
-		NIPQUAD(iph->saddr), ntohs(th->source),
-		NIPQUAD(iph->daddr), ntohs(th->dest),
-		(th->syn ? 1 : 0), (th->ack ? 1 : 0),
-		(th->fin ? 1 : 0), (th->rst ? 1 : 0),
-		old_state, new_state);
-
-	conntrack->proto.tcp.state = new_state;
-	if (old_state != new_state
-	    && (new_state == TCP_CONNTRACK_FIN_WAIT
-		|| new_state == TCP_CONNTRACK_CLOSE))
-		conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
-	timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
-		  && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
-		  ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
-	write_unlock_bh(&tcp_lock);
-
-	ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
-	if (new_state != old_state)
-		ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
-
-	if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
-		/* If only reply is a RST, we can consider ourselves not to
-		   have an established connection: this is a fairly common
-		   problem case, so we can delete the conntrack
-		   immediately.  --RR */
-		if (th->rst) {
-			if (del_timer(&conntrack->timeout))
-				conntrack->timeout.function((unsigned long)
-							    conntrack);
-			return NF_ACCEPT;
-		}
-	} else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
-		   && (old_state == TCP_CONNTRACK_SYN_RECV
-		       || old_state == TCP_CONNTRACK_ESTABLISHED)
-		   && new_state == TCP_CONNTRACK_ESTABLISHED) {
-		/* Set ASSURED if we see see valid ack in ESTABLISHED
-		   after SYN_RECV or a valid answer for a picked up
-		   connection. */
-		set_bit(IPS_ASSURED_BIT, &conntrack->status);
-		ip_conntrack_event_cache(IPCT_STATUS, skb);
-	}
-	ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
-
-	return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int tcp_new(struct ip_conntrack *conntrack,
-		   const struct sk_buff *skb)
-{
-	enum tcp_conntrack new_state;
-	struct iphdr *iph = ip_hdr(skb);
-	struct tcphdr *th, _tcph;
-#ifdef DEBUGP_VARS
-	struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
-	struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
-#endif
-
-	th = skb_header_pointer(skb, iph->ihl * 4,
-				sizeof(_tcph), &_tcph);
-	BUG_ON(th == NULL);
-
-	/* Don't need lock here: this conntrack not in circulation yet */
-	new_state
-		= tcp_conntracks[0][get_conntrack_index(th)]
-		[TCP_CONNTRACK_NONE];
-
-	/* Invalid: delete conntrack */
-	if (new_state >= TCP_CONNTRACK_MAX) {
-		DEBUGP("ip_ct_tcp: invalid new deleting.\n");
-		return 0;
-	}
-
-	if (new_state == TCP_CONNTRACK_SYN_SENT) {
-		/* SYN packet */
-		conntrack->proto.tcp.seen[0].td_end =
-			segment_seq_plus_len(ntohl(th->seq), skb->len,
-					     iph, th);
-		conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
-		if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
-			conntrack->proto.tcp.seen[0].td_maxwin = 1;
-		conntrack->proto.tcp.seen[0].td_maxend =
-			conntrack->proto.tcp.seen[0].td_end;
-
-		tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]);
-		conntrack->proto.tcp.seen[1].flags = 0;
-	} else if (ip_ct_tcp_loose == 0) {
-		/* Don't try to pick up connections. */
-		return 0;
-	} else {
-		/*
-		 * We are in the middle of a connection,
-		 * its history is lost for us.
-		 * Let's try to use the data from the packet.
-		 */
-		conntrack->proto.tcp.seen[0].td_end =
-			segment_seq_plus_len(ntohl(th->seq), skb->len,
-					     iph, th);
-		conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
-		if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
-			conntrack->proto.tcp.seen[0].td_maxwin = 1;
-		conntrack->proto.tcp.seen[0].td_maxend =
-			conntrack->proto.tcp.seen[0].td_end +
-			conntrack->proto.tcp.seen[0].td_maxwin;
-		conntrack->proto.tcp.seen[0].td_scale = 0;
-
-		/* We assume SACK and liberal window checking to handle
-		 * window scaling */
-		conntrack->proto.tcp.seen[0].flags =
-		conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
-						     IP_CT_TCP_FLAG_BE_LIBERAL;
-	}
-
-	conntrack->proto.tcp.seen[1].td_end = 0;
-	conntrack->proto.tcp.seen[1].td_maxend = 0;
-	conntrack->proto.tcp.seen[1].td_maxwin = 1;
-	conntrack->proto.tcp.seen[1].td_scale = 0;
-
-	/* tcp_packet will set them */
-	conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
-	conntrack->proto.tcp.last_index = TCP_NONE_SET;
-
-	DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
-	       "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
-		sender->td_end, sender->td_maxend, sender->td_maxwin,
-		sender->td_scale,
-		receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
-		receiver->td_scale);
-	return 1;
-}
-
-struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
-{
-	.proto 			= IPPROTO_TCP,
-	.name 			= "tcp",
-	.pkt_to_tuple 		= tcp_pkt_to_tuple,
-	.invert_tuple 		= tcp_invert_tuple,
-	.print_tuple 		= tcp_print_tuple,
-	.print_conntrack 	= tcp_print_conntrack,
-	.packet 		= tcp_packet,
-	.new 			= tcp_new,
-	.error			= tcp_error,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.to_nfattr		= tcp_to_nfattr,
-	.from_nfattr		= nfattr_to_tcp,
-	.tuple_to_nfattr	= ip_ct_port_tuple_to_nfattr,
-	.nfattr_to_tuple	= ip_ct_port_nfattr_to_tuple,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
deleted file mode 100644
index 3b47987bf1b..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ /dev/null
@@ -1,148 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/seq_file.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ;
-unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ;
-
-static int udp_pkt_to_tuple(const struct sk_buff *skb,
-			     unsigned int dataoff,
-			     struct ip_conntrack_tuple *tuple)
-{
-	struct udphdr _hdr, *hp;
-
-	/* Actually only need first 8 bytes. */
-	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
-	if (hp == NULL)
-		return 0;
-
-	tuple->src.u.udp.port = hp->source;
-	tuple->dst.u.udp.port = hp->dest;
-
-	return 1;
-}
-
-static int udp_invert_tuple(struct ip_conntrack_tuple *tuple,
-			    const struct ip_conntrack_tuple *orig)
-{
-	tuple->src.u.udp.port = orig->dst.u.udp.port;
-	tuple->dst.u.udp.port = orig->src.u.udp.port;
-	return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int udp_print_tuple(struct seq_file *s,
-			   const struct ip_conntrack_tuple *tuple)
-{
-	return seq_printf(s, "sport=%hu dport=%hu ",
-			  ntohs(tuple->src.u.udp.port),
-			  ntohs(tuple->dst.u.udp.port));
-}
-
-/* Print out the private part of the conntrack. */
-static int udp_print_conntrack(struct seq_file *s,
-			       const struct ip_conntrack *conntrack)
-{
-	return 0;
-}
-
-/* Returns verdict for packet, and may modify conntracktype */
-static int udp_packet(struct ip_conntrack *conntrack,
-		      const struct sk_buff *skb,
-		      enum ip_conntrack_info ctinfo)
-{
-	/* If we've seen traffic both ways, this is some kind of UDP
-	   stream.  Extend timeout. */
-	if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
-		ip_ct_refresh_acct(conntrack, ctinfo, skb,
-				   ip_ct_udp_timeout_stream);
-		/* Also, more likely to be important, and not a probe */
-		if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
-			ip_conntrack_event_cache(IPCT_STATUS, skb);
-	} else
-		ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
-
-	return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int udp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
-{
-	return 1;
-}
-
-static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
-		     unsigned int hooknum)
-{
-	const unsigned int hdrlen = ip_hdrlen(skb);
-	unsigned int udplen = skb->len - hdrlen;
-	struct udphdr _hdr, *hdr;
-
-	/* Header is too small? */
-	hdr = skb_header_pointer(skb, hdrlen, sizeof(_hdr), &_hdr);
-	if (hdr == NULL) {
-		if (LOG_INVALID(IPPROTO_UDP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				  "ip_ct_udp: short packet ");
-		return -NF_ACCEPT;
-	}
-
-	/* Truncated/malformed packets */
-	if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
-		if (LOG_INVALID(IPPROTO_UDP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				  "ip_ct_udp: truncated/malformed packet ");
-		return -NF_ACCEPT;
-	}
-
-	/* Packet with no checksum */
-	if (!hdr->check)
-		return NF_ACCEPT;
-
-	/* Checksum invalid? Ignore.
-	 * We skip checking packets on the outgoing path
-	 * because the checksum is assumed to be correct.
-	 * FIXME: Source route IP option packets --RR */
-	if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING &&
-	    nf_ip_checksum(skb, hooknum, hdrlen, IPPROTO_UDP)) {
-		if (LOG_INVALID(IPPROTO_UDP))
-			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
-				  "ip_ct_udp: bad UDP checksum ");
-		return -NF_ACCEPT;
-	}
-
-	return NF_ACCEPT;
-}
-
-struct ip_conntrack_protocol ip_conntrack_protocol_udp =
-{
-	.proto 			= IPPROTO_UDP,
-	.name			= "udp",
-	.pkt_to_tuple		= udp_pkt_to_tuple,
-	.invert_tuple		= udp_invert_tuple,
-	.print_tuple		= udp_print_tuple,
-	.print_conntrack	= udp_print_conntrack,
-	.packet			= udp_packet,
-	.new			= udp_new,
-	.error			= udp_error,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.tuple_to_nfattr	= ip_ct_port_tuple_to_nfattr,
-	.nfattr_to_tuple	= ip_ct_port_nfattr_to_tuple,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c
deleted file mode 100644
index 7363e2a5cea..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_sip.c
+++ /dev/null
@@ -1,520 +0,0 @@
-/* SIP extension for IP connection tracking.
- *
- * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
- * based on RR's ip_conntrack_ftp.c and other modules.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/ctype.h>
-#include <linux/skbuff.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_sip.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
-MODULE_DESCRIPTION("SIP connection tracking helper");
-
-#define MAX_PORTS	8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-module_param_array(ports, ushort, &ports_c, 0400);
-MODULE_PARM_DESC(ports, "port numbers of sip servers");
-
-static unsigned int sip_timeout = SIP_TIMEOUT;
-module_param(sip_timeout, uint, 0600);
-MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session");
-
-unsigned int (*ip_nat_sip_hook)(struct sk_buff **pskb,
-				enum ip_conntrack_info ctinfo,
-				struct ip_conntrack *ct,
-				const char **dptr);
-EXPORT_SYMBOL_GPL(ip_nat_sip_hook);
-
-unsigned int (*ip_nat_sdp_hook)(struct sk_buff **pskb,
-				enum ip_conntrack_info ctinfo,
-				struct ip_conntrack_expect *exp,
-				const char *dptr);
-EXPORT_SYMBOL_GPL(ip_nat_sdp_hook);
-
-static int digits_len(const char *dptr, const char *limit, int *shift);
-static int epaddr_len(const char *dptr, const char *limit, int *shift);
-static int skp_digits_len(const char *dptr, const char *limit, int *shift);
-static int skp_epaddr_len(const char *dptr, const char *limit, int *shift);
-
-struct sip_header_nfo {
-	const char	*lname;
-	const char	*sname;
-	const char	*ln_str;
-	size_t		lnlen;
-	size_t		snlen;
-	size_t		ln_strlen;
-	int		case_sensitive;
-	int		(*match_len)(const char *, const char *, int *);
-};
-
-static struct sip_header_nfo ct_sip_hdrs[] = {
-	[POS_REG_REQ_URI] = { 	/* SIP REGISTER request URI */
-		.lname		= "sip:",
-		.lnlen		= sizeof("sip:") - 1,
-		.ln_str		= ":",
-		.ln_strlen	= sizeof(":") - 1,
-		.match_len	= epaddr_len
-	},
-	[POS_REQ_URI] = { 	/* SIP request URI */
-		.lname		= "sip:",
-		.lnlen		= sizeof("sip:") - 1,
-		.ln_str		= "@",
-		.ln_strlen	= sizeof("@") - 1,
-		.match_len	= epaddr_len
-	},
-	[POS_FROM] = {		/* SIP From header */
-		.lname		= "From:",
-		.lnlen		= sizeof("From:") - 1,
-		.sname		= "\r\nf:",
-		.snlen		= sizeof("\r\nf:") - 1,
-		.ln_str		= "sip:",
-		.ln_strlen	= sizeof("sip:") - 1,
-		.match_len	= skp_epaddr_len,
-	},
-	[POS_TO] = {		/* SIP To header */
-		.lname		= "To:",
-		.lnlen		= sizeof("To:") - 1,
-		.sname		= "\r\nt:",
-		.snlen		= sizeof("\r\nt:") - 1,
-		.ln_str		= "sip:",
-		.ln_strlen	= sizeof("sip:") - 1,
-		.match_len	= skp_epaddr_len,
-	},
-	[POS_VIA] = { 		/* SIP Via header */
-		.lname		= "Via:",
-		.lnlen		= sizeof("Via:") - 1,
-		.sname		= "\r\nv:",
-		.snlen		= sizeof("\r\nv:") - 1, /* rfc3261 "\r\n" */
-		.ln_str		= "UDP ",
-		.ln_strlen	= sizeof("UDP ") - 1,
-		.match_len	= epaddr_len,
-	},
-	[POS_CONTACT] = { 	/* SIP Contact header */
-		.lname		= "Contact:",
-		.lnlen		= sizeof("Contact:") - 1,
-		.sname		= "\r\nm:",
-		.snlen		= sizeof("\r\nm:") - 1,
-		.ln_str		= "sip:",
-		.ln_strlen	= sizeof("sip:") - 1,
-		.match_len	= skp_epaddr_len
-	},
-	[POS_CONTENT] = { 	/* SIP Content length header */
-		.lname		= "Content-Length:",
-		.lnlen		= sizeof("Content-Length:") - 1,
-		.sname		= "\r\nl:",
-		.snlen		= sizeof("\r\nl:") - 1,
-		.ln_str		= ":",
-		.ln_strlen	= sizeof(":") - 1,
-		.match_len	= skp_digits_len
-	},
-	[POS_MEDIA] = {		/* SDP media info */
-		.case_sensitive	= 1,
-		.lname		= "\nm=",
-		.lnlen		= sizeof("\nm=") - 1,
-		.sname		= "\rm=",
-		.snlen		= sizeof("\rm=") - 1,
-		.ln_str		= "audio ",
-		.ln_strlen	= sizeof("audio ") - 1,
-		.match_len	= digits_len
-	},
-	[POS_OWNER] = { 	/* SDP owner address*/
-		.case_sensitive	= 1,
-		.lname		= "\no=",
-		.lnlen		= sizeof("\no=") - 1,
-		.sname		= "\ro=",
-		.snlen		= sizeof("\ro=") - 1,
-		.ln_str		= "IN IP4 ",
-		.ln_strlen	= sizeof("IN IP4 ") - 1,
-		.match_len	= epaddr_len
-	},
-	[POS_CONNECTION] = { 	/* SDP connection info */
-		.case_sensitive	= 1,
-		.lname		= "\nc=",
-		.lnlen		= sizeof("\nc=") - 1,
-		.sname		= "\rc=",
-		.snlen		= sizeof("\rc=") - 1,
-		.ln_str		= "IN IP4 ",
-		.ln_strlen	= sizeof("IN IP4 ") - 1,
-		.match_len	= epaddr_len
-	},
-	[POS_SDP_HEADER] = { 	/* SDP version header */
-		.case_sensitive	= 1,
-		.lname		= "\nv=",
-		.lnlen		= sizeof("\nv=") - 1,
-		.sname		= "\rv=",
-		.snlen		= sizeof("\rv=") - 1,
-		.ln_str		= "=",
-		.ln_strlen	= sizeof("=") - 1,
-		.match_len	= digits_len
-	}
-};
-
-/* get line lenght until first CR or LF seen. */
-int ct_sip_lnlen(const char *line, const char *limit)
-{
-	const char *k = line;
-
-	while ((line <= limit) && (*line == '\r' || *line == '\n'))
-		line++;
-
-	while (line <= limit) {
-		if (*line == '\r' || *line == '\n')
-			break;
-		line++;
-	}
-	return line - k;
-}
-EXPORT_SYMBOL_GPL(ct_sip_lnlen);
-
-/* Linear string search, case sensitive. */
-const char *ct_sip_search(const char *needle, const char *haystack,
-			  size_t needle_len, size_t haystack_len,
-			  int case_sensitive)
-{
-	const char *limit = haystack + (haystack_len - needle_len);
-
-	while (haystack <= limit) {
-		if (case_sensitive) {
-			if (strncmp(haystack, needle, needle_len) == 0)
-				return haystack;
-		} else {
-			if (strnicmp(haystack, needle, needle_len) == 0)
-				return haystack;
-		}
-		haystack++;
-	}
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(ct_sip_search);
-
-static int digits_len(const char *dptr, const char *limit, int *shift)
-{
-	int len = 0;
-	while (dptr <= limit && isdigit(*dptr)) {
-		dptr++;
-		len++;
-	}
-	return len;
-}
-
-/* get digits lenght, skiping blank spaces. */
-static int skp_digits_len(const char *dptr, const char *limit, int *shift)
-{
-	for (; dptr <= limit && *dptr == ' '; dptr++)
-		(*shift)++;
-
-	return digits_len(dptr, limit, shift);
-}
-
-/* Simple ipaddr parser.. */
-static int parse_ipaddr(const char *cp,	const char **endp,
-			__be32 *ipaddr, const char *limit)
-{
-	unsigned long int val;
-	int i, digit = 0;
-
-	for (i = 0, *ipaddr = 0; cp <= limit && i < 4; i++) {
-		digit = 0;
-		if (!isdigit(*cp))
-			break;
-
-		val = simple_strtoul(cp, (char **)&cp, 10);
-		if (val > 0xFF)
-			return -1;
-
-		((u_int8_t *)ipaddr)[i] = val;
-		digit = 1;
-
-		if (*cp != '.')
-			break;
-		cp++;
-	}
-	if (!digit)
-		return -1;
-
-	if (endp)
-		*endp = cp;
-
-	return 0;
-}
-
-/* skip ip address. returns it lenght. */
-static int epaddr_len(const char *dptr, const char *limit, int *shift)
-{
-	const char *aux = dptr;
-	__be32 ip;
-
-	if (parse_ipaddr(dptr, &dptr, &ip, limit) < 0) {
-		DEBUGP("ip: %s parse failed.!\n", dptr);
-		return 0;
-	}
-
-	/* Port number */
-	if (*dptr == ':') {
-		dptr++;
-		dptr += digits_len(dptr, limit, shift);
-	}
-	return dptr - aux;
-}
-
-/* get address length, skiping user info. */
-static int skp_epaddr_len(const char *dptr, const char *limit, int *shift)
-{
-	int s = *shift;
-
-	/* Search for @, but stop at the end of the line.
-	 * We are inside a sip: URI, so we don't need to worry about
-	 * continuation lines. */
-	while (dptr <= limit &&
-	       *dptr != '@' && *dptr != '\r' && *dptr != '\n') {
-		(*shift)++;
-		dptr++;
-	}
-
-	if (dptr <= limit && *dptr == '@') {
-		dptr++;
-		(*shift)++;
-	} else
-		*shift = s;
-
-	return epaddr_len(dptr, limit, shift);
-}
-
-/* Returns 0 if not found, -1 error parsing. */
-int ct_sip_get_info(const char *dptr, size_t dlen,
-		    unsigned int *matchoff,
-		    unsigned int *matchlen,
-		    enum sip_header_pos pos)
-{
-	struct sip_header_nfo *hnfo = &ct_sip_hdrs[pos];
-	const char *limit, *aux, *k = dptr;
-	int shift = 0;
-
-	limit = dptr + (dlen - hnfo->lnlen);
-
-	while (dptr <= limit) {
-		if ((strncmp(dptr, hnfo->lname, hnfo->lnlen) != 0) &&
-		    (hnfo->sname == NULL ||
-		     strncmp(dptr, hnfo->sname, hnfo->snlen) != 0)) {
-			dptr++;
-			continue;
-		}
-		aux = ct_sip_search(hnfo->ln_str, dptr, hnfo->ln_strlen,
-				    ct_sip_lnlen(dptr, limit),
-				    hnfo->case_sensitive);
-		if (!aux) {
-			DEBUGP("'%s' not found in '%s'.\n", hnfo->ln_str,
-			       hnfo->lname);
-			return -1;
-		}
-		aux += hnfo->ln_strlen;
-
-		*matchlen = hnfo->match_len(aux, limit, &shift);
-		if (!*matchlen)
-			return -1;
-
-		*matchoff = (aux - k) + shift;
-
-		DEBUGP("%s match succeeded! - len: %u\n", hnfo->lname,
-		       *matchlen);
-		return 1;
-	}
-	DEBUGP("%s header not found.\n", hnfo->lname);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ct_sip_get_info);
-
-static int set_expected_rtp(struct sk_buff **pskb,
-			    struct ip_conntrack *ct,
-			    enum ip_conntrack_info ctinfo,
-			    __be32 ipaddr, u_int16_t port,
-			    const char *dptr)
-{
-	struct ip_conntrack_expect *exp;
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	int ret;
-	typeof(ip_nat_sdp_hook) ip_nat_sdp;
-
-	exp = ip_conntrack_expect_alloc(ct);
-	if (exp == NULL)
-		return NF_DROP;
-
-	exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
-	exp->tuple.src.u.udp.port = 0;
-	exp->tuple.dst.ip = ipaddr;
-	exp->tuple.dst.u.udp.port = htons(port);
-	exp->tuple.dst.protonum = IPPROTO_UDP;
-
-	exp->mask.src.ip = htonl(0xFFFFFFFF);
-	exp->mask.src.u.udp.port = 0;
-	exp->mask.dst.ip = htonl(0xFFFFFFFF);
-	exp->mask.dst.u.udp.port = htons(0xFFFF);
-	exp->mask.dst.protonum = 0xFF;
-
-	exp->expectfn = NULL;
-	exp->flags = 0;
-
-	ip_nat_sdp = rcu_dereference(ip_nat_sdp_hook);
-	if (ip_nat_sdp)
-		ret = ip_nat_sdp(pskb, ctinfo, exp, dptr);
-	else {
-		if (ip_conntrack_expect_related(exp) != 0)
-			ret = NF_DROP;
-		else
-			ret = NF_ACCEPT;
-	}
-	ip_conntrack_expect_put(exp);
-
-	return ret;
-}
-
-static int sip_help(struct sk_buff **pskb,
-		    struct ip_conntrack *ct,
-		    enum ip_conntrack_info ctinfo)
-{
-	unsigned int dataoff, datalen;
-	const char *dptr;
-	int ret = NF_ACCEPT;
-	int matchoff, matchlen;
-	__be32 ipaddr;
-	u_int16_t port;
-	typeof(ip_nat_sip_hook) ip_nat_sip;
-
-	/* No Data ? */
-	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
-	if (dataoff >= (*pskb)->len) {
-		DEBUGP("skb->len = %u\n", (*pskb)->len);
-		return NF_ACCEPT;
-	}
-
-	ip_ct_refresh(ct, *pskb, sip_timeout * HZ);
-
-	if (!skb_is_nonlinear(*pskb))
-		dptr = (*pskb)->data + dataoff;
-	else {
-		DEBUGP("Copy of skbuff not supported yet.\n");
-		goto out;
-	}
-
-	ip_nat_sip = rcu_dereference(ip_nat_sip_hook);
-	if (ip_nat_sip) {
-		if (!ip_nat_sip(pskb, ctinfo, ct, &dptr)) {
-			ret = NF_DROP;
-			goto out;
-		}
-	}
-
-	/* After this point NAT, could have mangled skb, so
-	   we need to recalculate payload lenght. */
-	datalen = (*pskb)->len - dataoff;
-
-	if (datalen < (sizeof("SIP/2.0 200") - 1))
-		goto out;
-
-	/* RTP info only in some SDP pkts */
-	if (memcmp(dptr, "INVITE", sizeof("INVITE") - 1) != 0 &&
-	    memcmp(dptr, "SIP/2.0 200", sizeof("SIP/2.0 200") - 1) != 0) {
-		goto out;
-	}
-	/* Get ip and port address from SDP packet. */
-	if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen,
-			    POS_CONNECTION) > 0) {
-
-		/* We'll drop only if there are parse problems. */
-		if (parse_ipaddr(dptr + matchoff, NULL, &ipaddr,
-				 dptr + datalen) < 0) {
-			ret = NF_DROP;
-			goto out;
-		}
-		if (ct_sip_get_info(dptr, datalen, &matchoff, &matchlen,
-				    POS_MEDIA) > 0) {
-
-			port = simple_strtoul(dptr + matchoff, NULL, 10);
-			if (port < 1024) {
-				ret = NF_DROP;
-				goto out;
-			}
-			ret = set_expected_rtp(pskb, ct, ctinfo,
-					       ipaddr, port, dptr);
-		}
-	}
-out:
-	return ret;
-}
-
-static struct ip_conntrack_helper sip[MAX_PORTS];
-static char sip_names[MAX_PORTS][10];
-
-static void fini(void)
-{
-	int i;
-	for (i = 0; i < ports_c; i++) {
-		DEBUGP("unregistering helper for port %d\n", ports[i]);
-		ip_conntrack_helper_unregister(&sip[i]);
-	}
-}
-
-static int __init init(void)
-{
-	int i, ret;
-	char *tmpname;
-
-	if (ports_c == 0)
-		ports[ports_c++] = SIP_PORT;
-
-	for (i = 0; i < ports_c; i++) {
-		/* Create helper structure */
-		memset(&sip[i], 0, sizeof(struct ip_conntrack_helper));
-
-		sip[i].tuple.dst.protonum = IPPROTO_UDP;
-		sip[i].tuple.src.u.udp.port = htons(ports[i]);
-		sip[i].mask.src.u.udp.port = htons(0xFFFF);
-		sip[i].mask.dst.protonum = 0xFF;
-		sip[i].max_expected = 2;
-		sip[i].timeout = 3 * 60; /* 3 minutes */
-		sip[i].me = THIS_MODULE;
-		sip[i].help = sip_help;
-
-		tmpname = &sip_names[i][0];
-		if (ports[i] == SIP_PORT)
-			sprintf(tmpname, "sip");
-		else
-			sprintf(tmpname, "sip-%d", i);
-		sip[i].name = tmpname;
-
-		DEBUGP("port #%d: %d\n", i, ports[i]);
-
-		ret = ip_conntrack_helper_register(&sip[i]);
-		if (ret) {
-			printk("ERROR registering helper for port %d\n",
-				ports[i]);
-			fini();
-			return ret;
-		}
-	}
-	return 0;
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
deleted file mode 100644
index c32200153d6..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ /dev/null
@@ -1,962 +0,0 @@
-/* This file contains all the functions required for the standalone
-   ip_conntrack module.
-
-   These are not required by the compatibility layer.
-*/
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/percpu.h>
-#ifdef CONFIG_SYSCTL
-#include <linux/sysctl.h>
-#endif
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <net/route.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-MODULE_LICENSE("GPL");
-
-extern atomic_t ip_conntrack_count;
-DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
-
-static int kill_proto(struct ip_conntrack *i, void *data)
-{
-	return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
-			*((u_int8_t *) data));
-}
-
-#ifdef CONFIG_PROC_FS
-static int
-print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple,
-	    struct ip_conntrack_protocol *proto)
-{
-	seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
-		   NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip));
-	return proto->print_tuple(s, tuple);
-}
-
-#ifdef CONFIG_IP_NF_CT_ACCT
-static unsigned int
-seq_print_counters(struct seq_file *s,
-		   const struct ip_conntrack_counter *counter)
-{
-	return seq_printf(s, "packets=%llu bytes=%llu ",
-			  (unsigned long long)counter->packets,
-			  (unsigned long long)counter->bytes);
-}
-#else
-#define seq_print_counters(x, y)	0
-#endif
-
-struct ct_iter_state {
-	unsigned int bucket;
-};
-
-static struct list_head *ct_get_first(struct seq_file *seq)
-{
-	struct ct_iter_state *st = seq->private;
-
-	for (st->bucket = 0;
-	     st->bucket < ip_conntrack_htable_size;
-	     st->bucket++) {
-		if (!list_empty(&ip_conntrack_hash[st->bucket]))
-			return ip_conntrack_hash[st->bucket].next;
-	}
-	return NULL;
-}
-
-static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
-{
-	struct ct_iter_state *st = seq->private;
-
-	head = head->next;
-	while (head == &ip_conntrack_hash[st->bucket]) {
-		if (++st->bucket >= ip_conntrack_htable_size)
-			return NULL;
-		head = ip_conntrack_hash[st->bucket].next;
-	}
-	return head;
-}
-
-static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
-{
-	struct list_head *head = ct_get_first(seq);
-
-	if (head)
-		while (pos && (head = ct_get_next(seq, head)))
-			pos--;
-	return pos ? NULL : head;
-}
-
-static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	read_lock_bh(&ip_conntrack_lock);
-	return ct_get_idx(seq, *pos);
-}
-
-static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
-	(*pos)++;
-	return ct_get_next(s, v);
-}
-
-static void ct_seq_stop(struct seq_file *s, void *v)
-{
-	read_unlock_bh(&ip_conntrack_lock);
-}
-
-static int ct_seq_show(struct seq_file *s, void *v)
-{
-	const struct ip_conntrack_tuple_hash *hash = v;
-	const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
-	struct ip_conntrack_protocol *proto;
-
-	IP_NF_ASSERT(conntrack);
-
-	/* we only want to print DIR_ORIGINAL */
-	if (DIRECTION(hash))
-		return 0;
-
-	proto = __ip_conntrack_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
-	IP_NF_ASSERT(proto);
-
-	if (seq_printf(s, "%-8s %u %ld ",
-		      proto->name,
-		      conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
-		      timer_pending(&conntrack->timeout)
-		      ? (long)(conntrack->timeout.expires - jiffies)/HZ
-		      : 0) != 0)
-		return -ENOSPC;
-
-	if (proto->print_conntrack(s, conntrack))
-		return -ENOSPC;
-
-	if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-			proto))
-		return -ENOSPC;
-
-	if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
-		return -ENOSPC;
-
-	if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
-		if (seq_printf(s, "[UNREPLIED] "))
-			return -ENOSPC;
-
-	if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
-			proto))
-		return -ENOSPC;
-
-	if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
-		return -ENOSPC;
-
-	if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
-		if (seq_printf(s, "[ASSURED] "))
-			return -ENOSPC;
-
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
-	if (seq_printf(s, "mark=%u ", conntrack->mark))
-		return -ENOSPC;
-#endif
-
-#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
-	if (seq_printf(s, "secmark=%u ", conntrack->secmark))
-		return -ENOSPC;
-#endif
-
-	if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
-		return -ENOSPC;
-
-	return 0;
-}
-
-static struct seq_operations ct_seq_ops = {
-	.start = ct_seq_start,
-	.next  = ct_seq_next,
-	.stop  = ct_seq_stop,
-	.show  = ct_seq_show
-};
-
-static int ct_open(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq;
-	struct ct_iter_state *st;
-	int ret;
-
-	st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL);
-	if (st == NULL)
-		return -ENOMEM;
-	ret = seq_open(file, &ct_seq_ops);
-	if (ret)
-		goto out_free;
-	seq          = file->private_data;
-	seq->private = st;
-	memset(st, 0, sizeof(struct ct_iter_state));
-	return ret;
-out_free:
-	kfree(st);
-	return ret;
-}
-
-static const struct file_operations ct_file_ops = {
-	.owner   = THIS_MODULE,
-	.open    = ct_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_private,
-};
-
-/* expects */
-static void *exp_seq_start(struct seq_file *s, loff_t *pos)
-{
-	struct list_head *e = &ip_conntrack_expect_list;
-	loff_t i;
-
-	/* strange seq_file api calls stop even if we fail,
-	 * thus we need to grab lock since stop unlocks */
-	read_lock_bh(&ip_conntrack_lock);
-
-	if (list_empty(e))
-		return NULL;
-
-	for (i = 0; i <= *pos; i++) {
-		e = e->next;
-		if (e == &ip_conntrack_expect_list)
-			return NULL;
-	}
-	return e;
-}
-
-static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
-	struct list_head *e = v;
-
-	++*pos;
-	e = e->next;
-
-	if (e == &ip_conntrack_expect_list)
-		return NULL;
-
-	return e;
-}
-
-static void exp_seq_stop(struct seq_file *s, void *v)
-{
-	read_unlock_bh(&ip_conntrack_lock);
-}
-
-static int exp_seq_show(struct seq_file *s, void *v)
-{
-	struct ip_conntrack_expect *expect = v;
-
-	if (expect->timeout.function)
-		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
-			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
-	else
-		seq_printf(s, "- ");
-
-	seq_printf(s, "proto=%u ", expect->tuple.dst.protonum);
-
-	print_tuple(s, &expect->tuple,
-		    __ip_conntrack_proto_find(expect->tuple.dst.protonum));
-	return seq_putc(s, '\n');
-}
-
-static struct seq_operations exp_seq_ops = {
-	.start = exp_seq_start,
-	.next = exp_seq_next,
-	.stop = exp_seq_stop,
-	.show = exp_seq_show
-};
-
-static int exp_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &exp_seq_ops);
-}
-
-static const struct file_operations exp_file_ops = {
-	.owner   = THIS_MODULE,
-	.open    = exp_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release
-};
-
-static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	int cpu;
-
-	if (*pos == 0)
-		return SEQ_START_TOKEN;
-
-	for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
-		if (!cpu_possible(cpu))
-			continue;
-		*pos = cpu+1;
-		return &per_cpu(ip_conntrack_stat, cpu);
-	}
-
-	return NULL;
-}
-
-static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	int cpu;
-
-	for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
-		if (!cpu_possible(cpu))
-			continue;
-		*pos = cpu+1;
-		return &per_cpu(ip_conntrack_stat, cpu);
-	}
-
-	return NULL;
-}
-
-static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
-{
-}
-
-static int ct_cpu_seq_show(struct seq_file *seq, void *v)
-{
-	unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
-	struct ip_conntrack_stat *st = v;
-
-	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete\n");
-		return 0;
-	}
-
-	seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
-			"%08x %08x %08x %08x %08x  %08x %08x %08x \n",
-		   nr_conntracks,
-		   st->searched,
-		   st->found,
-		   st->new,
-		   st->invalid,
-		   st->ignore,
-		   st->delete,
-		   st->delete_list,
-		   st->insert,
-		   st->insert_failed,
-		   st->drop,
-		   st->early_drop,
-		   st->error,
-
-		   st->expect_new,
-		   st->expect_create,
-		   st->expect_delete
-		);
-	return 0;
-}
-
-static struct seq_operations ct_cpu_seq_ops = {
-	.start  = ct_cpu_seq_start,
-	.next   = ct_cpu_seq_next,
-	.stop   = ct_cpu_seq_stop,
-	.show   = ct_cpu_seq_show,
-};
-
-static int ct_cpu_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &ct_cpu_seq_ops);
-}
-
-static const struct file_operations ct_cpu_seq_fops = {
-	.owner   = THIS_MODULE,
-	.open    = ct_cpu_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_private,
-};
-#endif
-
-static unsigned int ip_confirm(unsigned int hooknum,
-			       struct sk_buff **pskb,
-			       const struct net_device *in,
-			       const struct net_device *out,
-			       int (*okfn)(struct sk_buff *))
-{
-	/* We've seen it coming out the other side: confirm it */
-	return ip_conntrack_confirm(pskb);
-}
-
-static unsigned int ip_conntrack_help(unsigned int hooknum,
-				      struct sk_buff **pskb,
-				      const struct net_device *in,
-				      const struct net_device *out,
-				      int (*okfn)(struct sk_buff *))
-{
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-
-	/* This is where we call the helper: as the packet goes out. */
-	ct = ip_conntrack_get(*pskb, &ctinfo);
-	if (ct && ct->helper && ctinfo != IP_CT_RELATED + IP_CT_IS_REPLY) {
-		unsigned int ret;
-		ret = ct->helper->help(pskb, ct, ctinfo);
-		if (ret != NF_ACCEPT)
-			return ret;
-	}
-	return NF_ACCEPT;
-}
-
-static unsigned int ip_conntrack_defrag(unsigned int hooknum,
-					struct sk_buff **pskb,
-					const struct net_device *in,
-					const struct net_device *out,
-					int (*okfn)(struct sk_buff *))
-{
-#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
-	/* Previously seen (loopback)?  Ignore.  Do this before
-	   fragment check. */
-	if ((*pskb)->nfct)
-		return NF_ACCEPT;
-#endif
-
-	/* Gather fragments. */
-	if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-		*pskb = ip_ct_gather_frags(*pskb,
-					   hooknum == NF_IP_PRE_ROUTING ?
-					   IP_DEFRAG_CONNTRACK_IN :
-					   IP_DEFRAG_CONNTRACK_OUT);
-		if (!*pskb)
-			return NF_STOLEN;
-	}
-	return NF_ACCEPT;
-}
-
-static unsigned int ip_conntrack_local(unsigned int hooknum,
-				       struct sk_buff **pskb,
-				       const struct net_device *in,
-				       const struct net_device *out,
-				       int (*okfn)(struct sk_buff *))
-{
-	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
-		if (net_ratelimit())
-			printk("ipt_hook: happy cracking.\n");
-		return NF_ACCEPT;
-	}
-	return ip_conntrack_in(hooknum, pskb, in, out, okfn);
-}
-
-/* Connection tracking may drop packets, but never alters them, so
-   make it the first hook. */
-static struct nf_hook_ops ip_conntrack_ops[] = {
-	{
-		.hook		= ip_conntrack_defrag,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_PRE_ROUTING,
-		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
-	},
-	{
-		.hook		= ip_conntrack_in,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_PRE_ROUTING,
-		.priority	= NF_IP_PRI_CONNTRACK,
-	},
-	{
-		.hook		= ip_conntrack_defrag,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_OUT,
-		.priority	= NF_IP_PRI_CONNTRACK_DEFRAG,
-	},
-	{
-		.hook		= ip_conntrack_local,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_OUT,
-		.priority	= NF_IP_PRI_CONNTRACK,
-	},
-	{
-		.hook		= ip_conntrack_help,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
-		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
-	},
-	{
-		.hook		= ip_conntrack_help,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
-		.priority	= NF_IP_PRI_CONNTRACK_HELPER,
-	},
-	{
-		.hook		= ip_confirm,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
-		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
-	},
-	{
-		.hook		= ip_confirm,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
-		.priority	= NF_IP_PRI_CONNTRACK_CONFIRM,
-	},
-};
-
-/* Sysctl support */
-
-int ip_conntrack_checksum __read_mostly = 1;
-
-#ifdef CONFIG_SYSCTL
-
-/* From ip_conntrack_core.c */
-extern int ip_conntrack_max;
-extern unsigned int ip_conntrack_htable_size;
-
-/* From ip_conntrack_proto_tcp.c */
-extern unsigned int ip_ct_tcp_timeout_syn_sent;
-extern unsigned int ip_ct_tcp_timeout_syn_recv;
-extern unsigned int ip_ct_tcp_timeout_established;
-extern unsigned int ip_ct_tcp_timeout_fin_wait;
-extern unsigned int ip_ct_tcp_timeout_close_wait;
-extern unsigned int ip_ct_tcp_timeout_last_ack;
-extern unsigned int ip_ct_tcp_timeout_time_wait;
-extern unsigned int ip_ct_tcp_timeout_close;
-extern unsigned int ip_ct_tcp_timeout_max_retrans;
-extern int ip_ct_tcp_loose;
-extern int ip_ct_tcp_be_liberal;
-extern int ip_ct_tcp_max_retrans;
-
-/* From ip_conntrack_proto_udp.c */
-extern unsigned int ip_ct_udp_timeout;
-extern unsigned int ip_ct_udp_timeout_stream;
-
-/* From ip_conntrack_proto_icmp.c */
-extern unsigned int ip_ct_icmp_timeout;
-
-/* From ip_conntrack_proto_generic.c */
-extern unsigned int ip_ct_generic_timeout;
-
-/* Log invalid packets of a given protocol */
-static int log_invalid_proto_min = 0;
-static int log_invalid_proto_max = 255;
-
-static struct ctl_table_header *ip_ct_sysctl_header;
-
-static ctl_table ip_ct_sysctl_table[] = {
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_MAX,
-		.procname	= "ip_conntrack_max",
-		.data		= &ip_conntrack_max,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_COUNT,
-		.procname	= "ip_conntrack_count",
-		.data		= &ip_conntrack_count,
-		.maxlen		= sizeof(int),
-		.mode		= 0444,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_BUCKETS,
-		.procname	= "ip_conntrack_buckets",
-		.data		= &ip_conntrack_htable_size,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0444,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_CHECKSUM,
-		.procname	= "ip_conntrack_checksum",
-		.data		= &ip_conntrack_checksum,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
-		.procname	= "ip_conntrack_tcp_timeout_syn_sent",
-		.data		= &ip_ct_tcp_timeout_syn_sent,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
-		.procname	= "ip_conntrack_tcp_timeout_syn_recv",
-		.data		= &ip_ct_tcp_timeout_syn_recv,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
-		.procname	= "ip_conntrack_tcp_timeout_established",
-		.data		= &ip_ct_tcp_timeout_established,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
-		.procname	= "ip_conntrack_tcp_timeout_fin_wait",
-		.data		= &ip_ct_tcp_timeout_fin_wait,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
-		.procname	= "ip_conntrack_tcp_timeout_close_wait",
-		.data		= &ip_ct_tcp_timeout_close_wait,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
-		.procname	= "ip_conntrack_tcp_timeout_last_ack",
-		.data		= &ip_ct_tcp_timeout_last_ack,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
-		.procname	= "ip_conntrack_tcp_timeout_time_wait",
-		.data		= &ip_ct_tcp_timeout_time_wait,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
-		.procname	= "ip_conntrack_tcp_timeout_close",
-		.data		= &ip_ct_tcp_timeout_close,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT,
-		.procname	= "ip_conntrack_udp_timeout",
-		.data		= &ip_ct_udp_timeout,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
-		.procname	= "ip_conntrack_udp_timeout_stream",
-		.data		= &ip_ct_udp_timeout_stream,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT,
-		.procname	= "ip_conntrack_icmp_timeout",
-		.data		= &ip_ct_icmp_timeout,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT,
-		.procname	= "ip_conntrack_generic_timeout",
-		.data		= &ip_ct_generic_timeout,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_LOG_INVALID,
-		.procname	= "ip_conntrack_log_invalid",
-		.data		= &ip_ct_log_invalid,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &log_invalid_proto_min,
-		.extra2		= &log_invalid_proto_max,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
-		.procname	= "ip_conntrack_tcp_timeout_max_retrans",
-		.data		= &ip_ct_tcp_timeout_max_retrans,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_jiffies,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
-		.procname	= "ip_conntrack_tcp_loose",
-		.data		= &ip_ct_tcp_loose,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
-		.procname	= "ip_conntrack_tcp_be_liberal",
-		.data		= &ip_ct_tcp_be_liberal,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{
-		.ctl_name	= NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
-		.procname	= "ip_conntrack_tcp_max_retrans",
-		.data		= &ip_ct_tcp_max_retrans,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec,
-	},
-	{ .ctl_name = 0 }
-};
-
-#define NET_IP_CONNTRACK_MAX 2089
-
-static ctl_table ip_ct_netfilter_table[] = {
-	{
-		.ctl_name	= NET_IPV4_NETFILTER,
-		.procname	= "netfilter",
-		.mode		= 0555,
-		.child		= ip_ct_sysctl_table,
-	},
-	{
-		.ctl_name	= NET_IP_CONNTRACK_MAX,
-		.procname	= "ip_conntrack_max",
-		.data		= &ip_conntrack_max,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_ipv4_table[] = {
-	{
-		.ctl_name	= NET_IPV4,
-		.procname	= "ipv4",
-		.mode		= 0555,
-		.child		= ip_ct_netfilter_table,
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_net_table[] = {
-	{
-		.ctl_name	= CTL_NET,
-		.procname	= "net",
-		.mode		= 0555,
-		.child		= ip_ct_ipv4_table,
-	},
-	{ .ctl_name = 0 }
-};
-
-EXPORT_SYMBOL(ip_ct_log_invalid);
-#endif /* CONFIG_SYSCTL */
-
-/* FIXME: Allow NULL functions and sub in pointers to generic for
-   them. --RR */
-int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
-{
-	int ret = 0;
-
-	write_lock_bh(&ip_conntrack_lock);
-	if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
-		ret = -EBUSY;
-		goto out;
-	}
-	rcu_assign_pointer(ip_ct_protos[proto->proto], proto);
- out:
-	write_unlock_bh(&ip_conntrack_lock);
-	return ret;
-}
-
-void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
-{
-	write_lock_bh(&ip_conntrack_lock);
-	rcu_assign_pointer(ip_ct_protos[proto->proto],
-			   &ip_conntrack_generic_protocol);
-	write_unlock_bh(&ip_conntrack_lock);
-	synchronize_rcu();
-
-	/* Remove all contrack entries for this protocol */
-	ip_ct_iterate_cleanup(kill_proto, &proto->proto);
-}
-
-static int __init ip_conntrack_standalone_init(void)
-{
-#ifdef CONFIG_PROC_FS
-	struct proc_dir_entry *proc, *proc_exp, *proc_stat;
-#endif
-	int ret = 0;
-
-	ret = ip_conntrack_init();
-	if (ret < 0)
-		return ret;
-
-#ifdef CONFIG_PROC_FS
-	ret = -ENOMEM;
-	proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops);
-	if (!proc) goto cleanup_init;
-
-	proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
-					&exp_file_ops);
-	if (!proc_exp) goto cleanup_proc;
-
-	proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
-	if (!proc_stat)
-		goto cleanup_proc_exp;
-
-	proc_stat->proc_fops = &ct_cpu_seq_fops;
-	proc_stat->owner = THIS_MODULE;
-#endif
-
-	ret = nf_register_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
-	if (ret < 0) {
-		printk("ip_conntrack: can't register hooks.\n");
-		goto cleanup_proc_stat;
-	}
-#ifdef CONFIG_SYSCTL
-	ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table);
-	if (ip_ct_sysctl_header == NULL) {
-		printk("ip_conntrack: can't register to sysctl.\n");
-		ret = -ENOMEM;
-		goto cleanup_hooks;
-	}
-#endif
-	return ret;
-
-#ifdef CONFIG_SYSCTL
- cleanup_hooks:
-	nf_unregister_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
-#endif
- cleanup_proc_stat:
-#ifdef CONFIG_PROC_FS
-	remove_proc_entry("ip_conntrack", proc_net_stat);
- cleanup_proc_exp:
-	proc_net_remove("ip_conntrack_expect");
- cleanup_proc:
-	proc_net_remove("ip_conntrack");
- cleanup_init:
-#endif /* CONFIG_PROC_FS */
-	ip_conntrack_cleanup();
-	return ret;
-}
-
-static void __exit ip_conntrack_standalone_fini(void)
-{
-	synchronize_net();
-#ifdef CONFIG_SYSCTL
-	unregister_sysctl_table(ip_ct_sysctl_header);
-#endif
-	nf_unregister_hooks(ip_conntrack_ops, ARRAY_SIZE(ip_conntrack_ops));
-#ifdef CONFIG_PROC_FS
-	remove_proc_entry("ip_conntrack", proc_net_stat);
-	proc_net_remove("ip_conntrack_expect");
-	proc_net_remove("ip_conntrack");
-#endif /* CONFIG_PROC_FS */
-	ip_conntrack_cleanup();
-}
-
-module_init(ip_conntrack_standalone_init);
-module_exit(ip_conntrack_standalone_fini);
-
-/* Some modules need us, but don't depend directly on any symbol.
-   They should call this. */
-void need_conntrack(void)
-{
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-EXPORT_SYMBOL_GPL(ip_conntrack_chain);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain);
-EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier);
-EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier);
-EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
-EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
-#endif
-EXPORT_SYMBOL(ip_conntrack_protocol_register);
-EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
-EXPORT_SYMBOL(ip_ct_get_tuple);
-EXPORT_SYMBOL(invert_tuplepr);
-EXPORT_SYMBOL(ip_conntrack_alter_reply);
-EXPORT_SYMBOL(ip_conntrack_destroyed);
-EXPORT_SYMBOL(need_conntrack);
-EXPORT_SYMBOL(ip_conntrack_helper_register);
-EXPORT_SYMBOL(ip_conntrack_helper_unregister);
-EXPORT_SYMBOL(ip_ct_iterate_cleanup);
-EXPORT_SYMBOL(__ip_ct_refresh_acct);
-
-EXPORT_SYMBOL(ip_conntrack_expect_alloc);
-EXPORT_SYMBOL(ip_conntrack_expect_put);
-EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
-EXPORT_SYMBOL(ip_conntrack_expect_related);
-EXPORT_SYMBOL(ip_conntrack_unexpect_related);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
-EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);
-
-EXPORT_SYMBOL(ip_conntrack_tuple_taken);
-EXPORT_SYMBOL(ip_ct_gather_frags);
-EXPORT_SYMBOL(ip_conntrack_htable_size);
-EXPORT_SYMBOL(ip_conntrack_lock);
-EXPORT_SYMBOL(ip_conntrack_hash);
-EXPORT_SYMBOL(ip_conntrack_untracked);
-EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-EXPORT_SYMBOL(ip_conntrack_tcp_update);
-#endif
-
-EXPORT_SYMBOL_GPL(ip_conntrack_flush);
-EXPORT_SYMBOL_GPL(__ip_conntrack_find);
-
-EXPORT_SYMBOL_GPL(ip_conntrack_alloc);
-EXPORT_SYMBOL_GPL(ip_conntrack_free);
-EXPORT_SYMBOL_GPL(ip_conntrack_hash_insert);
-
-EXPORT_SYMBOL_GPL(ip_ct_remove_expectations);
-
-EXPORT_SYMBOL_GPL(ip_conntrack_helper_find_get);
-EXPORT_SYMBOL_GPL(ip_conntrack_helper_put);
-EXPORT_SYMBOL_GPL(__ip_conntrack_helper_find_byname);
-
-EXPORT_SYMBOL_GPL(ip_conntrack_proto_find_get);
-EXPORT_SYMBOL_GPL(ip_conntrack_proto_put);
-EXPORT_SYMBOL_GPL(__ip_conntrack_proto_find);
-EXPORT_SYMBOL_GPL(ip_conntrack_checksum);
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-EXPORT_SYMBOL_GPL(ip_ct_port_tuple_to_nfattr);
-EXPORT_SYMBOL_GPL(ip_ct_port_nfattr_to_tuple);
-#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
deleted file mode 100644
index afc6809a388..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_tftp.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Version: 0.0.7
- *
- * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org>
- * 	- port to newnat API
- *
- */
-
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
-#include <linux/moduleparam.h>
-
-MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
-MODULE_DESCRIPTION("tftp connection tracking helper");
-MODULE_LICENSE("GPL");
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-module_param_array(ports, ushort, &ports_c, 0400);
-MODULE_PARM_DESC(ports, "port numbers of tftp servers");
-
-#if 0
-#define DEBUGP(format, args...) printk("%s:%s:" format, \
-				       __FILE__, __FUNCTION__ , ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb,
-				 enum ip_conntrack_info ctinfo,
-				 struct ip_conntrack_expect *exp);
-EXPORT_SYMBOL_GPL(ip_nat_tftp_hook);
-
-static int tftp_help(struct sk_buff **pskb,
-		     struct ip_conntrack *ct,
-		     enum ip_conntrack_info ctinfo)
-{
-	struct tftphdr _tftph, *tfh;
-	struct ip_conntrack_expect *exp;
-	unsigned int ret = NF_ACCEPT;
-	typeof(ip_nat_tftp_hook) ip_nat_tftp;
-
-	tfh = skb_header_pointer(*pskb,
-				 ip_hdrlen(*pskb) + sizeof(struct udphdr),
-				 sizeof(_tftph), &_tftph);
-	if (tfh == NULL)
-		return NF_ACCEPT;
-
-	switch (ntohs(tfh->opcode)) {
-	/* RRQ and WRQ works the same way */
-	case TFTP_OPCODE_READ:
-	case TFTP_OPCODE_WRITE:
-		DEBUGP("");
-		DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-		DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
-		exp = ip_conntrack_expect_alloc(ct);
-		if (exp == NULL)
-			return NF_DROP;
-
-		exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-		exp->mask.src.ip = htonl(0xffffffff);
-		exp->mask.src.u.udp.port = 0;
-		exp->mask.dst.ip = htonl(0xffffffff);
-		exp->mask.dst.u.udp.port = htons(0xffff);
-		exp->mask.dst.protonum = 0xff;
-		exp->expectfn = NULL;
-		exp->flags = 0;
-
-		DEBUGP("expect: ");
-		DUMP_TUPLE(&exp->tuple);
-		DUMP_TUPLE(&exp->mask);
-		ip_nat_tftp = rcu_dereference(ip_nat_tftp_hook);
-		if (ip_nat_tftp)
-			ret = ip_nat_tftp(pskb, ctinfo, exp);
-		else if (ip_conntrack_expect_related(exp) != 0)
-			ret = NF_DROP;
-		ip_conntrack_expect_put(exp);
-		break;
-	case TFTP_OPCODE_DATA:
-	case TFTP_OPCODE_ACK:
-		DEBUGP("Data/ACK opcode\n");
-		break;
-	case TFTP_OPCODE_ERROR:
-		DEBUGP("Error opcode\n");
-		break;
-	default:
-		DEBUGP("Unknown opcode\n");
-	}
-	return NF_ACCEPT;
-}
-
-static struct ip_conntrack_helper tftp[MAX_PORTS];
-static char tftp_names[MAX_PORTS][sizeof("tftp-65535")];
-
-static void ip_conntrack_tftp_fini(void)
-{
-	int i;
-
-	for (i = 0 ; i < ports_c; i++) {
-		DEBUGP("unregistering helper for port %d\n",
-			ports[i]);
-		ip_conntrack_helper_unregister(&tftp[i]);
-	}
-}
-
-static int __init ip_conntrack_tftp_init(void)
-{
-	int i, ret;
-	char *tmpname;
-
-	if (ports_c == 0)
-		ports[ports_c++] = TFTP_PORT;
-
-	for (i = 0; i < ports_c; i++) {
-		/* Create helper structure */
-		memset(&tftp[i], 0, sizeof(struct ip_conntrack_helper));
-
-		tftp[i].tuple.dst.protonum = IPPROTO_UDP;
-		tftp[i].tuple.src.u.udp.port = htons(ports[i]);
-		tftp[i].mask.dst.protonum = 0xFF;
-		tftp[i].mask.src.u.udp.port = htons(0xFFFF);
-		tftp[i].max_expected = 1;
-		tftp[i].timeout = 5 * 60; /* 5 minutes */
-		tftp[i].me = THIS_MODULE;
-		tftp[i].help = tftp_help;
-
-		tmpname = &tftp_names[i][0];
-		if (ports[i] == TFTP_PORT)
-			sprintf(tmpname, "tftp");
-		else
-			sprintf(tmpname, "tftp-%d", i);
-		tftp[i].name = tmpname;
-
-		DEBUGP("port #%d: %d\n", i, ports[i]);
-
-		ret=ip_conntrack_helper_register(&tftp[i]);
-		if (ret) {
-			printk("ERROR registering helper for port %d\n",
-				ports[i]);
-			ip_conntrack_tftp_fini();
-			return(ret);
-		}
-	}
-	return(0);
-}
-
-module_init(ip_conntrack_tftp_init);
-module_exit(ip_conntrack_tftp_fini);
diff --git a/net/ipv4/netfilter/ip_nat_amanda.c b/net/ipv4/netfilter/ip_nat_amanda.c
deleted file mode 100644
index 85df1a9aed3..00000000000
--- a/net/ipv4/netfilter/ip_nat_amanda.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/* Amanda extension for TCP NAT alteration.
- * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
- * based on a copy of HW's ip_nat_irc.c as well as other modules
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
- *	Module load syntax:
- * 	insmod ip_nat_amanda.o
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <net/tcp.h>
-#include <net/udp.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
-
-
-MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
-MODULE_DESCRIPTION("Amanda NAT helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int help(struct sk_buff **pskb,
-			 enum ip_conntrack_info ctinfo,
-			 unsigned int matchoff,
-			 unsigned int matchlen,
-			 struct ip_conntrack_expect *exp)
-{
-	char buffer[sizeof("65535")];
-	u_int16_t port;
-	unsigned int ret;
-
-	/* Connection comes from client. */
-	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
-	exp->dir = IP_CT_DIR_ORIGINAL;
-
-	/* When you see the packet, we need to NAT it the same as the
-	 * this one (ie. same IP: it will be TCP and master is UDP). */
-	exp->expectfn = ip_nat_follow_master;
-
-	/* Try to get same port: if not, try to change it. */
-	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
-		exp->tuple.dst.u.tcp.port = htons(port);
-		if (ip_conntrack_expect_related(exp) == 0)
-			break;
-	}
-
-	if (port == 0)
-		return NF_DROP;
-
-	sprintf(buffer, "%u", port);
-	ret = ip_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
-				       matchoff, matchlen,
-				       buffer, strlen(buffer));
-	if (ret != NF_ACCEPT)
-		ip_conntrack_unexpect_related(exp);
-	return ret;
-}
-
-static void __exit ip_nat_amanda_fini(void)
-{
-	rcu_assign_pointer(ip_nat_amanda_hook, NULL);
-	synchronize_rcu();
-}
-
-static int __init ip_nat_amanda_init(void)
-{
-	BUG_ON(rcu_dereference(ip_nat_amanda_hook));
-	rcu_assign_pointer(ip_nat_amanda_hook, help);
-	return 0;
-}
-
-module_init(ip_nat_amanda_init);
-module_exit(ip_nat_amanda_fini);
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
deleted file mode 100644
index cf46930606f..00000000000
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ /dev/null
@@ -1,633 +0,0 @@
-/* NAT for netfilter; shared with compatibility layer. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/vmalloc.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h>  /* For tcp_prot in getorigdst */
-#include <linux/icmp.h>
-#include <linux/udp.h>
-#include <linux/jhash.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-DEFINE_RWLOCK(ip_nat_lock);
-
-/* Calculated at init based on memory size */
-static unsigned int ip_nat_htable_size;
-
-static struct list_head *bysource;
-
-#define MAX_IP_NAT_PROTO 256
-static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
-
-static inline struct ip_nat_protocol *
-__ip_nat_proto_find(u_int8_t protonum)
-{
-	return rcu_dereference(ip_nat_protos[protonum]);
-}
-
-struct ip_nat_protocol *
-ip_nat_proto_find_get(u_int8_t protonum)
-{
-	struct ip_nat_protocol *p;
-
-	rcu_read_lock();
-	p = __ip_nat_proto_find(protonum);
-	if (!try_module_get(p->me))
-		p = &ip_nat_unknown_protocol;
-	rcu_read_unlock();
-
-	return p;
-}
-EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
-
-void
-ip_nat_proto_put(struct ip_nat_protocol *p)
-{
-	module_put(p->me);
-}
-EXPORT_SYMBOL_GPL(ip_nat_proto_put);
-
-/* We keep an extra hash for each conntrack, for fast searching. */
-static inline unsigned int
-hash_by_src(const struct ip_conntrack_tuple *tuple)
-{
-	/* Original src, to ensure we map it consistently if poss. */
-	return jhash_3words((__force u32)tuple->src.ip, tuple->src.u.all,
-			    tuple->dst.protonum, 0) % ip_nat_htable_size;
-}
-
-/* Noone using conntrack by the time this called. */
-static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
-{
-	if (!(conn->status & IPS_NAT_DONE_MASK))
-		return;
-
-	write_lock_bh(&ip_nat_lock);
-	list_del(&conn->nat.info.bysource);
-	write_unlock_bh(&ip_nat_lock);
-}
-
-/* Is this tuple already taken? (not by us) */
-int
-ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
-		  const struct ip_conntrack *ignored_conntrack)
-{
-	/* Conntrack tracking doesn't keep track of outgoing tuples; only
-	   incoming ones.  NAT means they don't have a fixed mapping,
-	   so we invert the tuple and look for the incoming reply.
-
-	   We could keep a separate hash if this proves too slow. */
-	struct ip_conntrack_tuple reply;
-
-	invert_tuplepr(&reply, tuple);
-	return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
-}
-EXPORT_SYMBOL(ip_nat_used_tuple);
-
-/* If we source map this tuple so reply looks like reply_tuple, will
- * that meet the constraints of range. */
-static int
-in_range(const struct ip_conntrack_tuple *tuple,
-	 const struct ip_nat_range *range)
-{
-	struct ip_nat_protocol *proto;
-	int ret = 0;
-
-	/* If we are supposed to map IPs, then we must be in the
-	   range specified, otherwise let this drag us onto a new src IP. */
-	if (range->flags & IP_NAT_RANGE_MAP_IPS) {
-		if (ntohl(tuple->src.ip) < ntohl(range->min_ip)
-		    || ntohl(tuple->src.ip) > ntohl(range->max_ip))
-			return 0;
-	}
-
-	rcu_read_lock();
-	proto = __ip_nat_proto_find(tuple->dst.protonum);
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
-	    || proto->in_range(tuple, IP_NAT_MANIP_SRC,
-			       &range->min, &range->max))
-		ret = 1;
-	rcu_read_unlock();
-
-	return ret;
-}
-
-static inline int
-same_src(const struct ip_conntrack *ct,
-	 const struct ip_conntrack_tuple *tuple)
-{
-	return (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum
-		== tuple->dst.protonum
-		&& ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip
-		== tuple->src.ip
-		&& ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all
-		== tuple->src.u.all);
-}
-
-/* Only called for SRC manip */
-static int
-find_appropriate_src(const struct ip_conntrack_tuple *tuple,
-		     struct ip_conntrack_tuple *result,
-		     const struct ip_nat_range *range)
-{
-	unsigned int h = hash_by_src(tuple);
-	struct ip_conntrack *ct;
-
-	read_lock_bh(&ip_nat_lock);
-	list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
-		if (same_src(ct, tuple)) {
-			/* Copy source part from reply tuple. */
-			invert_tuplepr(result,
-				       &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-			result->dst = tuple->dst;
-
-			if (in_range(result, range)) {
-				read_unlock_bh(&ip_nat_lock);
-				return 1;
-			}
-		}
-	}
-	read_unlock_bh(&ip_nat_lock);
-	return 0;
-}
-
-/* For [FUTURE] fragmentation handling, we want the least-used
-   src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
-   if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
-   1-65535, we don't do pro-rata allocation based on ports; we choose
-   the ip with the lowest src-ip/dst-ip/proto usage.
-*/
-static void
-find_best_ips_proto(struct ip_conntrack_tuple *tuple,
-		    const struct ip_nat_range *range,
-		    const struct ip_conntrack *conntrack,
-		    enum ip_nat_manip_type maniptype)
-{
-	__be32 *var_ipp;
-	/* Host order */
-	u_int32_t minip, maxip, j;
-
-	/* No IP mapping?  Do nothing. */
-	if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
-		return;
-
-	if (maniptype == IP_NAT_MANIP_SRC)
-		var_ipp = &tuple->src.ip;
-	else
-		var_ipp = &tuple->dst.ip;
-
-	/* Fast path: only one choice. */
-	if (range->min_ip == range->max_ip) {
-		*var_ipp = range->min_ip;
-		return;
-	}
-
-	/* Hashing source and destination IPs gives a fairly even
-	 * spread in practice (if there are a small number of IPs
-	 * involved, there usually aren't that many connections
-	 * anyway).  The consistency means that servers see the same
-	 * client coming from the same IP (some Internet Banking sites
-	 * like this), even across reboots. */
-	minip = ntohl(range->min_ip);
-	maxip = ntohl(range->max_ip);
-	j = jhash_2words((__force u32)tuple->src.ip, (__force u32)tuple->dst.ip, 0);
-	*var_ipp = htonl(minip + j % (maxip - minip + 1));
-}
-
-/* Manipulate the tuple into the range given.  For NF_IP_POST_ROUTING,
- * we change the source to map into the range.  For NF_IP_PRE_ROUTING
- * and NF_IP_LOCAL_OUT, we change the destination to map into the
- * range.  It might not be possible to get a unique tuple, but we try.
- * At worst (or if we race), we will end up with a final duplicate in
- * __ip_conntrack_confirm and drop the packet. */
-static void
-get_unique_tuple(struct ip_conntrack_tuple *tuple,
-		 const struct ip_conntrack_tuple *orig_tuple,
-		 const struct ip_nat_range *range,
-		 struct ip_conntrack *conntrack,
-		 enum ip_nat_manip_type maniptype)
-{
-	struct ip_nat_protocol *proto;
-
-	/* 1) If this srcip/proto/src-proto-part is currently mapped,
-	   and that same mapping gives a unique tuple within the given
-	   range, use that.
-
-	   This is only required for source (ie. NAT/masq) mappings.
-	   So far, we don't do local source mappings, so multiple
-	   manips not an issue.  */
-	if (maniptype == IP_NAT_MANIP_SRC) {
-		if (find_appropriate_src(orig_tuple, tuple, range)) {
-			DEBUGP("get_unique_tuple: Found current src map\n");
-			if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
-				if (!ip_nat_used_tuple(tuple, conntrack))
-					return;
-		}
-	}
-
-	/* 2) Select the least-used IP/proto combination in the given
-	   range. */
-	*tuple = *orig_tuple;
-	find_best_ips_proto(tuple, range, conntrack, maniptype);
-
-	/* 3) The per-protocol part of the manip is made to map into
-	   the range to make a unique tuple. */
-
-	rcu_read_lock();
-	proto = __ip_nat_proto_find(orig_tuple->dst.protonum);
-
-	/* Change protocol info to have some randomization */
-	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
-		proto->unique_tuple(tuple, range, maniptype, conntrack);
-		goto out;
-	}
-
-	/* Only bother mapping if it's not already in range and unique */
-	if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
-	     || proto->in_range(tuple, maniptype, &range->min, &range->max))
-	    && !ip_nat_used_tuple(tuple, conntrack))
-		goto out;
-
-	/* Last change: get protocol to try to obtain unique tuple. */
-	proto->unique_tuple(tuple, range, maniptype, conntrack);
-out:
-	rcu_read_unlock();
-}
-
-unsigned int
-ip_nat_setup_info(struct ip_conntrack *conntrack,
-		  const struct ip_nat_range *range,
-		  unsigned int hooknum)
-{
-	struct ip_conntrack_tuple curr_tuple, new_tuple;
-	struct ip_nat_info *info = &conntrack->nat.info;
-	int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK);
-	enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
-
-	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
-		     || hooknum == NF_IP_POST_ROUTING
-		     || hooknum == NF_IP_LOCAL_IN
-		     || hooknum == NF_IP_LOCAL_OUT);
-	BUG_ON(ip_nat_initialized(conntrack, maniptype));
-
-	/* What we've got will look like inverse of reply. Normally
-	   this is what is in the conntrack, except for prior
-	   manipulations (future optimization: if num_manips == 0,
-	   orig_tp =
-	   conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
-	invert_tuplepr(&curr_tuple,
-		       &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);
-
-	get_unique_tuple(&new_tuple, &curr_tuple, range, conntrack, maniptype);
-
-	if (!ip_ct_tuple_equal(&new_tuple, &curr_tuple)) {
-		struct ip_conntrack_tuple reply;
-
-		/* Alter conntrack table so will recognize replies. */
-		invert_tuplepr(&reply, &new_tuple);
-		ip_conntrack_alter_reply(conntrack, &reply);
-
-		/* Non-atomic: we own this at the moment. */
-		if (maniptype == IP_NAT_MANIP_SRC)
-			conntrack->status |= IPS_SRC_NAT;
-		else
-			conntrack->status |= IPS_DST_NAT;
-	}
-
-	/* Place in source hash if this is the first time. */
-	if (have_to_hash) {
-		unsigned int srchash
-			= hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
-				      .tuple);
-		write_lock_bh(&ip_nat_lock);
-		list_add(&info->bysource, &bysource[srchash]);
-		write_unlock_bh(&ip_nat_lock);
-	}
-
-	/* It's done. */
-	if (maniptype == IP_NAT_MANIP_DST)
-		set_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status);
-	else
-		set_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status);
-
-	return NF_ACCEPT;
-}
-EXPORT_SYMBOL(ip_nat_setup_info);
-
-/* Returns true if succeeded. */
-static int
-manip_pkt(u_int16_t proto,
-	  struct sk_buff **pskb,
-	  unsigned int iphdroff,
-	  const struct ip_conntrack_tuple *target,
-	  enum ip_nat_manip_type maniptype)
-{
-	struct iphdr *iph;
-	struct ip_nat_protocol *p;
-
-	if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
-		return 0;
-
-	iph = (void *)(*pskb)->data + iphdroff;
-
-	/* Manipulate protcol part. */
-
-	/* rcu_read_lock()ed by nf_hook_slow */
-	p = __ip_nat_proto_find(proto);
-	if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
-		return 0;
-
-	iph = (void *)(*pskb)->data + iphdroff;
-
-	if (maniptype == IP_NAT_MANIP_SRC) {
-		nf_csum_replace4(&iph->check, iph->saddr, target->src.ip);
-		iph->saddr = target->src.ip;
-	} else {
-		nf_csum_replace4(&iph->check, iph->daddr, target->dst.ip);
-		iph->daddr = target->dst.ip;
-	}
-	return 1;
-}
-
-/* Do packet manipulations according to ip_nat_setup_info. */
-unsigned int ip_nat_packet(struct ip_conntrack *ct,
-			   enum ip_conntrack_info ctinfo,
-			   unsigned int hooknum,
-			   struct sk_buff **pskb)
-{
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	unsigned long statusbit;
-	enum ip_nat_manip_type mtype = HOOK2MANIP(hooknum);
-
-	if (mtype == IP_NAT_MANIP_SRC)
-		statusbit = IPS_SRC_NAT;
-	else
-		statusbit = IPS_DST_NAT;
-
-	/* Invert if this is reply dir. */
-	if (dir == IP_CT_DIR_REPLY)
-		statusbit ^= IPS_NAT_MASK;
-
-	/* Non-atomic: these bits don't change. */
-	if (ct->status & statusbit) {
-		struct ip_conntrack_tuple target;
-
-		/* We are aiming to look like inverse of other direction. */
-		invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-
-		if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
-			return NF_DROP;
-	}
-	return NF_ACCEPT;
-}
-EXPORT_SYMBOL_GPL(ip_nat_packet);
-
-/* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int ip_nat_icmp_reply_translation(struct ip_conntrack *ct,
-				  enum ip_conntrack_info ctinfo,
-				  unsigned int hooknum,
-				  struct sk_buff **pskb)
-{
-	struct {
-		struct icmphdr icmp;
-		struct iphdr ip;
-	} *inside;
-	struct ip_conntrack_protocol *proto;
-	struct ip_conntrack_tuple inner, target;
-	int hdrlen = ip_hdrlen(*pskb);
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	unsigned long statusbit;
-	enum ip_nat_manip_type manip = HOOK2MANIP(hooknum);
-
-	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
-		return 0;
-
-	inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
-
-	/* We're actually going to mangle it beyond trivial checksum
-	   adjustment, so make sure the current checksum is correct. */
-	if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
-		return 0;
-
-	/* Must be RELATED */
-	IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
-		     (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
-
-	/* Redirects on non-null nats must be dropped, else they'll
-	   start talking to each other without our translation, and be
-	   confused... --RR */
-	if (inside->icmp.type == ICMP_REDIRECT) {
-		/* If NAT isn't finished, assume it and drop. */
-		if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
-			return 0;
-
-		if (ct->status & IPS_NAT_MASK)
-			return 0;
-	}
-
-	DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
-	       *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
-
-	/* rcu_read_lock()ed by nf_hook_slow */
-	proto = __ip_conntrack_proto_find(inside->ip.protocol);
-	if (!ip_ct_get_tuple(&inside->ip, *pskb, ip_hdrlen(*pskb) +
-			     sizeof(struct icmphdr) + inside->ip.ihl*4,
-			     &inner, proto))
-		return 0;
-
-	/* Change inner back to look like incoming packet.  We do the
-	   opposite manip on this hook to normal, because it might not
-	   pass all hooks (locally-generated ICMP).  Consider incoming
-	   packet: PREROUTING (DST manip), routing produces ICMP, goes
-	   through POSTROUTING (which must correct the DST manip). */
-	if (!manip_pkt(inside->ip.protocol, pskb,
-		       ip_hdrlen(*pskb) + sizeof(inside->icmp),
-		       &ct->tuplehash[!dir].tuple,
-		       !manip))
-		return 0;
-
-	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
-		/* Reloading "inside" here since manip_pkt inner. */
-		inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
-		inside->icmp.checksum = 0;
-		inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
-							       (*pskb)->len - hdrlen,
-							       0));
-	}
-
-	/* Change outer to look the reply to an incoming packet
-	 * (proto 0 means don't invert per-proto part). */
-	if (manip == IP_NAT_MANIP_SRC)
-		statusbit = IPS_SRC_NAT;
-	else
-		statusbit = IPS_DST_NAT;
-
-	/* Invert if this is reply dir. */
-	if (dir == IP_CT_DIR_REPLY)
-		statusbit ^= IPS_NAT_MASK;
-
-	if (ct->status & statusbit) {
-		invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-		if (!manip_pkt(0, pskb, 0, &target, manip))
-			return 0;
-	}
-
-	return 1;
-}
-EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation);
-
-/* Protocol registration. */
-int ip_nat_protocol_register(struct ip_nat_protocol *proto)
-{
-	int ret = 0;
-
-	write_lock_bh(&ip_nat_lock);
-	if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
-		ret = -EBUSY;
-		goto out;
-	}
-	rcu_assign_pointer(ip_nat_protos[proto->protonum], proto);
- out:
-	write_unlock_bh(&ip_nat_lock);
-	return ret;
-}
-EXPORT_SYMBOL(ip_nat_protocol_register);
-
-/* Noone stores the protocol anywhere; simply delete it. */
-void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
-{
-	write_lock_bh(&ip_nat_lock);
-	rcu_assign_pointer(ip_nat_protos[proto->protonum],
-			   &ip_nat_unknown_protocol);
-	write_unlock_bh(&ip_nat_lock);
-	synchronize_rcu();
-}
-EXPORT_SYMBOL(ip_nat_protocol_unregister);
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-int
-ip_nat_port_range_to_nfattr(struct sk_buff *skb,
-			    const struct ip_nat_range *range)
-{
-	NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(__be16),
-		&range->min.tcp.port);
-	NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(__be16),
-		&range->max.tcp.port);
-
-	return 0;
-
-nfattr_failure:
-	return -1;
-}
-
-int
-ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
-{
-	int ret = 0;
-
-	/* we have to return whether we actually parsed something or not */
-
-	if (tb[CTA_PROTONAT_PORT_MIN-1]) {
-		ret = 1;
-		range->min.tcp.port =
-			*(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
-	}
-
-	if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
-		if (ret)
-			range->max.tcp.port = range->min.tcp.port;
-	} else {
-		ret = 1;
-		range->max.tcp.port =
-			*(__be16 *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
-EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
-#endif
-
-static int __init ip_nat_init(void)
-{
-	size_t i;
-
-	/* Leave them the same for the moment. */
-	ip_nat_htable_size = ip_conntrack_htable_size;
-
-	/* One vmalloc for both hash tables */
-	bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size);
-	if (!bysource)
-		return -ENOMEM;
-
-	/* Sew in builtin protocols. */
-	write_lock_bh(&ip_nat_lock);
-	for (i = 0; i < MAX_IP_NAT_PROTO; i++)
-		rcu_assign_pointer(ip_nat_protos[i], &ip_nat_unknown_protocol);
-	rcu_assign_pointer(ip_nat_protos[IPPROTO_TCP], &ip_nat_protocol_tcp);
-	rcu_assign_pointer(ip_nat_protos[IPPROTO_UDP], &ip_nat_protocol_udp);
-	rcu_assign_pointer(ip_nat_protos[IPPROTO_ICMP], &ip_nat_protocol_icmp);
-	write_unlock_bh(&ip_nat_lock);
-
-	for (i = 0; i < ip_nat_htable_size; i++) {
-		INIT_LIST_HEAD(&bysource[i]);
-	}
-
-	/* FIXME: Man, this is a hack.  <SIGH> */
-	IP_NF_ASSERT(rcu_dereference(ip_conntrack_destroyed) == NULL);
-	rcu_assign_pointer(ip_conntrack_destroyed, ip_nat_cleanup_conntrack);
-
-	/* Initialize fake conntrack so that NAT will skip it */
-	ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
-	return 0;
-}
-
-/* Clear NAT section of all conntracks, in case we're loaded again. */
-static int clean_nat(struct ip_conntrack *i, void *data)
-{
-	memset(&i->nat, 0, sizeof(i->nat));
-	i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
-	return 0;
-}
-
-static void __exit ip_nat_cleanup(void)
-{
-	ip_ct_iterate_cleanup(&clean_nat, NULL);
-	rcu_assign_pointer(ip_conntrack_destroyed, NULL);
-	synchronize_rcu();
-	vfree(bysource);
-}
-
-MODULE_LICENSE("GPL");
-
-module_init(ip_nat_init);
-module_exit(ip_nat_cleanup);
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
deleted file mode 100644
index 32e01d8dffc..00000000000
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/* FTP extension for TCP NAT alteration. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/moduleparam.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
-MODULE_DESCRIPTION("ftp NAT helper");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* FIXME: Time out? --RR */
-
-static int
-mangle_rfc959_packet(struct sk_buff **pskb,
-		     __be32 newip,
-		     u_int16_t port,
-		     unsigned int matchoff,
-		     unsigned int matchlen,
-		     struct ip_conntrack *ct,
-		     enum ip_conntrack_info ctinfo,
-		     u32 *seq)
-{
-	char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];
-
-	sprintf(buffer, "%u,%u,%u,%u,%u,%u",
-		NIPQUAD(newip), port>>8, port&0xFF);
-
-	DEBUGP("calling ip_nat_mangle_tcp_packet\n");
-
-	*seq += strlen(buffer) - matchlen;
-	return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
-					matchlen, buffer, strlen(buffer));
-}
-
-/* |1|132.235.1.2|6275| */
-static int
-mangle_eprt_packet(struct sk_buff **pskb,
-		   __be32 newip,
-		   u_int16_t port,
-		   unsigned int matchoff,
-		   unsigned int matchlen,
-		   struct ip_conntrack *ct,
-		   enum ip_conntrack_info ctinfo,
-		   u32 *seq)
-{
-	char buffer[sizeof("|1|255.255.255.255|65535|")];
-
-	sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port);
-
-	DEBUGP("calling ip_nat_mangle_tcp_packet\n");
-
-	*seq += strlen(buffer) - matchlen;
-	return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
-					matchlen, buffer, strlen(buffer));
-}
-
-/* |1|132.235.1.2|6275| */
-static int
-mangle_epsv_packet(struct sk_buff **pskb,
-		   __be32 newip,
-		   u_int16_t port,
-		   unsigned int matchoff,
-		   unsigned int matchlen,
-		   struct ip_conntrack *ct,
-		   enum ip_conntrack_info ctinfo,
-		   u32 *seq)
-{
-	char buffer[sizeof("|||65535|")];
-
-	sprintf(buffer, "|||%u|", port);
-
-	DEBUGP("calling ip_nat_mangle_tcp_packet\n");
-
-	*seq += strlen(buffer) - matchlen;
-	return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
-					matchlen, buffer, strlen(buffer));
-}
-
-static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
-		     unsigned int,
-		     unsigned int,
-		     struct ip_conntrack *,
-		     enum ip_conntrack_info,
-		     u32 *seq)
-= { [IP_CT_FTP_PORT] = mangle_rfc959_packet,
-    [IP_CT_FTP_PASV] = mangle_rfc959_packet,
-    [IP_CT_FTP_EPRT] = mangle_eprt_packet,
-    [IP_CT_FTP_EPSV] = mangle_epsv_packet
-};
-
-/* So, this packet has hit the connection tracking matching code.
-   Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_ftp(struct sk_buff **pskb,
-			       enum ip_conntrack_info ctinfo,
-			       enum ip_ct_ftp_type type,
-			       unsigned int matchoff,
-			       unsigned int matchlen,
-			       struct ip_conntrack_expect *exp,
-			       u32 *seq)
-{
-	__be32 newip;
-	u_int16_t port;
-	int dir = CTINFO2DIR(ctinfo);
-	struct ip_conntrack *ct = exp->master;
-
-	DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
-
-	/* Connection will come from wherever this packet goes, hence !dir */
-	newip = ct->tuplehash[!dir].tuple.dst.ip;
-	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
-	exp->dir = !dir;
-
-	/* When you see the packet, we need to NAT it the same as the
-	 * this one. */
-	exp->expectfn = ip_nat_follow_master;
-
-	/* Try to get same port: if not, try to change it. */
-	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
-		exp->tuple.dst.u.tcp.port = htons(port);
-		if (ip_conntrack_expect_related(exp) == 0)
-			break;
-	}
-
-	if (port == 0)
-		return NF_DROP;
-
-	if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo,
-			  seq)) {
-		ip_conntrack_unexpect_related(exp);
-		return NF_DROP;
-	}
-	return NF_ACCEPT;
-}
-
-static void __exit ip_nat_ftp_fini(void)
-{
-	rcu_assign_pointer(ip_nat_ftp_hook, NULL);
-	synchronize_rcu();
-}
-
-static int __init ip_nat_ftp_init(void)
-{
-	BUG_ON(rcu_dereference(ip_nat_ftp_hook));
-	rcu_assign_pointer(ip_nat_ftp_hook, ip_nat_ftp);
-	return 0;
-}
-
-/* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
-{
-	printk(KERN_INFO KBUILD_MODNAME
-	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
-	return 0;
-}
-module_param_call(ports, warn_set, NULL, NULL, 0);
-
-module_init(ip_nat_ftp_init);
-module_exit(ip_nat_ftp_fini);
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
deleted file mode 100644
index 4cddc295174..00000000000
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ /dev/null
@@ -1,436 +0,0 @@
-/* ip_nat_helper.c - generic support functions for NAT helpers
- *
- * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
- * (C) 2003-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 	14 Jan 2002 Harald Welte <laforge@gnumonks.org>:
- *		- add support for SACK adjustment
- *	14 Mar 2002 Harald Welte <laforge@gnumonks.org>:
- *		- merge SACK support into newnat API
- *	16 Aug 2002 Brian J. Murrell <netfilter@interlinx.bc.ca>:
- *		- make ip_nat_resize_packet more generic (TCP and UDP)
- *		- add ip_nat_mangle_udp_packet
- */
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h>
-#include <net/udp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-
-#if 0
-#define DEBUGP printk
-#define DUMP_OFFSET(x)	printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos);
-#else
-#define DEBUGP(format, args...)
-#define DUMP_OFFSET(x)
-#endif
-
-static DEFINE_SPINLOCK(ip_nat_seqofs_lock);
-
-/* Setup TCP sequence correction given this change at this sequence */
-static inline void
-adjust_tcp_sequence(u32 seq,
-		    int sizediff,
-		    struct ip_conntrack *ct,
-		    enum ip_conntrack_info ctinfo)
-{
-	int dir;
-	struct ip_nat_seq *this_way, *other_way;
-
-	DEBUGP("ip_nat_resize_packet: old_size = %u, new_size = %u\n",
-		(*skb)->len, new_size);
-
-	dir = CTINFO2DIR(ctinfo);
-
-	this_way = &ct->nat.info.seq[dir];
-	other_way = &ct->nat.info.seq[!dir];
-
-	DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
-	DUMP_OFFSET(this_way);
-
-	spin_lock_bh(&ip_nat_seqofs_lock);
-
-	/* SYN adjust. If it's uninitialized, or this is after last
-	 * correction, record it: we don't handle more than one
-	 * adjustment in the window, but do deal with common case of a
-	 * retransmit */
-	if (this_way->offset_before == this_way->offset_after
-	    || before(this_way->correction_pos, seq)) {
-		    this_way->correction_pos = seq;
-		    this_way->offset_before = this_way->offset_after;
-		    this_way->offset_after += sizediff;
-	}
-	spin_unlock_bh(&ip_nat_seqofs_lock);
-
-	DEBUGP("ip_nat_resize_packet: Seq_offset after: ");
-	DUMP_OFFSET(this_way);
-}
-
-/* Frobs data inside this packet, which is linear. */
-static void mangle_contents(struct sk_buff *skb,
-			    unsigned int dataoff,
-			    unsigned int match_offset,
-			    unsigned int match_len,
-			    const char *rep_buffer,
-			    unsigned int rep_len)
-{
-	unsigned char *data;
-
-	BUG_ON(skb_is_nonlinear(skb));
-	data = skb_network_header(skb) + dataoff;
-
-	/* move post-replacement */
-	memmove(data + match_offset + rep_len,
-		data + match_offset + match_len,
-		skb->tail - (data + match_offset + match_len));
-
-	/* insert data from buffer */
-	memcpy(data + match_offset, rep_buffer, rep_len);
-
-	/* update skb info */
-	if (rep_len > match_len) {
-		DEBUGP("ip_nat_mangle_packet: Extending packet by "
-			"%u from %u bytes\n", rep_len - match_len,
-		       skb->len);
-		skb_put(skb, rep_len - match_len);
-	} else {
-		DEBUGP("ip_nat_mangle_packet: Shrinking packet from "
-			"%u from %u bytes\n", match_len - rep_len,
-		       skb->len);
-		__skb_trim(skb, skb->len + rep_len - match_len);
-	}
-
-	/* fix IP hdr checksum information */
-	ip_hdr(skb)->tot_len = htons(skb->len);
-	ip_send_check(ip_hdr(skb));
-}
-
-/* Unusual, but possible case. */
-static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
-{
-	struct sk_buff *nskb;
-
-	if ((*pskb)->len + extra > 65535)
-		return 0;
-
-	nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC);
-	if (!nskb)
-		return 0;
-
-	/* Transfer socket to new skb. */
-	if ((*pskb)->sk)
-		skb_set_owner_w(nskb, (*pskb)->sk);
-	kfree_skb(*pskb);
-	*pskb = nskb;
-	return 1;
-}
-
-/* Generic function for mangling variable-length address changes inside
- * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
- * command in FTP).
- *
- * Takes care about all the nasty sequence number changes, checksumming,
- * skb enlargement, ...
- *
- * */
-int
-ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
-			 struct ip_conntrack *ct,
-			 enum ip_conntrack_info ctinfo,
-			 unsigned int match_offset,
-			 unsigned int match_len,
-			 const char *rep_buffer,
-			 unsigned int rep_len)
-{
-	struct iphdr *iph;
-	struct tcphdr *tcph;
-	int oldlen, datalen;
-
-	if (!skb_make_writable(pskb, (*pskb)->len))
-		return 0;
-
-	if (rep_len > match_len
-	    && rep_len - match_len > skb_tailroom(*pskb)
-	    && !enlarge_skb(pskb, rep_len - match_len))
-		return 0;
-
-	SKB_LINEAR_ASSERT(*pskb);
-
-	iph = ip_hdr(*pskb);
-	tcph = (void *)iph + iph->ihl*4;
-
-	oldlen = (*pskb)->len - iph->ihl*4;
-	mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
-			match_offset, match_len, rep_buffer, rep_len);
-
-	datalen = (*pskb)->len - iph->ihl*4;
-	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
-		tcph->check = 0;
-		tcph->check = tcp_v4_check(datalen,
-					   iph->saddr, iph->daddr,
-					   csum_partial((char *)tcph,
-							datalen, 0));
-	} else
-		nf_proto_csum_replace2(&tcph->check, *pskb,
-					htons(oldlen), htons(datalen), 1);
-
-	if (rep_len != match_len) {
-		set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
-		adjust_tcp_sequence(ntohl(tcph->seq),
-				    (int)rep_len - (int)match_len,
-				    ct, ctinfo);
-		/* Tell TCP window tracking about seq change */
-		ip_conntrack_tcp_update(*pskb, ct, CTINFO2DIR(ctinfo));
-	}
-	return 1;
-}
-EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
-
-/* Generic function for mangling variable-length address changes inside
- * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
- * command in the Amanda protocol)
- *
- * Takes care about all the nasty sequence number changes, checksumming,
- * skb enlargement, ...
- *
- * XXX - This function could be merged with ip_nat_mangle_tcp_packet which
- *       should be fairly easy to do.
- */
-int
-ip_nat_mangle_udp_packet(struct sk_buff **pskb,
-			 struct ip_conntrack *ct,
-			 enum ip_conntrack_info ctinfo,
-			 unsigned int match_offset,
-			 unsigned int match_len,
-			 const char *rep_buffer,
-			 unsigned int rep_len)
-{
-	struct iphdr *iph;
-	struct udphdr *udph;
-	int datalen, oldlen;
-
-	/* UDP helpers might accidentally mangle the wrong packet */
-	iph = ip_hdr(*pskb);
-	if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
-			       match_offset + match_len)
-		return 0;
-
-	if (!skb_make_writable(pskb, (*pskb)->len))
-		return 0;
-
-	if (rep_len > match_len
-	    && rep_len - match_len > skb_tailroom(*pskb)
-	    && !enlarge_skb(pskb, rep_len - match_len))
-		return 0;
-
-	iph = ip_hdr(*pskb);
-	udph = (void *)iph + iph->ihl*4;
-
-	oldlen = (*pskb)->len - iph->ihl*4;
-	mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
-			match_offset, match_len, rep_buffer, rep_len);
-
-	/* update the length of the UDP packet */
-	datalen = (*pskb)->len - iph->ihl*4;
-	udph->len = htons(datalen);
-
-	/* fix udp checksum if udp checksum was previously calculated */
-	if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
-		return 1;
-
-	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
-		udph->check = 0;
-		udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
-						datalen, IPPROTO_UDP,
-						csum_partial((char *)udph,
-							     datalen, 0));
-		if (!udph->check)
-			udph->check = CSUM_MANGLED_0;
-	} else
-		nf_proto_csum_replace2(&udph->check, *pskb,
-					htons(oldlen), htons(datalen), 1);
-	return 1;
-}
-EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
-
-/* Adjust one found SACK option including checksum correction */
-static void
-sack_adjust(struct sk_buff *skb,
-	    struct tcphdr *tcph,
-	    unsigned int sackoff,
-	    unsigned int sackend,
-	    struct ip_nat_seq *natseq)
-{
-	while (sackoff < sackend) {
-		struct tcp_sack_block_wire *sack;
-		__be32 new_start_seq, new_end_seq;
-
-		sack = (void *)skb->data + sackoff;
-		if (after(ntohl(sack->start_seq) - natseq->offset_before,
-			  natseq->correction_pos))
-			new_start_seq = htonl(ntohl(sack->start_seq)
-					- natseq->offset_after);
-		else
-			new_start_seq = htonl(ntohl(sack->start_seq)
-					- natseq->offset_before);
-
-		if (after(ntohl(sack->end_seq) - natseq->offset_before,
-			  natseq->correction_pos))
-			new_end_seq = htonl(ntohl(sack->end_seq)
-				      - natseq->offset_after);
-		else
-			new_end_seq = htonl(ntohl(sack->end_seq)
-				      - natseq->offset_before);
-
-		DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
-			ntohl(sack->start_seq), new_start_seq,
-			ntohl(sack->end_seq), new_end_seq);
-
-		nf_proto_csum_replace4(&tcph->check, skb,
-					sack->start_seq, new_start_seq, 0);
-		nf_proto_csum_replace4(&tcph->check, skb,
-					sack->end_seq, new_end_seq, 0);
-		sack->start_seq = new_start_seq;
-		sack->end_seq = new_end_seq;
-		sackoff += sizeof(*sack);
-	}
-}
-
-/* TCP SACK sequence number adjustment */
-static inline unsigned int
-ip_nat_sack_adjust(struct sk_buff **pskb,
-		   struct tcphdr *tcph,
-		   struct ip_conntrack *ct,
-		   enum ip_conntrack_info ctinfo)
-{
-	unsigned int dir, optoff, optend;
-
-	optoff = ip_hdrlen(*pskb) + sizeof(struct tcphdr);
-	optend = ip_hdrlen(*pskb) + tcph->doff * 4;
-
-	if (!skb_make_writable(pskb, optend))
-		return 0;
-
-	dir = CTINFO2DIR(ctinfo);
-
-	while (optoff < optend) {
-		/* Usually: option, length. */
-		unsigned char *op = (*pskb)->data + optoff;
-
-		switch (op[0]) {
-		case TCPOPT_EOL:
-			return 1;
-		case TCPOPT_NOP:
-			optoff++;
-			continue;
-		default:
-			/* no partial options */
-			if (optoff + 1 == optend
-			    || optoff + op[1] > optend
-			    || op[1] < 2)
-				return 0;
-			if (op[0] == TCPOPT_SACK
-			    && op[1] >= 2+TCPOLEN_SACK_PERBLOCK
-			    && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
-				sack_adjust(*pskb, tcph, optoff+2,
-					    optoff+op[1],
-					    &ct->nat.info.seq[!dir]);
-			optoff += op[1];
-		}
-	}
-	return 1;
-}
-
-/* TCP sequence number adjustment.  Returns 1 on success, 0 on failure */
-int
-ip_nat_seq_adjust(struct sk_buff **pskb,
-		  struct ip_conntrack *ct,
-		  enum ip_conntrack_info ctinfo)
-{
-	struct tcphdr *tcph;
-	int dir;
-	__be32 newseq, newack;
-	struct ip_nat_seq *this_way, *other_way;
-
-	dir = CTINFO2DIR(ctinfo);
-
-	this_way = &ct->nat.info.seq[dir];
-	other_way = &ct->nat.info.seq[!dir];
-
-	if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
-		return 0;
-
-	tcph = (void *)(*pskb)->data + ip_hdrlen(*pskb);
-	if (after(ntohl(tcph->seq), this_way->correction_pos))
-		newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
-	else
-		newseq = htonl(ntohl(tcph->seq) + this_way->offset_before);
-
-	if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
-		  other_way->correction_pos))
-		newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after);
-	else
-		newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
-
-	nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0);
-	nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0);
-
-	DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
-		ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
-		ntohl(newack));
-
-	tcph->seq = newseq;
-	tcph->ack_seq = newack;
-
-	if (!ip_nat_sack_adjust(pskb, tcph, ct, ctinfo))
-		return 0;
-
-	ip_conntrack_tcp_update(*pskb, ct, dir);
-
-	return 1;
-}
-EXPORT_SYMBOL(ip_nat_seq_adjust);
-
-/* Setup NAT on this expected conntrack so it follows master. */
-/* If we fail to get a free NAT slot, we'll get dropped on confirm */
-void ip_nat_follow_master(struct ip_conntrack *ct,
-			  struct ip_conntrack_expect *exp)
-{
-	struct ip_nat_range range;
-
-	/* This must be a fresh one. */
-	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
-
-	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
-	range.min_ip = range.max_ip
-		= ct->master->tuplehash[!exp->dir].tuple.dst.ip;
-	/* hook doesn't matter, but it has to do source manip */
-	ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
-
-	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
-	range.min = range.max = exp->saved_proto;
-	range.min_ip = range.max_ip
-		= ct->master->tuplehash[!exp->dir].tuple.src.ip;
-	/* hook doesn't matter, but it has to do destination manip */
-	ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
-}
-EXPORT_SYMBOL(ip_nat_follow_master);
diff --git a/net/ipv4/netfilter/ip_nat_helper_h323.c b/net/ipv4/netfilter/ip_nat_helper_h323.c
deleted file mode 100644
index 0d9444f9236..00000000000
--- a/net/ipv4/netfilter/ip_nat_helper_h323.c
+++ /dev/null
@@ -1,611 +0,0 @@
-/*
- * H.323 extension for NAT alteration.
- *
- * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
- *
- * This source code is licensed under General Public License version 2.
- *
- * Based on the 'brute force' H.323 NAT module by
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/moduleparam.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-#include <linux/netfilter_ipv4/ip_conntrack_h323.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/****************************************************************************/
-static int set_addr(struct sk_buff **pskb,
-		    unsigned char **data, int dataoff,
-		    unsigned int addroff, __be32 ip, u_int16_t port)
-{
-	enum ip_conntrack_info ctinfo;
-	struct ip_conntrack *ct = ip_conntrack_get(*pskb, &ctinfo);
-	struct {
-		__be32 ip;
-		__be16 port;
-	} __attribute__ ((__packed__)) buf;
-	struct tcphdr _tcph, *th;
-
-	buf.ip = ip;
-	buf.port = htons(port);
-	addroff += dataoff;
-
-	if (ip_hdr(*pskb)->protocol == IPPROTO_TCP) {
-		if (!ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
-					      addroff, sizeof(buf),
-					      (char *) &buf, sizeof(buf))) {
-			if (net_ratelimit())
-				printk("ip_nat_h323: ip_nat_mangle_tcp_packet"
-				       " error\n");
-			return -1;
-		}
-
-		/* Relocate data pointer */
-		th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
-					sizeof(_tcph), &_tcph);
-		if (th == NULL)
-			return -1;
-		*data = (*pskb)->data + ip_hdrlen(*pskb) +
-		    th->doff * 4 + dataoff;
-	} else {
-		if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
-					      addroff, sizeof(buf),
-					      (char *) &buf, sizeof(buf))) {
-			if (net_ratelimit())
-				printk("ip_nat_h323: ip_nat_mangle_udp_packet"
-				       " error\n");
-			return -1;
-		}
-		/* ip_nat_mangle_udp_packet uses skb_make_writable() to copy
-		 * or pull everything in a linear buffer, so we can safely
-		 * use the skb pointers now */
-		*data = ((*pskb)->data + ip_hdrlen(*pskb) +
-			 sizeof(struct udphdr));
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int set_h225_addr(struct sk_buff **pskb,
-			 unsigned char **data, int dataoff,
-			 TransportAddress * addr,
-			 __be32 ip, u_int16_t port)
-{
-	return set_addr(pskb, data, dataoff, addr->ipAddress.ip, ip, port);
-}
-
-/****************************************************************************/
-static int set_h245_addr(struct sk_buff **pskb,
-			 unsigned char **data, int dataoff,
-			 H245_TransportAddress * addr,
-			 __be32 ip, u_int16_t port)
-{
-	return set_addr(pskb, data, dataoff,
-			addr->unicastAddress.iPAddress.network, ip, port);
-}
-
-/****************************************************************************/
-static int set_sig_addr(struct sk_buff **pskb, struct ip_conntrack *ct,
-			enum ip_conntrack_info ctinfo,
-			unsigned char **data,
-			TransportAddress * addr, int count)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	int i;
-	__be32 ip;
-	u_int16_t port;
-
-	for (i = 0; i < count; i++) {
-		if (get_h225_addr(*data, &addr[i], &ip, &port)) {
-			if (ip == ct->tuplehash[dir].tuple.src.ip &&
-			    port == info->sig_port[dir]) {
-				/* GW->GK */
-
-				/* Fix for Gnomemeeting */
-				if (i > 0 &&
-				    get_h225_addr(*data, &addr[0],
-						  &ip, &port) &&
-				    (ntohl(ip) & 0xff000000) == 0x7f000000)
-					i = 0;
-
-				DEBUGP
-				    ("ip_nat_ras: set signal address "
-				     "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-				     NIPQUAD(ip), port,
-				     NIPQUAD(ct->tuplehash[!dir].tuple.dst.
-					     ip), info->sig_port[!dir]);
-				return set_h225_addr(pskb, data, 0, &addr[i],
-						     ct->tuplehash[!dir].
-						     tuple.dst.ip,
-						     info->sig_port[!dir]);
-			} else if (ip == ct->tuplehash[dir].tuple.dst.ip &&
-				   port == info->sig_port[dir]) {
-				/* GK->GW */
-				DEBUGP
-				    ("ip_nat_ras: set signal address "
-				     "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-				     NIPQUAD(ip), port,
-				     NIPQUAD(ct->tuplehash[!dir].tuple.src.
-					     ip), info->sig_port[!dir]);
-				return set_h225_addr(pskb, data, 0, &addr[i],
-						     ct->tuplehash[!dir].
-						     tuple.src.ip,
-						     info->sig_port[!dir]);
-			}
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int set_ras_addr(struct sk_buff **pskb, struct ip_conntrack *ct,
-			enum ip_conntrack_info ctinfo,
-			unsigned char **data,
-			TransportAddress * addr, int count)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	int i;
-	__be32 ip;
-	u_int16_t port;
-
-	for (i = 0; i < count; i++) {
-		if (get_h225_addr(*data, &addr[i], &ip, &port) &&
-		    ip == ct->tuplehash[dir].tuple.src.ip &&
-		    port == ntohs(ct->tuplehash[dir].tuple.src.u.udp.port)) {
-			DEBUGP("ip_nat_ras: set rasAddress "
-			       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-			       NIPQUAD(ip), port,
-			       NIPQUAD(ct->tuplehash[!dir].tuple.dst.ip),
-			       ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.
-				     port));
-			return set_h225_addr(pskb, data, 0, &addr[i],
-					     ct->tuplehash[!dir].tuple.dst.ip,
-					     ntohs(ct->tuplehash[!dir].tuple.
-						   dst.u.udp.port));
-		}
-	}
-
-	return 0;
-}
-
-/****************************************************************************/
-static int nat_rtp_rtcp(struct sk_buff **pskb, struct ip_conntrack *ct,
-			enum ip_conntrack_info ctinfo,
-			unsigned char **data, int dataoff,
-			H245_TransportAddress * addr,
-			u_int16_t port, u_int16_t rtp_port,
-			struct ip_conntrack_expect *rtp_exp,
-			struct ip_conntrack_expect *rtcp_exp)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	int i;
-	u_int16_t nated_port;
-
-	/* Set expectations for NAT */
-	rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
-	rtp_exp->expectfn = ip_nat_follow_master;
-	rtp_exp->dir = !dir;
-	rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
-	rtcp_exp->expectfn = ip_nat_follow_master;
-	rtcp_exp->dir = !dir;
-
-	/* Lookup existing expects */
-	for (i = 0; i < H323_RTP_CHANNEL_MAX; i++) {
-		if (info->rtp_port[i][dir] == rtp_port) {
-			/* Expected */
-
-			/* Use allocated ports first. This will refresh
-			 * the expects */
-			rtp_exp->tuple.dst.u.udp.port =
-			    htons(info->rtp_port[i][dir]);
-			rtcp_exp->tuple.dst.u.udp.port =
-			    htons(info->rtp_port[i][dir] + 1);
-			break;
-		} else if (info->rtp_port[i][dir] == 0) {
-			/* Not expected */
-			break;
-		}
-	}
-
-	/* Run out of expectations */
-	if (i >= H323_RTP_CHANNEL_MAX) {
-		if (net_ratelimit())
-			printk("ip_nat_h323: out of expectations\n");
-		return 0;
-	}
-
-	/* Try to get a pair of ports. */
-	for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
-	     nated_port != 0; nated_port += 2) {
-		rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
-		if (ip_conntrack_expect_related(rtp_exp) == 0) {
-			rtcp_exp->tuple.dst.u.udp.port =
-			    htons(nated_port + 1);
-			if (ip_conntrack_expect_related(rtcp_exp) == 0)
-				break;
-			ip_conntrack_unexpect_related(rtp_exp);
-		}
-	}
-
-	if (nated_port == 0) {	/* No port available */
-		if (net_ratelimit())
-			printk("ip_nat_h323: out of RTP ports\n");
-		return 0;
-	}
-
-	/* Modify signal */
-	if (set_h245_addr(pskb, data, dataoff, addr,
-			  ct->tuplehash[!dir].tuple.dst.ip,
-			  (port & 1) ? nated_port + 1 : nated_port) == 0) {
-		/* Save ports */
-		info->rtp_port[i][dir] = rtp_port;
-		info->rtp_port[i][!dir] = nated_port;
-	} else {
-		ip_conntrack_unexpect_related(rtp_exp);
-		ip_conntrack_unexpect_related(rtcp_exp);
-		return -1;
-	}
-
-	/* Success */
-	DEBUGP("ip_nat_h323: expect RTP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(rtp_exp->tuple.src.ip),
-	       ntohs(rtp_exp->tuple.src.u.udp.port),
-	       NIPQUAD(rtp_exp->tuple.dst.ip),
-	       ntohs(rtp_exp->tuple.dst.u.udp.port));
-	DEBUGP("ip_nat_h323: expect RTCP %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(rtcp_exp->tuple.src.ip),
-	       ntohs(rtcp_exp->tuple.src.u.udp.port),
-	       NIPQUAD(rtcp_exp->tuple.dst.ip),
-	       ntohs(rtcp_exp->tuple.dst.u.udp.port));
-
-	return 0;
-}
-
-/****************************************************************************/
-static int nat_t120(struct sk_buff **pskb, struct ip_conntrack *ct,
-		    enum ip_conntrack_info ctinfo,
-		    unsigned char **data, int dataoff,
-		    H245_TransportAddress * addr, u_int16_t port,
-		    struct ip_conntrack_expect *exp)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	u_int16_t nated_port = port;
-
-	/* Set expectations for NAT */
-	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
-	exp->expectfn = ip_nat_follow_master;
-	exp->dir = !dir;
-
-	/* Try to get same port: if not, try to change it. */
-	for (; nated_port != 0; nated_port++) {
-		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (ip_conntrack_expect_related(exp) == 0)
-			break;
-	}
-
-	if (nated_port == 0) {	/* No port available */
-		if (net_ratelimit())
-			printk("ip_nat_h323: out of TCP ports\n");
-		return 0;
-	}
-
-	/* Modify signal */
-	if (set_h245_addr(pskb, data, dataoff, addr,
-			  ct->tuplehash[!dir].tuple.dst.ip, nated_port) < 0) {
-		ip_conntrack_unexpect_related(exp);
-		return -1;
-	}
-
-	DEBUGP("ip_nat_h323: expect T.120 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
-	       NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
-
-	return 0;
-}
-
-/****************************************************************************
- * This conntrack expect function replaces ip_conntrack_h245_expect()
- * which was set by ip_conntrack_helper_h323.c. It calls both
- * ip_nat_follow_master() and ip_conntrack_h245_expect()
- ****************************************************************************/
-static void ip_nat_h245_expect(struct ip_conntrack *new,
-			       struct ip_conntrack_expect *this)
-{
-	ip_nat_follow_master(new, this);
-	ip_conntrack_h245_expect(new, this);
-}
-
-/****************************************************************************/
-static int nat_h245(struct sk_buff **pskb, struct ip_conntrack *ct,
-		    enum ip_conntrack_info ctinfo,
-		    unsigned char **data, int dataoff,
-		    TransportAddress * addr, u_int16_t port,
-		    struct ip_conntrack_expect *exp)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	u_int16_t nated_port = port;
-
-	/* Set expectations for NAT */
-	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
-	exp->expectfn = ip_nat_h245_expect;
-	exp->dir = !dir;
-
-	/* Check existing expects */
-	if (info->sig_port[dir] == port)
-		nated_port = info->sig_port[!dir];
-
-	/* Try to get same port: if not, try to change it. */
-	for (; nated_port != 0; nated_port++) {
-		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (ip_conntrack_expect_related(exp) == 0)
-			break;
-	}
-
-	if (nated_port == 0) {	/* No port available */
-		if (net_ratelimit())
-			printk("ip_nat_q931: out of TCP ports\n");
-		return 0;
-	}
-
-	/* Modify signal */
-	if (set_h225_addr(pskb, data, dataoff, addr,
-			  ct->tuplehash[!dir].tuple.dst.ip,
-			  nated_port) == 0) {
-		/* Save ports */
-		info->sig_port[dir] = port;
-		info->sig_port[!dir] = nated_port;
-	} else {
-		ip_conntrack_unexpect_related(exp);
-		return -1;
-	}
-
-	DEBUGP("ip_nat_q931: expect H.245 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
-	       NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
-
-	return 0;
-}
-
-/****************************************************************************
- * This conntrack expect function replaces ip_conntrack_q931_expect()
- * which was set by ip_conntrack_helper_h323.c.
- ****************************************************************************/
-static void ip_nat_q931_expect(struct ip_conntrack *new,
-			       struct ip_conntrack_expect *this)
-{
-	struct ip_nat_range range;
-
-	if (this->tuple.src.ip != 0) {	/* Only accept calls from GK */
-		ip_nat_follow_master(new, this);
-		goto out;
-	}
-
-	/* This must be a fresh one. */
-	BUG_ON(new->status & IPS_NAT_DONE_MASK);
-
-	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
-	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.ip;
-
-	/* hook doesn't matter, but it has to do source manip */
-	ip_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
-
-	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
-	range.min = range.max = this->saved_proto;
-	range.min_ip = range.max_ip =
-	    new->master->tuplehash[!this->dir].tuple.src.ip;
-
-	/* hook doesn't matter, but it has to do destination manip */
-	ip_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
-
-      out:
-	ip_conntrack_q931_expect(new, this);
-}
-
-/****************************************************************************/
-static int nat_q931(struct sk_buff **pskb, struct ip_conntrack *ct,
-		    enum ip_conntrack_info ctinfo,
-		    unsigned char **data, TransportAddress * addr, int idx,
-		    u_int16_t port, struct ip_conntrack_expect *exp)
-{
-	struct ip_ct_h323_master *info = &ct->help.ct_h323_info;
-	int dir = CTINFO2DIR(ctinfo);
-	u_int16_t nated_port = port;
-	__be32 ip;
-
-	/* Set expectations for NAT */
-	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
-	exp->expectfn = ip_nat_q931_expect;
-	exp->dir = !dir;
-
-	/* Check existing expects */
-	if (info->sig_port[dir] == port)
-		nated_port = info->sig_port[!dir];
-
-	/* Try to get same port: if not, try to change it. */
-	for (; nated_port != 0; nated_port++) {
-		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (ip_conntrack_expect_related(exp) == 0)
-			break;
-	}
-
-	if (nated_port == 0) {	/* No port available */
-		if (net_ratelimit())
-			printk("ip_nat_ras: out of TCP ports\n");
-		return 0;
-	}
-
-	/* Modify signal */
-	if (set_h225_addr(pskb, data, 0, &addr[idx],
-			  ct->tuplehash[!dir].tuple.dst.ip,
-			  nated_port) == 0) {
-		/* Save ports */
-		info->sig_port[dir] = port;
-		info->sig_port[!dir] = nated_port;
-
-		/* Fix for Gnomemeeting */
-		if (idx > 0 &&
-		    get_h225_addr(*data, &addr[0], &ip, &port) &&
-		    (ntohl(ip) & 0xff000000) == 0x7f000000) {
-			set_h225_addr_hook(pskb, data, 0, &addr[0],
-					   ct->tuplehash[!dir].tuple.dst.ip,
-					   info->sig_port[!dir]);
-		}
-	} else {
-		ip_conntrack_unexpect_related(exp);
-		return -1;
-	}
-
-	/* Success */
-	DEBUGP("ip_nat_ras: expect Q.931 %u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
-	       NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
-
-	return 0;
-}
-
-/****************************************************************************/
-static void ip_nat_callforwarding_expect(struct ip_conntrack *new,
-					 struct ip_conntrack_expect *this)
-{
-	struct ip_nat_range range;
-
-	/* This must be a fresh one. */
-	BUG_ON(new->status & IPS_NAT_DONE_MASK);
-
-	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
-	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.ip;
-
-	/* hook doesn't matter, but it has to do source manip */
-	ip_nat_setup_info(new, &range, NF_IP_POST_ROUTING);
-
-	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
-	range.min = range.max = this->saved_proto;
-	range.min_ip = range.max_ip = this->saved_ip;
-
-	/* hook doesn't matter, but it has to do destination manip */
-	ip_nat_setup_info(new, &range, NF_IP_PRE_ROUTING);
-
-	ip_conntrack_q931_expect(new, this);
-}
-
-/****************************************************************************/
-static int nat_callforwarding(struct sk_buff **pskb, struct ip_conntrack *ct,
-			      enum ip_conntrack_info ctinfo,
-			      unsigned char **data, int dataoff,
-			      TransportAddress * addr, u_int16_t port,
-			      struct ip_conntrack_expect *exp)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	u_int16_t nated_port;
-
-	/* Set expectations for NAT */
-	exp->saved_ip = exp->tuple.dst.ip;
-	exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
-	exp->expectfn = ip_nat_callforwarding_expect;
-	exp->dir = !dir;
-
-	/* Try to get same port: if not, try to change it. */
-	for (nated_port = port; nated_port != 0; nated_port++) {
-		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (ip_conntrack_expect_related(exp) == 0)
-			break;
-	}
-
-	if (nated_port == 0) {	/* No port available */
-		if (net_ratelimit())
-			printk("ip_nat_q931: out of TCP ports\n");
-		return 0;
-	}
-
-	/* Modify signal */
-	if (!set_h225_addr(pskb, data, dataoff, addr,
-			   ct->tuplehash[!dir].tuple.dst.ip,
-			   nated_port) == 0) {
-		ip_conntrack_unexpect_related(exp);
-		return -1;
-	}
-
-	/* Success */
-	DEBUGP("ip_nat_q931: expect Call Forwarding "
-	       "%u.%u.%u.%u:%hu->%u.%u.%u.%u:%hu\n",
-	       NIPQUAD(exp->tuple.src.ip), ntohs(exp->tuple.src.u.tcp.port),
-	       NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port));
-
-	return 0;
-}
-
-/****************************************************************************/
-static int __init init(void)
-{
-	BUG_ON(rcu_dereference(set_h245_addr_hook) != NULL);
-	BUG_ON(rcu_dereference(set_h225_addr_hook) != NULL);
-	BUG_ON(rcu_dereference(set_sig_addr_hook) != NULL);
-	BUG_ON(rcu_dereference(set_ras_addr_hook) != NULL);
-	BUG_ON(rcu_dereference(nat_rtp_rtcp_hook) != NULL);
-	BUG_ON(rcu_dereference(nat_t120_hook) != NULL);
-	BUG_ON(rcu_dereference(nat_h245_hook) != NULL);
-	BUG_ON(rcu_dereference(nat_callforwarding_hook) != NULL);
-	BUG_ON(rcu_dereference(nat_q931_hook) != NULL);
-
-	rcu_assign_pointer(set_h245_addr_hook, set_h245_addr);
-	rcu_assign_pointer(set_h225_addr_hook, set_h225_addr);
-	rcu_assign_pointer(set_sig_addr_hook, set_sig_addr);
-	rcu_assign_pointer(set_ras_addr_hook, set_ras_addr);
-	rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp);
-	rcu_assign_pointer(nat_t120_hook, nat_t120);
-	rcu_assign_pointer(nat_h245_hook, nat_h245);
-	rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding);
-	rcu_assign_pointer(nat_q931_hook, nat_q931);
-
-	DEBUGP("ip_nat_h323: init success\n");
-	return 0;
-}
-
-/****************************************************************************/
-static void __exit fini(void)
-{
-	rcu_assign_pointer(set_h245_addr_hook, NULL);
-	rcu_assign_pointer(set_h225_addr_hook, NULL);
-	rcu_assign_pointer(set_sig_addr_hook, NULL);
-	rcu_assign_pointer(set_ras_addr_hook, NULL);
-	rcu_assign_pointer(nat_rtp_rtcp_hook, NULL);
-	rcu_assign_pointer(nat_t120_hook, NULL);
-	rcu_assign_pointer(nat_h245_hook, NULL);
-	rcu_assign_pointer(nat_callforwarding_hook, NULL);
-	rcu_assign_pointer(nat_q931_hook, NULL);
-	synchronize_rcu();
-}
-
-/****************************************************************************/
-module_init(init);
-module_exit(fini);
-
-MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
-MODULE_DESCRIPTION("H.323 NAT helper");
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
deleted file mode 100644
index 24ce4a5023d..00000000000
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ /dev/null
@@ -1,350 +0,0 @@
-/*
- * ip_nat_pptp.c	- Version 3.0
- *
- * NAT support for PPTP (Point to Point Tunneling Protocol).
- * PPTP is a a protocol for creating virtual private networks.
- * It is a specification defined by Microsoft and some vendors
- * working with Microsoft.  PPTP is built on top of a modified
- * version of the Internet Generic Routing Encapsulation Protocol.
- * GRE is defined in RFC 1701 and RFC 1702.  Documentation of
- * PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- * TODO: - NAT to a unique tuple, not to TCP source port
- * 	   (needs netfilter tuple reservation)
- *
- * Changes:
- *     2002-02-10 - Version 1.3
- *       - Use ip_nat_mangle_tcp_packet() because of cloned skb's
- *	   in local connections (Philip Craig <philipc@snapgear.com>)
- *       - add checks for magicCookie and pptp version
- *       - make argument list of pptp_{out,in}bound_packet() shorter
- *       - move to C99 style initializers
- *       - print version number at module loadtime
- *     2003-09-22 - Version 1.5
- *       - use SNATed tcp sourceport as callid, since we get called before
- *	   TCP header is mangled (Philip Craig <philipc@snapgear.com>)
- *     2004-10-22 - Version 2.0
- *       - kernel 2.6.x version
- *     2005-06-10 - Version 3.0
- *       - kernel >= 2.6.11 version,
- *	   funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
- *
- */
-
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_pptp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-#define IP_NAT_PPTP_VERSION "3.0"
-
-#define REQ_CID(req, off)		(*(__be16 *)((char *)(req) + (off)))
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
-
-
-#if 0
-extern const char *pptp_msg_name[];
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
-				       __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static void pptp_nat_expected(struct ip_conntrack *ct,
-			      struct ip_conntrack_expect *exp)
-{
-	struct ip_conntrack *master = ct->master;
-	struct ip_conntrack_expect *other_exp;
-	struct ip_conntrack_tuple t;
-	struct ip_ct_pptp_master *ct_pptp_info;
-	struct ip_nat_pptp *nat_pptp_info;
-	struct ip_nat_range range;
-
-	ct_pptp_info = &master->help.ct_pptp_info;
-	nat_pptp_info = &master->nat.help.nat_pptp_info;
-
-	/* And here goes the grand finale of corrosion... */
-
-	if (exp->dir == IP_CT_DIR_ORIGINAL) {
-		DEBUGP("we are PNS->PAC\n");
-		/* therefore, build tuple for PAC->PNS */
-		t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
-		t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id;
-		t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
-		t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id;
-		t.dst.protonum = IPPROTO_GRE;
-	} else {
-		DEBUGP("we are PAC->PNS\n");
-		/* build tuple for PNS->PAC */
-		t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
-		t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id;
-		t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
-		t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id;
-		t.dst.protonum = IPPROTO_GRE;
-	}
-
-	DEBUGP("trying to unexpect other dir: ");
-	DUMP_TUPLE(&t);
-	other_exp = ip_conntrack_expect_find_get(&t);
-	if (other_exp) {
-		ip_conntrack_unexpect_related(other_exp);
-		ip_conntrack_expect_put(other_exp);
-		DEBUGP("success\n");
-	} else {
-		DEBUGP("not found!\n");
-	}
-
-	/* This must be a fresh one. */
-	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
-
-	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
-	range.min_ip = range.max_ip
-		= ct->master->tuplehash[!exp->dir].tuple.dst.ip;
-	if (exp->dir == IP_CT_DIR_ORIGINAL) {
-		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
-		range.min = range.max = exp->saved_proto;
-	}
-	/* hook doesn't matter, but it has to do source manip */
-	ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
-
-	/* For DST manip, map port here to where it's expected. */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
-	range.min_ip = range.max_ip
-		= ct->master->tuplehash[!exp->dir].tuple.src.ip;
-	if (exp->dir == IP_CT_DIR_REPLY) {
-		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
-		range.min = range.max = exp->saved_proto;
-	}
-	/* hook doesn't matter, but it has to do destination manip */
-	ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
-}
-
-/* outbound packets == from PNS to PAC */
-static int
-pptp_outbound_pkt(struct sk_buff **pskb,
-		  struct ip_conntrack *ct,
-		  enum ip_conntrack_info ctinfo,
-		  struct PptpControlHeader *ctlh,
-		  union pptp_ctrl_union *pptpReq)
-
-{
-	struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
-	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-	u_int16_t msg;
-	__be16 new_callid;
-	unsigned int cid_off;
-
-	new_callid = ct_pptp_info->pns_call_id;
-
-	switch (msg = ntohs(ctlh->messageType)) {
-	case PPTP_OUT_CALL_REQUEST:
-		cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
-		/* FIXME: ideally we would want to reserve a call ID
-		 * here.  current netfilter NAT core is not able to do
-		 * this :( For now we use TCP source port. This breaks
-		 * multiple calls within one control session */
-
-		/* save original call ID in nat_info */
-		nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
-
-		/* don't use tcph->source since we are at a DSTmanip
-		 * hook (e.g. PREROUTING) and pkt is not mangled yet */
-		new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
-
-		/* save new call ID in ct info */
-		ct_pptp_info->pns_call_id = new_callid;
-		break;
-	case PPTP_IN_CALL_REPLY:
-		cid_off = offsetof(union pptp_ctrl_union, icack.callID);
-		break;
-	case PPTP_CALL_CLEAR_REQUEST:
-		cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
-		break;
-	default:
-		DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
-		      (msg <= PPTP_MSG_MAX)?
-		      pptp_msg_name[msg]:pptp_msg_name[0]);
-		/* fall through */
-
-	case PPTP_SET_LINK_INFO:
-		/* only need to NAT in case PAC is behind NAT box */
-	case PPTP_START_SESSION_REQUEST:
-	case PPTP_START_SESSION_REPLY:
-	case PPTP_STOP_SESSION_REQUEST:
-	case PPTP_STOP_SESSION_REPLY:
-	case PPTP_ECHO_REQUEST:
-	case PPTP_ECHO_REPLY:
-		/* no need to alter packet */
-		return NF_ACCEPT;
-	}
-
-	/* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
-	 * down to here */
-	DEBUGP("altering call id from 0x%04x to 0x%04x\n",
-		ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
-
-	/* mangle packet */
-	if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
-				     cid_off + sizeof(struct pptp_pkt_hdr) +
-				     sizeof(struct PptpControlHeader),
-				     sizeof(new_callid), (char *)&new_callid,
-				     sizeof(new_callid)) == 0)
-		return NF_DROP;
-
-	return NF_ACCEPT;
-}
-
-static void
-pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
-	     struct ip_conntrack_expect *expect_reply)
-{
-	struct ip_conntrack *ct = expect_orig->master;
-	struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
-	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-
-	/* save original PAC call ID in nat_info */
-	nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
-
-	/* alter expectation for PNS->PAC direction */
-	expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id;
-	expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id;
-	expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id;
-	expect_orig->dir = IP_CT_DIR_ORIGINAL;
-
-	/* alter expectation for PAC->PNS direction */
-	expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id;
-	expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id;
-	expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id;
-	expect_reply->dir = IP_CT_DIR_REPLY;
-}
-
-/* inbound packets == from PAC to PNS */
-static int
-pptp_inbound_pkt(struct sk_buff **pskb,
-		 struct ip_conntrack *ct,
-		 enum ip_conntrack_info ctinfo,
-		 struct PptpControlHeader *ctlh,
-		 union pptp_ctrl_union *pptpReq)
-{
-	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
-	u_int16_t msg;
-	__be16 new_pcid;
-	unsigned int pcid_off;
-
-	new_pcid = nat_pptp_info->pns_call_id;
-
-	switch (msg = ntohs(ctlh->messageType)) {
-	case PPTP_OUT_CALL_REPLY:
-		pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
-		break;
-	case PPTP_IN_CALL_CONNECT:
-		pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
-		break;
-	case PPTP_IN_CALL_REQUEST:
-		/* only need to nat in case PAC is behind NAT box */
-		return NF_ACCEPT;
-	case PPTP_WAN_ERROR_NOTIFY:
-		pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID);
-		break;
-	case PPTP_CALL_DISCONNECT_NOTIFY:
-		pcid_off = offsetof(union pptp_ctrl_union, disc.callID);
-		break;
-	case PPTP_SET_LINK_INFO:
-		pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
-		break;
-
-	default:
-		DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
-			pptp_msg_name[msg]:pptp_msg_name[0]);
-		/* fall through */
-
-	case PPTP_START_SESSION_REQUEST:
-	case PPTP_START_SESSION_REPLY:
-	case PPTP_STOP_SESSION_REQUEST:
-	case PPTP_STOP_SESSION_REPLY:
-	case PPTP_ECHO_REQUEST:
-	case PPTP_ECHO_REPLY:
-		/* no need to alter packet */
-		return NF_ACCEPT;
-	}
-
-	/* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST,
-	 * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
-
-	/* mangle packet */
-	DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
-		ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
-
-	if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
-				     pcid_off + sizeof(struct pptp_pkt_hdr) +
-				     sizeof(struct PptpControlHeader),
-				     sizeof(new_pcid), (char *)&new_pcid,
-				     sizeof(new_pcid)) == 0)
-		return NF_DROP;
-	return NF_ACCEPT;
-}
-
-
-extern int __init ip_nat_proto_gre_init(void);
-extern void __exit ip_nat_proto_gre_fini(void);
-
-static int __init ip_nat_helper_pptp_init(void)
-{
-	int ret;
-
-	DEBUGP("%s: registering NAT helper\n", __FILE__);
-
-	ret = ip_nat_proto_gre_init();
-	if (ret < 0)
-		return ret;
-
-	BUG_ON(rcu_dereference(ip_nat_pptp_hook_outbound));
-	rcu_assign_pointer(ip_nat_pptp_hook_outbound, pptp_outbound_pkt);
-
-	BUG_ON(rcu_dereference(ip_nat_pptp_hook_inbound));
-	rcu_assign_pointer(ip_nat_pptp_hook_inbound, pptp_inbound_pkt);
-
-	BUG_ON(rcu_dereference(ip_nat_pptp_hook_exp_gre));
-	rcu_assign_pointer(ip_nat_pptp_hook_exp_gre, pptp_exp_gre);
-
-	BUG_ON(rcu_dereference(ip_nat_pptp_hook_expectfn));
-	rcu_assign_pointer(ip_nat_pptp_hook_expectfn, pptp_nat_expected);
-
-	printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION);
-	return 0;
-}
-
-static void __exit ip_nat_helper_pptp_fini(void)
-{
-	DEBUGP("cleanup_module\n" );
-
-	rcu_assign_pointer(ip_nat_pptp_hook_expectfn, NULL);
-	rcu_assign_pointer(ip_nat_pptp_hook_exp_gre, NULL);
-	rcu_assign_pointer(ip_nat_pptp_hook_inbound, NULL);
-	rcu_assign_pointer(ip_nat_pptp_hook_outbound, NULL);
-	synchronize_rcu();
-
-	ip_nat_proto_gre_fini();
-
-	printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION);
-}
-
-module_init(ip_nat_helper_pptp_init);
-module_exit(ip_nat_helper_pptp_fini);
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
deleted file mode 100644
index cfaeea38314..00000000000
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/* IRC extension for TCP NAT alteration.
- * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
- * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
- * based on a copy of RR's ip_nat_ftp.c
- *
- * ip_nat_irc.c,v 1.16 2001/12/06 07:42:10 laforge Exp
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/kernel.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/moduleparam.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("IRC (DCC) NAT helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int help(struct sk_buff **pskb,
-			 enum ip_conntrack_info ctinfo,
-			 unsigned int matchoff,
-			 unsigned int matchlen,
-			 struct ip_conntrack_expect *exp)
-{
-	u_int16_t port;
-	unsigned int ret;
-
-	/* "4294967296 65635 " */
-	char buffer[18];
-
-	DEBUGP("IRC_NAT: info (seq %u + %u) in %u\n",
-	       expect->seq, exp_irc_info->len,
-	       ntohl(tcph->seq));
-
-	/* Reply comes from server. */
-	exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
-	exp->dir = IP_CT_DIR_REPLY;
-
-	/* When you see the packet, we need to NAT it the same as the
-	 * this one. */
-	exp->expectfn = ip_nat_follow_master;
-
-	/* Try to get same port: if not, try to change it. */
-	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
-		exp->tuple.dst.u.tcp.port = htons(port);
-		if (ip_conntrack_expect_related(exp) == 0)
-			break;
-	}
-
-	if (port == 0)
-		return NF_DROP;
-
-	/*      strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27
-	 *      strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28
-	 *      strlen("\1DCC SEND F AAAAAAAA P S\1\n")=26
-	 *      strlen("\1DCC MOVE F AAAAAAAA P S\1\n")=26
-	 *      strlen("\1DCC TSEND F AAAAAAAA P S\1\n")=27
-	 *              AAAAAAAAA: bound addr (1.0.0.0==16777216, min 8 digits,
-	 *                      255.255.255.255==4294967296, 10 digits)
-	 *              P:         bound port (min 1 d, max 5d (65635))
-	 *              F:         filename   (min 1 d )
-	 *              S:         size       (min 1 d )
-	 *              0x01, \n:  terminators
-	 */
-
-	/* AAA = "us", ie. where server normally talks to. */
-	sprintf(buffer, "%u %u",
-		ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip),
-		port);
-	DEBUGP("ip_nat_irc: Inserting '%s' == %u.%u.%u.%u, port %u\n",
-	       buffer, NIPQUAD(exp->tuple.src.ip), port);
-
-	ret = ip_nat_mangle_tcp_packet(pskb, exp->master, ctinfo,
-				       matchoff, matchlen, buffer,
-				       strlen(buffer));
-	if (ret != NF_ACCEPT)
-		ip_conntrack_unexpect_related(exp);
-	return ret;
-}
-
-static void __exit ip_nat_irc_fini(void)
-{
-	rcu_assign_pointer(ip_nat_irc_hook, NULL);
-	synchronize_rcu();
-}
-
-static int __init ip_nat_irc_init(void)
-{
-	BUG_ON(rcu_dereference(ip_nat_irc_hook));
-	rcu_assign_pointer(ip_nat_irc_hook, help);
-	return 0;
-}
-
-/* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
-{
-	printk(KERN_INFO KBUILD_MODNAME
-	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
-	return 0;
-}
-module_param_call(ports, warn_set, NULL, NULL, 0);
-
-module_init(ip_nat_irc_init);
-module_exit(ip_nat_irc_fini);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
deleted file mode 100644
index 95810202d84..00000000000
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * ip_nat_proto_gre.c - Version 2.0
- *
- * NAT protocol helper module for GRE.
- *
- * GRE is a generic encapsulation protocol, which is generally not very
- * suited for NAT, as it has no protocol-specific part as port numbers.
- *
- * It has an optional key field, which may help us distinguishing two
- * connections between the same two hosts.
- *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
- *
- * PPTP is built on top of a modified version of GRE, and has a mandatory
- * field called "CallID", which serves us for the same purpose as the key
- * field in plain GRE.
- *
- * Documentation about PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- */
-
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
-
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
-				       __FUNCTION__, ## args)
-#else
-#define DEBUGP(x, args...)
-#endif
-
-/* is key in given range between min and max */
-static int
-gre_in_range(const struct ip_conntrack_tuple *tuple,
-	     enum ip_nat_manip_type maniptype,
-	     const union ip_conntrack_manip_proto *min,
-	     const union ip_conntrack_manip_proto *max)
-{
-	__be16 key;
-
-	if (maniptype == IP_NAT_MANIP_SRC)
-		key = tuple->src.u.gre.key;
-	else
-		key = tuple->dst.u.gre.key;
-
-	return ntohs(key) >= ntohs(min->gre.key)
-		&& ntohs(key) <= ntohs(max->gre.key);
-}
-
-/* generate unique tuple ... */
-static int
-gre_unique_tuple(struct ip_conntrack_tuple *tuple,
-		 const struct ip_nat_range *range,
-		 enum ip_nat_manip_type maniptype,
-		 const struct ip_conntrack *conntrack)
-{
-	static u_int16_t key;
-	__be16 *keyptr;
-	unsigned int min, i, range_size;
-
-	if (maniptype == IP_NAT_MANIP_SRC)
-		keyptr = &tuple->src.u.gre.key;
-	else
-		keyptr = &tuple->dst.u.gre.key;
-
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
-		DEBUGP("%p: NATing GRE PPTP\n", conntrack);
-		min = 1;
-		range_size = 0xffff;
-	} else {
-		min = ntohs(range->min.gre.key);
-		range_size = ntohs(range->max.gre.key) - min + 1;
-	}
-
-	DEBUGP("min = %u, range_size = %u\n", min, range_size);
-
-	for (i = 0; i < range_size; i++, key++) {
-		*keyptr = htons(min + key % range_size);
-		if (!ip_nat_used_tuple(tuple, conntrack))
-			return 1;
-	}
-
-	DEBUGP("%p: no NAT mapping\n", conntrack);
-
-	return 0;
-}
-
-/* manipulate a GRE packet according to maniptype */
-static int
-gre_manip_pkt(struct sk_buff **pskb,
-	      unsigned int iphdroff,
-	      const struct ip_conntrack_tuple *tuple,
-	      enum ip_nat_manip_type maniptype)
-{
-	struct gre_hdr *greh;
-	struct gre_hdr_pptp *pgreh;
-	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
-	unsigned int hdroff = iphdroff + iph->ihl*4;
-
-	/* pgreh includes two optional 32bit fields which are not required
-	 * to be there.  That's where the magic '8' comes from */
-	if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8))
-		return 0;
-
-	greh = (void *)(*pskb)->data + hdroff;
-	pgreh = (struct gre_hdr_pptp *) greh;
-
-	/* we only have destination manip of a packet, since 'source key'
-	 * is not present in the packet itself */
-	if (maniptype == IP_NAT_MANIP_DST) {
-		/* key manipulation is always dest */
-		switch (greh->version) {
-		case 0:
-			if (!greh->key) {
-				DEBUGP("can't nat GRE w/o key\n");
-				break;
-			}
-			if (greh->csum) {
-				/* FIXME: Never tested this code... */
-				nf_proto_csum_replace4(gre_csum(greh), *pskb,
-							*(gre_key(greh)),
-							tuple->dst.u.gre.key, 0);
-			}
-			*(gre_key(greh)) = tuple->dst.u.gre.key;
-			break;
-		case GRE_VERSION_PPTP:
-			DEBUGP("call_id -> 0x%04x\n",
-				ntohs(tuple->dst.u.gre.key));
-			pgreh->call_id = tuple->dst.u.gre.key;
-			break;
-		default:
-			DEBUGP("can't nat unknown GRE version\n");
-			return 0;
-			break;
-		}
-	}
-	return 1;
-}
-
-/* nat helper struct */
-static struct ip_nat_protocol gre = {
-	.name		= "GRE",
-	.protonum	= IPPROTO_GRE,
-	.manip_pkt	= gre_manip_pkt,
-	.in_range	= gre_in_range,
-	.unique_tuple	= gre_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
-	.nfattr_to_range	= ip_nat_port_nfattr_to_range,
-#endif
-};
-
-int __init ip_nat_proto_gre_init(void)
-{
-	return ip_nat_protocol_register(&gre);
-}
-
-void __exit ip_nat_proto_gre_fini(void)
-{
-	ip_nat_protocol_unregister(&gre);
-}
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
deleted file mode 100644
index 22a528ae038..00000000000
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/icmp.h>
-#include <linux/if.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-static int
-icmp_in_range(const struct ip_conntrack_tuple *tuple,
-	      enum ip_nat_manip_type maniptype,
-	      const union ip_conntrack_manip_proto *min,
-	      const union ip_conntrack_manip_proto *max)
-{
-	return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
-	       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
-}
-
-static int
-icmp_unique_tuple(struct ip_conntrack_tuple *tuple,
-		  const struct ip_nat_range *range,
-		  enum ip_nat_manip_type maniptype,
-		  const struct ip_conntrack *conntrack)
-{
-	static u_int16_t id;
-	unsigned int range_size;
-	unsigned int i;
-
-	range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1;
-	/* If no range specified... */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
-		range_size = 0xFFFF;
-
-	for (i = 0; i < range_size; i++, id++) {
-		tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
-					     (id % range_size));
-		if (!ip_nat_used_tuple(tuple, conntrack))
-			return 1;
-	}
-	return 0;
-}
-
-static int
-icmp_manip_pkt(struct sk_buff **pskb,
-	       unsigned int iphdroff,
-	       const struct ip_conntrack_tuple *tuple,
-	       enum ip_nat_manip_type maniptype)
-{
-	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
-	struct icmphdr *hdr;
-	unsigned int hdroff = iphdroff + iph->ihl*4;
-
-	if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
-		return 0;
-
-	hdr = (struct icmphdr *)((*pskb)->data + hdroff);
-	nf_proto_csum_replace2(&hdr->checksum, *pskb,
-			       hdr->un.echo.id, tuple->src.u.icmp.id, 0);
-	hdr->un.echo.id = tuple->src.u.icmp.id;
-	return 1;
-}
-
-struct ip_nat_protocol ip_nat_protocol_icmp = {
-	.name			= "ICMP",
-	.protonum		= IPPROTO_ICMP,
-	.me			= THIS_MODULE,
-	.manip_pkt		= icmp_manip_pkt,
-	.in_range		= icmp_in_range,
-	.unique_tuple		= icmp_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
-	.nfattr_to_range	= ip_nat_port_nfattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
deleted file mode 100644
index 14ff24f53a7..00000000000
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/if.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-
-static int
-tcp_in_range(const struct ip_conntrack_tuple *tuple,
-	     enum ip_nat_manip_type maniptype,
-	     const union ip_conntrack_manip_proto *min,
-	     const union ip_conntrack_manip_proto *max)
-{
-	__be16 port;
-
-	if (maniptype == IP_NAT_MANIP_SRC)
-		port = tuple->src.u.tcp.port;
-	else
-		port = tuple->dst.u.tcp.port;
-
-	return ntohs(port) >= ntohs(min->tcp.port)
-		&& ntohs(port) <= ntohs(max->tcp.port);
-}
-
-static int
-tcp_unique_tuple(struct ip_conntrack_tuple *tuple,
-		 const struct ip_nat_range *range,
-		 enum ip_nat_manip_type maniptype,
-		 const struct ip_conntrack *conntrack)
-{
-	static u_int16_t port;
-	__be16 *portptr;
-	unsigned int range_size, min, i;
-
-	if (maniptype == IP_NAT_MANIP_SRC)
-		portptr = &tuple->src.u.tcp.port;
-	else
-		portptr = &tuple->dst.u.tcp.port;
-
-	/* If no range specified... */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
-		/* If it's dst rewrite, can't change port */
-		if (maniptype == IP_NAT_MANIP_DST)
-			return 0;
-
-		/* Map privileged onto privileged. */
-		if (ntohs(*portptr) < 1024) {
-			/* Loose convention: >> 512 is credential passing */
-			if (ntohs(*portptr)<512) {
-				min = 1;
-				range_size = 511 - min + 1;
-			} else {
-				min = 600;
-				range_size = 1023 - min + 1;
-			}
-		} else {
-			min = 1024;
-			range_size = 65535 - 1024 + 1;
-		}
-	} else {
-		min = ntohs(range->min.tcp.port);
-		range_size = ntohs(range->max.tcp.port) - min + 1;
-	}
-
-	/* Start from random port to avoid prediction */
-	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
-		port =  net_random();
-
-	for (i = 0; i < range_size; i++, port++) {
-		*portptr = htons(min + port % range_size);
-		if (!ip_nat_used_tuple(tuple, conntrack)) {
-			return 1;
-		}
-	}
-	return 0;
-}
-
-static int
-tcp_manip_pkt(struct sk_buff **pskb,
-	      unsigned int iphdroff,
-	      const struct ip_conntrack_tuple *tuple,
-	      enum ip_nat_manip_type maniptype)
-{
-	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
-	struct tcphdr *hdr;
-	unsigned int hdroff = iphdroff + iph->ihl*4;
-	__be32 oldip, newip;
-	__be16 *portptr, newport, oldport;
-	int hdrsize = 8; /* TCP connection tracking guarantees this much */
-
-	/* this could be a inner header returned in icmp packet; in such
-	   cases we cannot update the checksum field since it is outside of
-	   the 8 bytes of transport layer headers we are guaranteed */
-	if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
-		hdrsize = sizeof(struct tcphdr);
-
-	if (!skb_make_writable(pskb, hdroff + hdrsize))
-		return 0;
-
-	iph = (struct iphdr *)((*pskb)->data + iphdroff);
-	hdr = (struct tcphdr *)((*pskb)->data + hdroff);
-
-	if (maniptype == IP_NAT_MANIP_SRC) {
-		/* Get rid of src ip and src pt */
-		oldip = iph->saddr;
-		newip = tuple->src.ip;
-		newport = tuple->src.u.tcp.port;
-		portptr = &hdr->source;
-	} else {
-		/* Get rid of dst ip and dst pt */
-		oldip = iph->daddr;
-		newip = tuple->dst.ip;
-		newport = tuple->dst.u.tcp.port;
-		portptr = &hdr->dest;
-	}
-
-	oldport = *portptr;
-	*portptr = newport;
-
-	if (hdrsize < sizeof(*hdr))
-		return 1;
-
-	nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
-	nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0);
-	return 1;
-}
-
-struct ip_nat_protocol ip_nat_protocol_tcp = {
-	.name			= "TCP",
-	.protonum		= IPPROTO_TCP,
-	.me			= THIS_MODULE,
-	.manip_pkt		= tcp_manip_pkt,
-	.in_range		= tcp_in_range,
-	.unique_tuple		= tcp_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
-	.nfattr_to_range	= ip_nat_port_nfattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
deleted file mode 100644
index dfd52167289..00000000000
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/if.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-static int
-udp_in_range(const struct ip_conntrack_tuple *tuple,
-	     enum ip_nat_manip_type maniptype,
-	     const union ip_conntrack_manip_proto *min,
-	     const union ip_conntrack_manip_proto *max)
-{
-	__be16 port;
-
-	if (maniptype == IP_NAT_MANIP_SRC)
-		port = tuple->src.u.udp.port;
-	else
-		port = tuple->dst.u.udp.port;
-
-	return ntohs(port) >= ntohs(min->udp.port)
-		&& ntohs(port) <= ntohs(max->udp.port);
-}
-
-static int
-udp_unique_tuple(struct ip_conntrack_tuple *tuple,
-		 const struct ip_nat_range *range,
-		 enum ip_nat_manip_type maniptype,
-		 const struct ip_conntrack *conntrack)
-{
-	static u_int16_t port;
-	__be16 *portptr;
-	unsigned int range_size, min, i;
-
-	if (maniptype == IP_NAT_MANIP_SRC)
-		portptr = &tuple->src.u.udp.port;
-	else
-		portptr = &tuple->dst.u.udp.port;
-
-	/* If no range specified... */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
-		/* If it's dst rewrite, can't change port */
-		if (maniptype == IP_NAT_MANIP_DST)
-			return 0;
-
-		if (ntohs(*portptr) < 1024) {
-			/* Loose convention: >> 512 is credential passing */
-			if (ntohs(*portptr)<512) {
-				min = 1;
-				range_size = 511 - min + 1;
-			} else {
-				min = 600;
-				range_size = 1023 - min + 1;
-			}
-		} else {
-			min = 1024;
-			range_size = 65535 - 1024 + 1;
-		}
-	} else {
-		min = ntohs(range->min.udp.port);
-		range_size = ntohs(range->max.udp.port) - min + 1;
-	}
-
-	/* Start from random port to avoid prediction */
-	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
-		port = net_random();
-
-	for (i = 0; i < range_size; i++, port++) {
-		*portptr = htons(min + port % range_size);
-		if (!ip_nat_used_tuple(tuple, conntrack))
-			return 1;
-	}
-	return 0;
-}
-
-static int
-udp_manip_pkt(struct sk_buff **pskb,
-	      unsigned int iphdroff,
-	      const struct ip_conntrack_tuple *tuple,
-	      enum ip_nat_manip_type maniptype)
-{
-	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
-	struct udphdr *hdr;
-	unsigned int hdroff = iphdroff + iph->ihl*4;
-	__be32 oldip, newip;
-	__be16 *portptr, newport;
-
-	if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
-		return 0;
-
-	iph = (struct iphdr *)((*pskb)->data + iphdroff);
-	hdr = (struct udphdr *)((*pskb)->data + hdroff);
-
-	if (maniptype == IP_NAT_MANIP_SRC) {
-		/* Get rid of src ip and src pt */
-		oldip = iph->saddr;
-		newip = tuple->src.ip;
-		newport = tuple->src.u.udp.port;
-		portptr = &hdr->source;
-	} else {
-		/* Get rid of dst ip and dst pt */
-		oldip = iph->daddr;
-		newip = tuple->dst.ip;
-		newport = tuple->dst.u.udp.port;
-		portptr = &hdr->dest;
-	}
-
-	if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
-		nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
-		nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport, 0);
-		if (!hdr->check)
-			hdr->check = CSUM_MANGLED_0;
-	}
-	*portptr = newport;
-	return 1;
-}
-
-struct ip_nat_protocol ip_nat_protocol_udp = {
-	.name			= "UDP",
-	.protonum		= IPPROTO_UDP,
-	.me			= THIS_MODULE,
-	.manip_pkt		= udp_manip_pkt,
-	.in_range		= udp_in_range,
-	.unique_tuple		= udp_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
-    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
-	.nfattr_to_range	= ip_nat_port_nfattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
deleted file mode 100644
index 3bf04951724..00000000000
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/* The "unknown" protocol.  This is what is used for protocols we
- * don't understand.  It's returned by ip_ct_find_proto().
- */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/netfilter.h>
-#include <linux/if.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-static int unknown_in_range(const struct ip_conntrack_tuple *tuple,
-			    enum ip_nat_manip_type manip_type,
-			    const union ip_conntrack_manip_proto *min,
-			    const union ip_conntrack_manip_proto *max)
-{
-	return 1;
-}
-
-static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple,
-				const struct ip_nat_range *range,
-				enum ip_nat_manip_type maniptype,
-				const struct ip_conntrack *conntrack)
-{
-	/* Sorry: we can't help you; if it's not unique, we can't frob
-	   anything. */
-	return 0;
-}
-
-static int
-unknown_manip_pkt(struct sk_buff **pskb,
-		  unsigned int iphdroff,
-		  const struct ip_conntrack_tuple *tuple,
-		  enum ip_nat_manip_type maniptype)
-{
-	return 1;
-}
-
-struct ip_nat_protocol ip_nat_unknown_protocol = {
-	.name			= "unknown",
-	/* .me isn't set: getting a ref to this cannot fail. */
-	.manip_pkt		= unknown_manip_pkt,
-	.in_range		= unknown_in_range,
-	.unique_tuple		= unknown_unique_tuple,
-};
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
deleted file mode 100644
index 25415a91e02..00000000000
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/* Everything about the rules for NAT. */
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <net/checksum.h>
-#include <net/route.h>
-#include <linux/bitops.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
-
-static struct
-{
-	struct ipt_replace repl;
-	struct ipt_standard entries[3];
-	struct ipt_error term;
-} nat_initial_table __initdata
-= { { "nat", NAT_VALID_HOOKS, 4,
-      sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
-      { [NF_IP_PRE_ROUTING] = 0,
-	[NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
-	[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
-      { [NF_IP_PRE_ROUTING] = 0,
-	[NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
-	[NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
-      0, NULL, { } },
-    {
-	    /* PRE_ROUTING */
-	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-		0,
-		sizeof(struct ipt_entry),
-		sizeof(struct ipt_standard),
-		0, { 0, 0 }, { } },
-	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
-		-NF_ACCEPT - 1 } },
-	    /* POST_ROUTING */
-	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-		0,
-		sizeof(struct ipt_entry),
-		sizeof(struct ipt_standard),
-		0, { 0, 0 }, { } },
-	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
-		-NF_ACCEPT - 1 } },
-	    /* LOCAL_OUT */
-	    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-		0,
-		sizeof(struct ipt_entry),
-		sizeof(struct ipt_standard),
-		0, { 0, 0 }, { } },
-	      { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
-		-NF_ACCEPT - 1 } }
-    },
-    /* ERROR */
-    { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
-	0,
-	sizeof(struct ipt_entry),
-	sizeof(struct ipt_error),
-	0, { 0, 0 }, { } },
-      { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } },
-	  { } },
-	"ERROR"
-      }
-    }
-};
-
-static struct xt_table nat_table = {
-	.name		= "nat",
-	.valid_hooks	= NAT_VALID_HOOKS,
-	.lock		= RW_LOCK_UNLOCKED,
-	.me		= THIS_MODULE,
-	.af		= AF_INET,
-};
-
-/* Source NAT */
-static unsigned int ipt_snat_target(struct sk_buff **pskb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    unsigned int hooknum,
-				    const struct xt_target *target,
-				    const void *targinfo)
-{
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-	const struct ip_nat_multi_range_compat *mr = targinfo;
-
-	IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
-
-	ct = ip_conntrack_get(*pskb, &ctinfo);
-
-	/* Connection must be valid and new. */
-	IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
-			    || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
-	IP_NF_ASSERT(out);
-
-	return ip_nat_setup_info(ct, &mr->range[0], hooknum);
-}
-
-/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
-static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
-{
-	static int warned = 0;
-	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
-	struct rtable *rt;
-
-	if (ip_route_output_key(&rt, &fl) != 0)
-		return;
-
-	if (rt->rt_src != srcip && !warned) {
-		printk("NAT: no longer support implicit source local NAT\n");
-		printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n",
-		       NIPQUAD(srcip), NIPQUAD(dstip));
-		warned = 1;
-	}
-	ip_rt_put(rt);
-}
-
-static unsigned int ipt_dnat_target(struct sk_buff **pskb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    unsigned int hooknum,
-				    const struct xt_target *target,
-				    const void *targinfo)
-{
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-	const struct ip_nat_multi_range_compat *mr = targinfo;
-
-	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
-		     || hooknum == NF_IP_LOCAL_OUT);
-
-	ct = ip_conntrack_get(*pskb, &ctinfo);
-
-	/* Connection must be valid and new. */
-	IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
-
-	if (hooknum == NF_IP_LOCAL_OUT
-	    && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
-		warn_if_extra_mangle(ip_hdr(*pskb)->daddr,
-				     mr->range[0].min_ip);
-
-	return ip_nat_setup_info(ct, &mr->range[0], hooknum);
-}
-
-static int ipt_snat_checkentry(const char *tablename,
-			       const void *entry,
-			       const struct xt_target *target,
-			       void *targinfo,
-			       unsigned int hook_mask)
-{
-	struct ip_nat_multi_range_compat *mr = targinfo;
-
-	/* Must be a valid range */
-	if (mr->rangesize != 1) {
-		printk("SNAT: multiple ranges no longer supported\n");
-		return 0;
-	}
-	return 1;
-}
-
-static int ipt_dnat_checkentry(const char *tablename,
-			       const void *entry,
-			       const struct xt_target *target,
-			       void *targinfo,
-			       unsigned int hook_mask)
-{
-	struct ip_nat_multi_range_compat *mr = targinfo;
-
-	/* Must be a valid range */
-	if (mr->rangesize != 1) {
-		printk("DNAT: multiple ranges no longer supported\n");
-		return 0;
-	}
-	if (mr->range[0].flags & IP_NAT_RANGE_PROTO_RANDOM) {
-		printk("DNAT: port randomization not supported\n");
-		return 0;
-	}
-	return 1;
-}
-
-inline unsigned int
-alloc_null_binding(struct ip_conntrack *conntrack,
-		   struct ip_nat_info *info,
-		   unsigned int hooknum)
-{
-	/* Force range to this IP; let proto decide mapping for
-	   per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
-	   Use reply in case it's already been mangled (eg local packet).
-	*/
-	__be32 ip
-		= (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
-		   ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
-		   : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
-	struct ip_nat_range range
-		= { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } };
-
-	DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n", conntrack,
-	       NIPQUAD(ip));
-	return ip_nat_setup_info(conntrack, &range, hooknum);
-}
-
-unsigned int
-alloc_null_binding_confirmed(struct ip_conntrack *conntrack,
-			     struct ip_nat_info *info,
-			     unsigned int hooknum)
-{
-	__be32 ip
-		= (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
-		   ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
-		   : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
-	u_int16_t all
-		= (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
-		   ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all
-		   : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all);
-	struct ip_nat_range range
-		= { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } };
-
-	DEBUGP("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n",
-	       conntrack, NIPQUAD(ip));
-	return ip_nat_setup_info(conntrack, &range, hooknum);
-}
-
-int ip_nat_rule_find(struct sk_buff **pskb,
-		     unsigned int hooknum,
-		     const struct net_device *in,
-		     const struct net_device *out,
-		     struct ip_conntrack *ct,
-		     struct ip_nat_info *info)
-{
-	int ret;
-
-	ret = ipt_do_table(pskb, hooknum, in, out, &nat_table);
-
-	if (ret == NF_ACCEPT) {
-		if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
-			/* NUL mapping */
-			ret = alloc_null_binding(ct, info, hooknum);
-	}
-	return ret;
-}
-
-static struct xt_target ipt_snat_reg = {
-	.name		= "SNAT",
-	.family		= AF_INET,
-	.target		= ipt_snat_target,
-	.targetsize	= sizeof(struct ip_nat_multi_range_compat),
-	.table		= "nat",
-	.hooks		= 1 << NF_IP_POST_ROUTING,
-	.checkentry	= ipt_snat_checkentry,
-};
-
-static struct xt_target ipt_dnat_reg = {
-	.name		= "DNAT",
-	.family		= AF_INET,
-	.target		= ipt_dnat_target,
-	.targetsize	= sizeof(struct ip_nat_multi_range_compat),
-	.table		= "nat",
-	.hooks		= (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT),
-	.checkentry	= ipt_dnat_checkentry,
-};
-
-int __init ip_nat_rule_init(void)
-{
-	int ret;
-
-	ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
-	if (ret != 0)
-		return ret;
-	ret = xt_register_target(&ipt_snat_reg);
-	if (ret != 0)
-		goto unregister_table;
-
-	ret = xt_register_target(&ipt_dnat_reg);
-	if (ret != 0)
-		goto unregister_snat;
-
-	return ret;
-
- unregister_snat:
-	xt_unregister_target(&ipt_snat_reg);
- unregister_table:
-	xt_unregister_table(&nat_table);
-
-	return ret;
-}
-
-void ip_nat_rule_cleanup(void)
-{
-	xt_unregister_target(&ipt_dnat_reg);
-	xt_unregister_target(&ipt_snat_reg);
-	ipt_unregister_table(&nat_table);
-}
diff --git a/net/ipv4/netfilter/ip_nat_sip.c b/net/ipv4/netfilter/ip_nat_sip.c
deleted file mode 100644
index 84953601762..00000000000
--- a/net/ipv4/netfilter/ip_nat_sip.c
+++ /dev/null
@@ -1,282 +0,0 @@
-/* SIP extension for UDP NAT alteration.
- *
- * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
- * based on RR's ip_nat_ftp.c and other modules.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_sip.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
-MODULE_DESCRIPTION("SIP NAT helper");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-struct addr_map {
-	struct {
-		char		src[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
-		char		dst[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
-		unsigned int	srclen, srciplen;
-		unsigned int	dstlen, dstiplen;
-	} addr[IP_CT_DIR_MAX];
-};
-
-static void addr_map_init(struct ip_conntrack *ct, struct addr_map *map)
-{
-	struct ip_conntrack_tuple *t;
-	enum ip_conntrack_dir dir;
-	unsigned int n;
-
-	for (dir = 0; dir < IP_CT_DIR_MAX; dir++) {
-		t = &ct->tuplehash[dir].tuple;
-
-		n = sprintf(map->addr[dir].src, "%u.%u.%u.%u",
-			    NIPQUAD(t->src.ip));
-		map->addr[dir].srciplen = n;
-		n += sprintf(map->addr[dir].src + n, ":%u",
-			     ntohs(t->src.u.udp.port));
-		map->addr[dir].srclen = n;
-
-		n = sprintf(map->addr[dir].dst, "%u.%u.%u.%u",
-			    NIPQUAD(t->dst.ip));
-		map->addr[dir].dstiplen = n;
-		n += sprintf(map->addr[dir].dst + n, ":%u",
-			     ntohs(t->dst.u.udp.port));
-		map->addr[dir].dstlen = n;
-	}
-}
-
-static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
-			struct ip_conntrack *ct, const char **dptr, size_t dlen,
-			enum sip_header_pos pos, struct addr_map *map)
-{
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	unsigned int matchlen, matchoff, addrlen;
-	char *addr;
-
-	if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, pos) <= 0)
-		return 1;
-
-	if ((matchlen == map->addr[dir].srciplen ||
-	     matchlen == map->addr[dir].srclen) &&
-	    memcmp(*dptr + matchoff, map->addr[dir].src, matchlen) == 0) {
-		addr    = map->addr[!dir].dst;
-		addrlen = map->addr[!dir].dstlen;
-	} else if ((matchlen == map->addr[dir].dstiplen ||
-		    matchlen == map->addr[dir].dstlen) &&
-		   memcmp(*dptr + matchoff, map->addr[dir].dst, matchlen) == 0) {
-		addr    = map->addr[!dir].src;
-		addrlen = map->addr[!dir].srclen;
-	} else
-		return 1;
-
-	if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
-				      matchoff, matchlen, addr, addrlen))
-		return 0;
-	*dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
-	return 1;
-
-}
-
-static unsigned int ip_nat_sip(struct sk_buff **pskb,
-			       enum ip_conntrack_info ctinfo,
-			       struct ip_conntrack *ct,
-			       const char **dptr)
-{
-	enum sip_header_pos pos;
-	struct addr_map map;
-	int dataoff, datalen;
-
-	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
-	datalen = (*pskb)->len - dataoff;
-	if (datalen < sizeof("SIP/2.0") - 1)
-		return NF_DROP;
-
-	addr_map_init(ct, &map);
-
-	/* Basic rules: requests and responses. */
-	if (strncmp(*dptr, "SIP/2.0", sizeof("SIP/2.0") - 1) != 0) {
-		/* 10.2: Constructing the REGISTER Request:
-		 *
-		 * The "userinfo" and "@" components of the SIP URI MUST NOT
-		 * be present.
-		 */
-		if (datalen >= sizeof("REGISTER") - 1 &&
-		    strncmp(*dptr, "REGISTER", sizeof("REGISTER") - 1) == 0)
-			pos = POS_REG_REQ_URI;
-		else
-			pos = POS_REQ_URI;
-
-		if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map))
-			return NF_DROP;
-	}
-
-	if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) ||
-	    !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) ||
-	    !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) ||
-	    !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map))
-		return NF_DROP;
-	return NF_ACCEPT;
-}
-
-static unsigned int mangle_sip_packet(struct sk_buff **pskb,
-				      enum ip_conntrack_info ctinfo,
-				      struct ip_conntrack *ct,
-				      const char **dptr, size_t dlen,
-				      char *buffer, int bufflen,
-				      enum sip_header_pos pos)
-{
-	unsigned int matchlen, matchoff;
-
-	if (ct_sip_get_info(*dptr, dlen, &matchoff, &matchlen, pos) <= 0)
-		return 0;
-
-	if (!ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
-				      matchoff, matchlen, buffer, bufflen))
-		return 0;
-
-	/* We need to reload this. Thanks Patrick. */
-	*dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
-	return 1;
-}
-
-static int mangle_content_len(struct sk_buff **pskb,
-			      enum ip_conntrack_info ctinfo,
-			      struct ip_conntrack *ct,
-			      const char *dptr)
-{
-	unsigned int dataoff, matchoff, matchlen;
-	char buffer[sizeof("65536")];
-	int bufflen;
-
-	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
-
-	/* Get actual SDP lenght */
-	if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff,
-			    &matchlen, POS_SDP_HEADER) > 0) {
-
-		/* since ct_sip_get_info() give us a pointer passing 'v='
-		   we need to add 2 bytes in this count. */
-		int c_len = (*pskb)->len - dataoff - matchoff + 2;
-
-		/* Now, update SDP lenght */
-		if (ct_sip_get_info(dptr, (*pskb)->len - dataoff, &matchoff,
-				    &matchlen, POS_CONTENT) > 0) {
-
-			bufflen = sprintf(buffer, "%u", c_len);
-
-			return ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
-							matchoff, matchlen,
-							buffer, bufflen);
-		}
-	}
-	return 0;
-}
-
-static unsigned int mangle_sdp(struct sk_buff **pskb,
-			       enum ip_conntrack_info ctinfo,
-			       struct ip_conntrack *ct,
-			       __be32 newip, u_int16_t port,
-			       const char *dptr)
-{
-	char buffer[sizeof("nnn.nnn.nnn.nnn")];
-	unsigned int dataoff, bufflen;
-
-	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
-
-	/* Mangle owner and contact info. */
-	bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
-	if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
-			       buffer, bufflen, POS_OWNER))
-		return 0;
-
-	if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
-			       buffer, bufflen, POS_CONNECTION))
-		return 0;
-
-	/* Mangle media port. */
-	bufflen = sprintf(buffer, "%u", port);
-	if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
-			       buffer, bufflen, POS_MEDIA))
-		return 0;
-
-	return mangle_content_len(pskb, ctinfo, ct, dptr);
-}
-
-/* So, this packet has hit the connection tracking matching code.
-   Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_sdp(struct sk_buff **pskb,
-			       enum ip_conntrack_info ctinfo,
-			       struct ip_conntrack_expect *exp,
-			       const char *dptr)
-{
-	struct ip_conntrack *ct = exp->master;
-	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-	__be32 newip;
-	u_int16_t port;
-
-	DEBUGP("ip_nat_sdp():\n");
-
-	/* Connection will come from reply */
-	newip = ct->tuplehash[!dir].tuple.dst.ip;
-
-	exp->tuple.dst.ip = newip;
-	exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
-	exp->dir = !dir;
-
-	/* When you see the packet, we need to NAT it the same as the
-	   this one. */
-	exp->expectfn = ip_nat_follow_master;
-
-	/* Try to get same port: if not, try to change it. */
-	for (port = ntohs(exp->saved_proto.udp.port); port != 0; port++) {
-		exp->tuple.dst.u.udp.port = htons(port);
-		if (ip_conntrack_expect_related(exp) == 0)
-			break;
-	}
-
-	if (port == 0)
-		return NF_DROP;
-
-	if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) {
-		ip_conntrack_unexpect_related(exp);
-		return NF_DROP;
-	}
-	return NF_ACCEPT;
-}
-
-static void __exit fini(void)
-{
-	rcu_assign_pointer(ip_nat_sip_hook, NULL);
-	rcu_assign_pointer(ip_nat_sdp_hook, NULL);
-	synchronize_rcu();
-}
-
-static int __init init(void)
-{
-	BUG_ON(rcu_dereference(ip_nat_sip_hook));
-	BUG_ON(rcu_dereference(ip_nat_sdp_hook));
-	rcu_assign_pointer(ip_nat_sip_hook, ip_nat_sip);
-	rcu_assign_pointer(ip_nat_sdp_hook, ip_nat_sdp);
-	return 0;
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
deleted file mode 100644
index 025e0458778..00000000000
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ /dev/null
@@ -1,1333 +0,0 @@
-/*
- * ip_nat_snmp_basic.c
- *
- * Basic SNMP Application Layer Gateway
- *
- * This IP NAT module is intended for use with SNMP network
- * discovery and monitoring applications where target networks use
- * conflicting private address realms.
- *
- * Static NAT is used to remap the networks from the view of the network
- * management system at the IP layer, and this module remaps some application
- * layer addresses to match.
- *
- * The simplest form of ALG is performed, where only tagged IP addresses
- * are modified.  The module does not need to be MIB aware and only scans
- * messages at the ASN.1/BER level.
- *
- * Currently, only SNMPv1 and SNMPv2 are supported.
- *
- * More information on ALG and associated issues can be found in
- * RFC 2962
- *
- * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory
- * McLean & Jochen Friedrich, stripped down for use in the kernel.
- *
- * Copyright (c) 2000 RP Internet (www.rpi.net.au).
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- *
- * Author: James Morris <jmorris@intercode.com.au>
- *
- * Updates:
- * 2000-08-06: Convert to new helper API (Harald Welte).
- *
- */
-#include <linux/in.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/moduleparam.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <net/checksum.h>
-#include <net/udp.h>
-#include <asm/uaccess.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
-
-#define SNMP_PORT 161
-#define SNMP_TRAP_PORT 162
-#define NOCT1(n) (*(u8 *)n)
-
-static int debug;
-static DEFINE_SPINLOCK(snmp_lock);
-
-/*
- * Application layer address mapping mimics the NAT mapping, but
- * only for the first octet in this case (a more flexible system
- * can be implemented if needed).
- */
-struct oct1_map
-{
-	u_int8_t from;
-	u_int8_t to;
-};
-
-
-/*****************************************************************************
- *
- * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse)
- *
- *****************************************************************************/
-
-/* Class */
-#define ASN1_UNI	0	/* Universal */
-#define ASN1_APL	1	/* Application */
-#define ASN1_CTX	2	/* Context */
-#define ASN1_PRV	3	/* Private */
-
-/* Tag */
-#define ASN1_EOC	0	/* End Of Contents */
-#define ASN1_BOL	1	/* Boolean */
-#define ASN1_INT	2	/* Integer */
-#define ASN1_BTS	3	/* Bit String */
-#define ASN1_OTS	4	/* Octet String */
-#define ASN1_NUL	5	/* Null */
-#define ASN1_OJI	6	/* Object Identifier  */
-#define ASN1_OJD	7	/* Object Description */
-#define ASN1_EXT	8	/* External */
-#define ASN1_SEQ	16	/* Sequence */
-#define ASN1_SET	17	/* Set */
-#define ASN1_NUMSTR	18	/* Numerical String */
-#define ASN1_PRNSTR	19	/* Printable String */
-#define ASN1_TEXSTR	20	/* Teletext String */
-#define ASN1_VIDSTR	21	/* Video String */
-#define ASN1_IA5STR	22	/* IA5 String */
-#define ASN1_UNITIM	23	/* Universal Time */
-#define ASN1_GENTIM	24	/* General Time */
-#define ASN1_GRASTR	25	/* Graphical String */
-#define ASN1_VISSTR	26	/* Visible String */
-#define ASN1_GENSTR	27	/* General String */
-
-/* Primitive / Constructed methods*/
-#define ASN1_PRI	0	/* Primitive */
-#define ASN1_CON	1	/* Constructed */
-
-/*
- * Error codes.
- */
-#define ASN1_ERR_NOERROR		0
-#define ASN1_ERR_DEC_EMPTY		2
-#define ASN1_ERR_DEC_EOC_MISMATCH	3
-#define ASN1_ERR_DEC_LENGTH_MISMATCH	4
-#define ASN1_ERR_DEC_BADVALUE		5
-
-/*
- * ASN.1 context.
- */
-struct asn1_ctx
-{
-	int error;			/* Error condition */
-	unsigned char *pointer;		/* Octet just to be decoded */
-	unsigned char *begin;		/* First octet */
-	unsigned char *end;		/* Octet after last octet */
-};
-
-/*
- * Octet string (not null terminated)
- */
-struct asn1_octstr
-{
-	unsigned char *data;
-	unsigned int len;
-};
-
-static void asn1_open(struct asn1_ctx *ctx,
-		      unsigned char *buf,
-		      unsigned int len)
-{
-	ctx->begin = buf;
-	ctx->end = buf + len;
-	ctx->pointer = buf;
-	ctx->error = ASN1_ERR_NOERROR;
-}
-
-static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
-{
-	if (ctx->pointer >= ctx->end) {
-		ctx->error = ASN1_ERR_DEC_EMPTY;
-		return 0;
-	}
-	*ch = *(ctx->pointer)++;
-	return 1;
-}
-
-static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
-{
-	unsigned char ch;
-
-	*tag = 0;
-
-	do
-	{
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-		*tag <<= 7;
-		*tag |= ch & 0x7F;
-	} while ((ch & 0x80) == 0x80);
-	return 1;
-}
-
-static unsigned char asn1_id_decode(struct asn1_ctx *ctx,
-				    unsigned int *cls,
-				    unsigned int *con,
-				    unsigned int *tag)
-{
-	unsigned char ch;
-
-	if (!asn1_octet_decode(ctx, &ch))
-		return 0;
-
-	*cls = (ch & 0xC0) >> 6;
-	*con = (ch & 0x20) >> 5;
-	*tag = (ch & 0x1F);
-
-	if (*tag == 0x1F) {
-		if (!asn1_tag_decode(ctx, tag))
-			return 0;
-	}
-	return 1;
-}
-
-static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
-					unsigned int *def,
-					unsigned int *len)
-{
-	unsigned char ch, cnt;
-
-	if (!asn1_octet_decode(ctx, &ch))
-		return 0;
-
-	if (ch == 0x80)
-		*def = 0;
-	else {
-		*def = 1;
-
-		if (ch < 0x80)
-			*len = ch;
-		else {
-			cnt = (unsigned char) (ch & 0x7F);
-			*len = 0;
-
-			while (cnt > 0) {
-				if (!asn1_octet_decode(ctx, &ch))
-					return 0;
-				*len <<= 8;
-				*len |= ch;
-				cnt--;
-			}
-		}
-	}
-	return 1;
-}
-
-static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
-					unsigned char **eoc,
-					unsigned int *cls,
-					unsigned int *con,
-					unsigned int *tag)
-{
-	unsigned int def, len;
-
-	if (!asn1_id_decode(ctx, cls, con, tag))
-		return 0;
-
-	def = len = 0;
-	if (!asn1_length_decode(ctx, &def, &len))
-		return 0;
-
-	if (def)
-		*eoc = ctx->pointer + len;
-	else
-		*eoc = NULL;
-	return 1;
-}
-
-static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
-{
-	unsigned char ch;
-
-	if (eoc == 0) {
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		if (ch != 0x00) {
-			ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
-			return 0;
-		}
-
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		if (ch != 0x00) {
-			ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
-			return 0;
-		}
-		return 1;
-	} else {
-		if (ctx->pointer != eoc) {
-			ctx->error = ASN1_ERR_DEC_LENGTH_MISMATCH;
-			return 0;
-		}
-		return 1;
-	}
-}
-
-static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc)
-{
-	ctx->pointer = eoc;
-	return 1;
-}
-
-static unsigned char asn1_long_decode(struct asn1_ctx *ctx,
-				      unsigned char *eoc,
-				      long *integer)
-{
-	unsigned char ch;
-	unsigned int  len;
-
-	if (!asn1_octet_decode(ctx, &ch))
-		return 0;
-
-	*integer = (signed char) ch;
-	len = 1;
-
-	while (ctx->pointer < eoc) {
-		if (++len > sizeof (long)) {
-			ctx->error = ASN1_ERR_DEC_BADVALUE;
-			return 0;
-		}
-
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		*integer <<= 8;
-		*integer |= ch;
-	}
-	return 1;
-}
-
-static unsigned char asn1_uint_decode(struct asn1_ctx *ctx,
-				      unsigned char *eoc,
-				      unsigned int *integer)
-{
-	unsigned char ch;
-	unsigned int  len;
-
-	if (!asn1_octet_decode(ctx, &ch))
-		return 0;
-
-	*integer = ch;
-	if (ch == 0) len = 0;
-	else len = 1;
-
-	while (ctx->pointer < eoc) {
-		if (++len > sizeof (unsigned int)) {
-			ctx->error = ASN1_ERR_DEC_BADVALUE;
-			return 0;
-		}
-
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		*integer <<= 8;
-		*integer |= ch;
-	}
-	return 1;
-}
-
-static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx,
-				       unsigned char *eoc,
-				       unsigned long *integer)
-{
-	unsigned char ch;
-	unsigned int  len;
-
-	if (!asn1_octet_decode(ctx, &ch))
-		return 0;
-
-	*integer = ch;
-	if (ch == 0) len = 0;
-	else len = 1;
-
-	while (ctx->pointer < eoc) {
-		if (++len > sizeof (unsigned long)) {
-			ctx->error = ASN1_ERR_DEC_BADVALUE;
-			return 0;
-		}
-
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		*integer <<= 8;
-		*integer |= ch;
-	}
-	return 1;
-}
-
-static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
-					unsigned char *eoc,
-					unsigned char **octets,
-					unsigned int *len)
-{
-	unsigned char *ptr;
-
-	*len = 0;
-
-	*octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
-	if (*octets == NULL) {
-		if (net_ratelimit())
-			printk("OOM in bsalg (%d)\n", __LINE__);
-		return 0;
-	}
-
-	ptr = *octets;
-	while (ctx->pointer < eoc) {
-		if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) {
-			kfree(*octets);
-			*octets = NULL;
-			return 0;
-		}
-		(*len)++;
-	}
-	return 1;
-}
-
-static unsigned char asn1_subid_decode(struct asn1_ctx *ctx,
-				       unsigned long *subid)
-{
-	unsigned char ch;
-
-	*subid = 0;
-
-	do {
-		if (!asn1_octet_decode(ctx, &ch))
-			return 0;
-
-		*subid <<= 7;
-		*subid |= ch & 0x7F;
-	} while ((ch & 0x80) == 0x80);
-	return 1;
-}
-
-static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
-				     unsigned char *eoc,
-				     unsigned long **oid,
-				     unsigned int *len)
-{
-	unsigned long subid;
-	unsigned int  size;
-	unsigned long *optr;
-
-	size = eoc - ctx->pointer + 1;
-	*oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
-	if (*oid == NULL) {
-		if (net_ratelimit())
-			printk("OOM in bsalg (%d)\n", __LINE__);
-		return 0;
-	}
-
-	optr = *oid;
-
-	if (!asn1_subid_decode(ctx, &subid)) {
-		kfree(*oid);
-		*oid = NULL;
-		return 0;
-	}
-
-	if (subid < 40) {
-		optr [0] = 0;
-		optr [1] = subid;
-	} else if (subid < 80) {
-		optr [0] = 1;
-		optr [1] = subid - 40;
-	} else {
-		optr [0] = 2;
-		optr [1] = subid - 80;
-	}
-
-	*len = 2;
-	optr += 2;
-
-	while (ctx->pointer < eoc) {
-		if (++(*len) > size) {
-			ctx->error = ASN1_ERR_DEC_BADVALUE;
-			kfree(*oid);
-			*oid = NULL;
-			return 0;
-		}
-
-		if (!asn1_subid_decode(ctx, optr++)) {
-			kfree(*oid);
-			*oid = NULL;
-			return 0;
-		}
-	}
-	return 1;
-}
-
-/*****************************************************************************
- *
- * SNMP decoding routines (gxsnmp author Dirk Wisse)
- *
- *****************************************************************************/
-
-/* SNMP Versions */
-#define SNMP_V1				0
-#define SNMP_V2C			1
-#define SNMP_V2				2
-#define SNMP_V3				3
-
-/* Default Sizes */
-#define SNMP_SIZE_COMM			256
-#define SNMP_SIZE_OBJECTID		128
-#define SNMP_SIZE_BUFCHR		256
-#define SNMP_SIZE_BUFINT		128
-#define SNMP_SIZE_SMALLOBJECTID		16
-
-/* Requests */
-#define SNMP_PDU_GET			0
-#define SNMP_PDU_NEXT			1
-#define SNMP_PDU_RESPONSE		2
-#define SNMP_PDU_SET			3
-#define SNMP_PDU_TRAP1			4
-#define SNMP_PDU_BULK			5
-#define SNMP_PDU_INFORM			6
-#define SNMP_PDU_TRAP2			7
-
-/* Errors */
-#define SNMP_NOERROR			0
-#define SNMP_TOOBIG			1
-#define SNMP_NOSUCHNAME			2
-#define SNMP_BADVALUE			3
-#define SNMP_READONLY			4
-#define SNMP_GENERROR			5
-#define SNMP_NOACCESS			6
-#define SNMP_WRONGTYPE			7
-#define SNMP_WRONGLENGTH		8
-#define SNMP_WRONGENCODING		9
-#define SNMP_WRONGVALUE			10
-#define SNMP_NOCREATION			11
-#define SNMP_INCONSISTENTVALUE		12
-#define SNMP_RESOURCEUNAVAILABLE	13
-#define SNMP_COMMITFAILED		14
-#define SNMP_UNDOFAILED			15
-#define SNMP_AUTHORIZATIONERROR		16
-#define SNMP_NOTWRITABLE		17
-#define SNMP_INCONSISTENTNAME		18
-
-/* General SNMP V1 Traps */
-#define SNMP_TRAP_COLDSTART		0
-#define SNMP_TRAP_WARMSTART		1
-#define SNMP_TRAP_LINKDOWN		2
-#define SNMP_TRAP_LINKUP		3
-#define SNMP_TRAP_AUTFAILURE		4
-#define SNMP_TRAP_EQPNEIGHBORLOSS	5
-#define SNMP_TRAP_ENTSPECIFIC		6
-
-/* SNMPv1 Types */
-#define SNMP_NULL                0
-#define SNMP_INTEGER             1    /* l  */
-#define SNMP_OCTETSTR            2    /* c  */
-#define SNMP_DISPLAYSTR          2    /* c  */
-#define SNMP_OBJECTID            3    /* ul */
-#define SNMP_IPADDR              4    /* uc */
-#define SNMP_COUNTER             5    /* ul */
-#define SNMP_GAUGE               6    /* ul */
-#define SNMP_TIMETICKS           7    /* ul */
-#define SNMP_OPAQUE              8    /* c  */
-
-/* Additional SNMPv2 Types */
-#define SNMP_UINTEGER            5    /* ul */
-#define SNMP_BITSTR              9    /* uc */
-#define SNMP_NSAP               10    /* uc */
-#define SNMP_COUNTER64          11    /* ul */
-#define SNMP_NOSUCHOBJECT       12
-#define SNMP_NOSUCHINSTANCE     13
-#define SNMP_ENDOFMIBVIEW       14
-
-union snmp_syntax
-{
-	unsigned char uc[0];	/* 8 bit unsigned */
-	char c[0];		/* 8 bit signed */
-	unsigned long ul[0];	/* 32 bit unsigned */
-	long l[0];		/* 32 bit signed */
-};
-
-struct snmp_object
-{
-	unsigned long *id;
-	unsigned int id_len;
-	unsigned short type;
-	unsigned int syntax_len;
-	union snmp_syntax syntax;
-};
-
-struct snmp_request
-{
-	unsigned long id;
-	unsigned int error_status;
-	unsigned int error_index;
-};
-
-struct snmp_v1_trap
-{
-	unsigned long *id;
-	unsigned int id_len;
-	unsigned long ip_address;	/* pointer  */
-	unsigned int general;
-	unsigned int specific;
-	unsigned long time;
-};
-
-/* SNMP types */
-#define SNMP_IPA    0
-#define SNMP_CNT    1
-#define SNMP_GGE    2
-#define SNMP_TIT    3
-#define SNMP_OPQ    4
-#define SNMP_C64    6
-
-/* SNMP errors */
-#define SERR_NSO    0
-#define SERR_NSI    1
-#define SERR_EOM    2
-
-static inline void mangle_address(unsigned char *begin,
-				  unsigned char *addr,
-				  const struct oct1_map *map,
-				  __sum16 *check);
-struct snmp_cnv
-{
-	unsigned int class;
-	unsigned int tag;
-	int syntax;
-};
-
-static struct snmp_cnv snmp_conv [] =
-{
-	{ASN1_UNI, ASN1_NUL, SNMP_NULL},
-	{ASN1_UNI, ASN1_INT, SNMP_INTEGER},
-	{ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
-	{ASN1_UNI, ASN1_OTS, SNMP_DISPLAYSTR},
-	{ASN1_UNI, ASN1_OJI, SNMP_OBJECTID},
-	{ASN1_APL, SNMP_IPA, SNMP_IPADDR},
-	{ASN1_APL, SNMP_CNT, SNMP_COUNTER},	/* Counter32 */
-	{ASN1_APL, SNMP_GGE, SNMP_GAUGE},	/* Gauge32 == Unsigned32  */
-	{ASN1_APL, SNMP_TIT, SNMP_TIMETICKS},
-	{ASN1_APL, SNMP_OPQ, SNMP_OPAQUE},
-
-	/* SNMPv2 data types and errors */
-	{ASN1_UNI, ASN1_BTS, SNMP_BITSTR},
-	{ASN1_APL, SNMP_C64, SNMP_COUNTER64},
-	{ASN1_CTX, SERR_NSO, SNMP_NOSUCHOBJECT},
-	{ASN1_CTX, SERR_NSI, SNMP_NOSUCHINSTANCE},
-	{ASN1_CTX, SERR_EOM, SNMP_ENDOFMIBVIEW},
-	{0,       0,       -1}
-};
-
-static unsigned char snmp_tag_cls2syntax(unsigned int tag,
-					 unsigned int cls,
-					 unsigned short *syntax)
-{
-	struct snmp_cnv *cnv;
-
-	cnv = snmp_conv;
-
-	while (cnv->syntax != -1) {
-		if (cnv->tag == tag && cnv->class == cls) {
-			*syntax = cnv->syntax;
-			return 1;
-		}
-		cnv++;
-	}
-	return 0;
-}
-
-static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
-					struct snmp_object **obj)
-{
-	unsigned int cls, con, tag, len, idlen;
-	unsigned short type;
-	unsigned char *eoc, *end, *p;
-	unsigned long *lp, *id;
-	unsigned long ul;
-	long l;
-
-	*obj = NULL;
-	id = NULL;
-
-	if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag))
-		return 0;
-
-	if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
-		return 0;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		return 0;
-
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
-		return 0;
-
-	if (!asn1_oid_decode(ctx, end, &id, &idlen))
-		return 0;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) {
-		kfree(id);
-		return 0;
-	}
-
-	if (con != ASN1_PRI) {
-		kfree(id);
-		return 0;
-	}
-
-	type = 0;
-	if (!snmp_tag_cls2syntax(tag, cls, &type)) {
-		kfree(id);
-		return 0;
-	}
-
-	l = 0;
-	switch (type) {
-		case SNMP_INTEGER:
-			len = sizeof(long);
-			if (!asn1_long_decode(ctx, end, &l)) {
-				kfree(id);
-				return 0;
-			}
-			*obj = kmalloc(sizeof(struct snmp_object) + len,
-				       GFP_ATOMIC);
-			if (*obj == NULL) {
-				kfree(id);
-				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
-				return 0;
-			}
-			(*obj)->syntax.l[0] = l;
-			break;
-		case SNMP_OCTETSTR:
-		case SNMP_OPAQUE:
-			if (!asn1_octets_decode(ctx, end, &p, &len)) {
-				kfree(id);
-				return 0;
-			}
-			*obj = kmalloc(sizeof(struct snmp_object) + len,
-				       GFP_ATOMIC);
-			if (*obj == NULL) {
-				kfree(id);
-				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
-				return 0;
-			}
-			memcpy((*obj)->syntax.c, p, len);
-			kfree(p);
-			break;
-		case SNMP_NULL:
-		case SNMP_NOSUCHOBJECT:
-		case SNMP_NOSUCHINSTANCE:
-		case SNMP_ENDOFMIBVIEW:
-			len = 0;
-			*obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
-			if (*obj == NULL) {
-				kfree(id);
-				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
-				return 0;
-			}
-			if (!asn1_null_decode(ctx, end)) {
-				kfree(id);
-				kfree(*obj);
-				*obj = NULL;
-				return 0;
-			}
-			break;
-		case SNMP_OBJECTID:
-			if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) {
-				kfree(id);
-				return 0;
-			}
-			len *= sizeof(unsigned long);
-			*obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
-			if (*obj == NULL) {
-				kfree(lp);
-				kfree(id);
-				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
-				return 0;
-			}
-			memcpy((*obj)->syntax.ul, lp, len);
-			kfree(lp);
-			break;
-		case SNMP_IPADDR:
-			if (!asn1_octets_decode(ctx, end, &p, &len)) {
-				kfree(id);
-				return 0;
-			}
-			if (len != 4) {
-				kfree(p);
-				kfree(id);
-				return 0;
-			}
-			*obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
-			if (*obj == NULL) {
-				kfree(p);
-				kfree(id);
-				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
-				return 0;
-			}
-			memcpy((*obj)->syntax.uc, p, len);
-			kfree(p);
-			break;
-		case SNMP_COUNTER:
-		case SNMP_GAUGE:
-		case SNMP_TIMETICKS:
-			len = sizeof(unsigned long);
-			if (!asn1_ulong_decode(ctx, end, &ul)) {
-				kfree(id);
-				return 0;
-			}
-			*obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
-			if (*obj == NULL) {
-				kfree(id);
-				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
-				return 0;
-			}
-			(*obj)->syntax.ul[0] = ul;
-			break;
-		default:
-			kfree(id);
-			return 0;
-	}
-
-	(*obj)->syntax_len = len;
-	(*obj)->type = type;
-	(*obj)->id = id;
-	(*obj)->id_len = idlen;
-
-	if (!asn1_eoc_decode(ctx, eoc)) {
-		kfree(id);
-		kfree(*obj);
-		*obj = NULL;
-		return 0;
-	}
-	return 1;
-}
-
-static unsigned char snmp_request_decode(struct asn1_ctx *ctx,
-					 struct snmp_request *request)
-{
-	unsigned int cls, con, tag;
-	unsigned char *end;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		return 0;
-
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
-		return 0;
-
-	if (!asn1_ulong_decode(ctx, end, &request->id))
-		return 0;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		return 0;
-
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
-		return 0;
-
-	if (!asn1_uint_decode(ctx, end, &request->error_status))
-		return 0;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		return 0;
-
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
-		return 0;
-
-	if (!asn1_uint_decode(ctx, end, &request->error_index))
-		return 0;
-
-	return 1;
-}
-
-/*
- * Fast checksum update for possibly oddly-aligned UDP byte, from the
- * code example in the draft.
- */
-static void fast_csum(__sum16 *csum,
-		      const unsigned char *optr,
-		      const unsigned char *nptr,
-		      int offset)
-{
-	unsigned char s[4];
-
-	if (offset & 1) {
-		s[0] = s[2] = 0;
-		s[1] = ~*optr;
-		s[3] = *nptr;
-	} else {
-		s[1] = s[3] = 0;
-		s[0] = ~*optr;
-		s[2] = *nptr;
-	}
-
-	*csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum)));
-}
-
-/*
- * Mangle IP address.
- * 	- begin points to the start of the snmp messgae
- *      - addr points to the start of the address
- */
-static inline void mangle_address(unsigned char *begin,
-				  unsigned char *addr,
-				  const struct oct1_map *map,
-				  __sum16 *check)
-{
-	if (map->from == NOCT1(addr)) {
-		u_int32_t old;
-
-		if (debug)
-			memcpy(&old, (unsigned char *)addr, sizeof(old));
-
-		*addr = map->to;
-
-		/* Update UDP checksum if being used */
-		if (*check) {
-			fast_csum(check,
-				  &map->from, &map->to, addr - begin);
-		}
-
-		if (debug)
-			printk(KERN_DEBUG "bsalg: mapped %u.%u.%u.%u to "
-			       "%u.%u.%u.%u\n", NIPQUAD(old), NIPQUAD(*addr));
-	}
-}
-
-static unsigned char snmp_trap_decode(struct asn1_ctx *ctx,
-				      struct snmp_v1_trap *trap,
-				      const struct oct1_map *map,
-				      __sum16 *check)
-{
-	unsigned int cls, con, tag, len;
-	unsigned char *end;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		return 0;
-
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
-		return 0;
-
-	if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len))
-		return 0;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		goto err_id_free;
-
-	if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) ||
-	      (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS)))
-		goto err_id_free;
-
-	if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len))
-		goto err_id_free;
-
-	/* IPv4 only */
-	if (len != 4)
-		goto err_addr_free;
-
-	mangle_address(ctx->begin, ctx->pointer - 4, map, check);
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		goto err_addr_free;
-
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
-		goto err_addr_free;
-
-	if (!asn1_uint_decode(ctx, end, &trap->general))
-		goto err_addr_free;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		goto err_addr_free;
-
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
-		goto err_addr_free;
-
-	if (!asn1_uint_decode(ctx, end, &trap->specific))
-		goto err_addr_free;
-
-	if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
-		goto err_addr_free;
-
-	if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) ||
-	      (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT)))
-		goto err_addr_free;
-
-	if (!asn1_ulong_decode(ctx, end, &trap->time))
-		goto err_addr_free;
-
-	return 1;
-
-err_addr_free:
-	kfree((unsigned long *)trap->ip_address);
-
-err_id_free:
-	kfree(trap->id);
-
-	return 0;
-}
-
-/*****************************************************************************
- *
- * Misc. routines
- *
- *****************************************************************************/
-
-static void hex_dump(unsigned char *buf, size_t len)
-{
-	size_t i;
-
-	for (i = 0; i < len; i++) {
-		if (i && !(i % 16))
-			printk("\n");
-		printk("%02x ", *(buf + i));
-	}
-	printk("\n");
-}
-
-/*
- * Parse and mangle SNMP message according to mapping.
- * (And this is the fucking 'basic' method).
- */
-static int snmp_parse_mangle(unsigned char *msg,
-			     u_int16_t len,
-			     const struct oct1_map *map,
-			     __sum16 *check)
-{
-	unsigned char *eoc, *end;
-	unsigned int cls, con, tag, vers, pdutype;
-	struct asn1_ctx ctx;
-	struct asn1_octstr comm;
-	struct snmp_object **obj;
-
-	if (debug > 1)
-		hex_dump(msg, len);
-
-	asn1_open(&ctx, msg, len);
-
-	/*
-	 * Start of SNMP message.
-	 */
-	if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
-		return 0;
-	if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
-		return 0;
-
-	/*
-	 * Version 1 or 2 handled.
-	 */
-	if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag))
-		return 0;
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
-		return 0;
-	if (!asn1_uint_decode (&ctx, end, &vers))
-		return 0;
-	if (debug > 1)
-		printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1);
-	if (vers > 1)
-		return 1;
-
-	/*
-	 * Community.
-	 */
-	if (!asn1_header_decode (&ctx, &end, &cls, &con, &tag))
-		return 0;
-	if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OTS)
-		return 0;
-	if (!asn1_octets_decode(&ctx, end, &comm.data, &comm.len))
-		return 0;
-	if (debug > 1) {
-		unsigned int i;
-
-		printk(KERN_DEBUG "bsalg: community: ");
-		for (i = 0; i < comm.len; i++)
-			printk("%c", comm.data[i]);
-		printk("\n");
-	}
-	kfree(comm.data);
-
-	/*
-	 * PDU type
-	 */
-	if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &pdutype))
-		return 0;
-	if (cls != ASN1_CTX || con != ASN1_CON)
-		return 0;
-	if (debug > 1) {
-		unsigned char *pdus[] = {
-			[SNMP_PDU_GET] = "get",
-			[SNMP_PDU_NEXT] = "get-next",
-			[SNMP_PDU_RESPONSE] = "response",
-			[SNMP_PDU_SET] = "set",
-			[SNMP_PDU_TRAP1] = "trapv1",
-			[SNMP_PDU_BULK] = "bulk",
-			[SNMP_PDU_INFORM] = "inform",
-			[SNMP_PDU_TRAP2] = "trapv2"
-		};
-
-		if (pdutype > SNMP_PDU_TRAP2)
-			printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype);
-		else
-			printk(KERN_DEBUG "bsalg: pdu: %s\n", pdus[pdutype]);
-	}
-	if (pdutype != SNMP_PDU_RESPONSE &&
-	    pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2)
-		return 1;
-
-	/*
-	 * Request header or v1 trap
-	 */
-	if (pdutype == SNMP_PDU_TRAP1) {
-		struct snmp_v1_trap trap;
-		unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check);
-
-		if (ret) {
-			kfree(trap.id);
-			kfree((unsigned long *)trap.ip_address);
-		} else
-			return ret;
-
-	} else {
-		struct snmp_request req;
-
-		if (!snmp_request_decode(&ctx, &req))
-			return 0;
-
-		if (debug > 1)
-			printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u "
-			"error_index=%u\n", req.id, req.error_status,
-			req.error_index);
-	}
-
-	/*
-	 * Loop through objects, look for IP addresses to mangle.
-	 */
-	if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
-		return 0;
-
-	if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
-		return 0;
-
-	obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
-	if (obj == NULL) {
-		if (net_ratelimit())
-			printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__);
-		return 0;
-	}
-
-	while (!asn1_eoc_decode(&ctx, eoc)) {
-		unsigned int i;
-
-		if (!snmp_object_decode(&ctx, obj)) {
-			if (*obj) {
-				kfree((*obj)->id);
-				kfree(*obj);
-			}
-			kfree(obj);
-			return 0;
-		}
-
-		if (debug > 1) {
-			printk(KERN_DEBUG "bsalg: object: ");
-			for (i = 0; i < (*obj)->id_len; i++) {
-				if (i > 0)
-					printk(".");
-				printk("%lu", (*obj)->id[i]);
-			}
-			printk(": type=%u\n", (*obj)->type);
-
-		}
-
-		if ((*obj)->type == SNMP_IPADDR)
-			mangle_address(ctx.begin, ctx.pointer - 4 , map, check);
-
-		kfree((*obj)->id);
-		kfree(*obj);
-	}
-	kfree(obj);
-
-	if (!asn1_eoc_decode(&ctx, eoc))
-		return 0;
-
-	return 1;
-}
-
-/*****************************************************************************
- *
- * NAT routines.
- *
- *****************************************************************************/
-
-/*
- * SNMP translation routine.
- */
-static int snmp_translate(struct ip_conntrack *ct,
-			  enum ip_conntrack_info ctinfo,
-			  struct sk_buff **pskb)
-{
-	struct iphdr *iph = ip_hdr(*pskb);
-	struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
-	u_int16_t udplen = ntohs(udph->len);
-	u_int16_t paylen = udplen - sizeof(struct udphdr);
-	int dir = CTINFO2DIR(ctinfo);
-	struct oct1_map map;
-
-	/*
-	 * Determine mappping for application layer addresses based
-	 * on NAT manipulations for the packet.
-	 */
-	if (dir == IP_CT_DIR_ORIGINAL) {
-		/* SNAT traps */
-		map.from = NOCT1(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip);
-		map.to = NOCT1(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip);
-	} else {
-		/* DNAT replies */
-		map.from = NOCT1(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
-		map.to = NOCT1(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip);
-	}
-
-	if (map.from == map.to)
-		return NF_ACCEPT;
-
-	if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr),
-			       paylen, &map, &udph->check)) {
-		if (net_ratelimit())
-			printk(KERN_WARNING "bsalg: parser failed\n");
-		return NF_DROP;
-	}
-	return NF_ACCEPT;
-}
-
-/* We don't actually set up expectations, just adjust internal IP
- * addresses if this is being NATted */
-static int help(struct sk_buff **pskb,
-		struct ip_conntrack *ct,
-		enum ip_conntrack_info ctinfo)
-{
-	int dir = CTINFO2DIR(ctinfo);
-	unsigned int ret;
-	struct iphdr *iph = ip_hdr(*pskb);
-	struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
-
-	/* SNMP replies and originating SNMP traps get mangled */
-	if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
-		return NF_ACCEPT;
-	if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
-		return NF_ACCEPT;
-
-	/* No NAT? */
-	if (!(ct->status & IPS_NAT_MASK))
-		return NF_ACCEPT;
-
-	/*
-	 * Make sure the packet length is ok.  So far, we were only guaranteed
-	 * to have a valid length IP header plus 8 bytes, which means we have
-	 * enough room for a UDP header.  Just verify the UDP length field so we
-	 * can mess around with the payload.
-	 */
-	if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) {
-		 if (net_ratelimit())
-			 printk(KERN_WARNING "SNMP: dropping malformed packet "
-				"src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
-				NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
-		 return NF_DROP;
-	}
-
-	if (!skb_make_writable(pskb, (*pskb)->len))
-		return NF_DROP;
-
-	spin_lock_bh(&snmp_lock);
-	ret = snmp_translate(ct, ctinfo, pskb);
-	spin_unlock_bh(&snmp_lock);
-	return ret;
-}
-
-static struct ip_conntrack_helper snmp_helper = {
-	.max_expected = 0,
-	.timeout = 180,
-	.me = THIS_MODULE,
-	.help = help,
-	.name = "snmp",
-
-	.tuple = {.src = {.u = {.udp = {.port = __constant_htons(SNMP_PORT)}}},
-		  .dst = {.protonum = IPPROTO_UDP},
-	},
-	.mask = {.src = {.u = {0xFFFF}},
-		 .dst = {.protonum = 0xFF},
-	},
-};
-
-static struct ip_conntrack_helper snmp_trap_helper = {
-	.max_expected = 0,
-	.timeout = 180,
-	.me = THIS_MODULE,
-	.help = help,
-	.name = "snmp_trap",
-
-	.tuple = {.src = {.u = {.udp = {.port = __constant_htons(SNMP_TRAP_PORT)}}},
-		  .dst = {.protonum = IPPROTO_UDP},
-	},
-	.mask = {.src = {.u = {0xFFFF}},
-		 .dst = {.protonum = 0xFF},
-	},
-};
-
-/*****************************************************************************
- *
- * Module stuff.
- *
- *****************************************************************************/
-
-static int __init ip_nat_snmp_basic_init(void)
-{
-	int ret = 0;
-
-	ret = ip_conntrack_helper_register(&snmp_helper);
-	if (ret < 0)
-		return ret;
-	ret = ip_conntrack_helper_register(&snmp_trap_helper);
-	if (ret < 0) {
-		ip_conntrack_helper_unregister(&snmp_helper);
-		return ret;
-	}
-	return ret;
-}
-
-static void __exit ip_nat_snmp_basic_fini(void)
-{
-	ip_conntrack_helper_unregister(&snmp_helper);
-	ip_conntrack_helper_unregister(&snmp_trap_helper);
-}
-
-module_init(ip_nat_snmp_basic_init);
-module_exit(ip_nat_snmp_basic_fini);
-
-module_param(debug, int, 0600);
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
deleted file mode 100644
index 32f7bf661fc..00000000000
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ /dev/null
@@ -1,387 +0,0 @@
-/* This file contains all the functions required for the standalone
-   ip_nat module.
-
-   These are not required by the compatibility layer.
-*/
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
- * 	- new API and handling of conntrack/nat helpers
- * 	- now capable of multiple expectations for one master
- * */
-
-#include <linux/types.h>
-#include <linux/icmp.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <linux/spinlock.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#ifdef CONFIG_XFRM
-static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
-{
-	struct ip_conntrack *ct;
-	struct ip_conntrack_tuple *t;
-	enum ip_conntrack_info ctinfo;
-	enum ip_conntrack_dir dir;
-	unsigned long statusbit;
-
-	ct = ip_conntrack_get(skb, &ctinfo);
-	if (ct == NULL)
-		return;
-	dir = CTINFO2DIR(ctinfo);
-	t = &ct->tuplehash[dir].tuple;
-
-	if (dir == IP_CT_DIR_ORIGINAL)
-		statusbit = IPS_DST_NAT;
-	else
-		statusbit = IPS_SRC_NAT;
-
-	if (ct->status & statusbit) {
-		fl->fl4_dst = t->dst.ip;
-		if (t->dst.protonum == IPPROTO_TCP ||
-		    t->dst.protonum == IPPROTO_UDP)
-			fl->fl_ip_dport = t->dst.u.tcp.port;
-	}
-
-	statusbit ^= IPS_NAT_MASK;
-
-	if (ct->status & statusbit) {
-		fl->fl4_src = t->src.ip;
-		if (t->dst.protonum == IPPROTO_TCP ||
-		    t->dst.protonum == IPPROTO_UDP)
-			fl->fl_ip_sport = t->src.u.tcp.port;
-	}
-}
-#endif
-
-static unsigned int
-ip_nat_fn(unsigned int hooknum,
-	  struct sk_buff **pskb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  int (*okfn)(struct sk_buff *))
-{
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-	struct ip_nat_info *info;
-	/* maniptype == SRC for postrouting. */
-	enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
-
-	/* We never see fragments: conntrack defrags on pre-routing
-	   and local-out, and ip_nat_out protects post-routing. */
-	IP_NF_ASSERT(!(ip_hdr(*pskb)->frag_off
-		       & htons(IP_MF|IP_OFFSET)));
-
-	ct = ip_conntrack_get(*pskb, &ctinfo);
-	/* Can't track?  It's not due to stress, or conntrack would
-	   have dropped it.  Hence it's the user's responsibilty to
-	   packet filter it out, or implement conntrack/NAT for that
-	   protocol. 8) --RR */
-	if (!ct) {
-		/* Exception: ICMP redirect to new connection (not in
-		   hash table yet).  We must not let this through, in
-		   case we're doing NAT to the same network. */
-		if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
-			struct icmphdr _hdr, *hp;
-
-			hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
-						sizeof(_hdr), &_hdr);
-			if (hp != NULL &&
-			    hp->type == ICMP_REDIRECT)
-				return NF_DROP;
-		}
-		return NF_ACCEPT;
-	}
-
-	/* Don't try to NAT if this packet is not conntracked */
-	if (ct == &ip_conntrack_untracked)
-		return NF_ACCEPT;
-
-	switch (ctinfo) {
-	case IP_CT_RELATED:
-	case IP_CT_RELATED+IP_CT_IS_REPLY:
-		if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
-			if (!ip_nat_icmp_reply_translation(ct, ctinfo,
-							   hooknum, pskb))
-				return NF_DROP;
-			else
-				return NF_ACCEPT;
-		}
-		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
-	case IP_CT_NEW:
-		info = &ct->nat.info;
-
-		/* Seen it before?  This can happen for loopback, retrans,
-		   or local packets.. */
-		if (!ip_nat_initialized(ct, maniptype)) {
-			unsigned int ret;
-
-			if (unlikely(is_confirmed(ct)))
-				/* NAT module was loaded late */
-				ret = alloc_null_binding_confirmed(ct, info,
-								   hooknum);
-			else if (hooknum == NF_IP_LOCAL_IN)
-				/* LOCAL_IN hook doesn't have a chain!  */
-				ret = alloc_null_binding(ct, info, hooknum);
-			else
-				ret = ip_nat_rule_find(pskb, hooknum,
-						       in, out, ct,
-						       info);
-
-			if (ret != NF_ACCEPT) {
-				return ret;
-			}
-		} else
-			DEBUGP("Already setup manip %s for ct %p\n",
-			       maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
-			       ct);
-		break;
-
-	default:
-		/* ESTABLISHED */
-		IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED
-			     || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
-		info = &ct->nat.info;
-	}
-
-	IP_NF_ASSERT(info);
-	return ip_nat_packet(ct, ctinfo, hooknum, pskb);
-}
-
-static unsigned int
-ip_nat_in(unsigned int hooknum,
-	  struct sk_buff **pskb,
-	  const struct net_device *in,
-	  const struct net_device *out,
-	  int (*okfn)(struct sk_buff *))
-{
-	unsigned int ret;
-	__be32 daddr = ip_hdr(*pskb)->daddr;
-
-	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
-	if (ret != NF_DROP && ret != NF_STOLEN
-	    && daddr != ip_hdr(*pskb)->daddr) {
-		dst_release((*pskb)->dst);
-		(*pskb)->dst = NULL;
-	}
-	return ret;
-}
-
-static unsigned int
-ip_nat_out(unsigned int hooknum,
-	   struct sk_buff **pskb,
-	   const struct net_device *in,
-	   const struct net_device *out,
-	   int (*okfn)(struct sk_buff *))
-{
-#ifdef CONFIG_XFRM
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-#endif
-	unsigned int ret;
-
-	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || ip_hdrlen(*pskb) < sizeof(struct iphdr))
-		return NF_ACCEPT;
-
-	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
-#ifdef CONFIG_XFRM
-	if (ret != NF_DROP && ret != NF_STOLEN
-	    && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
-		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
-		if (ct->tuplehash[dir].tuple.src.ip !=
-		    ct->tuplehash[!dir].tuple.dst.ip
-		    || ct->tuplehash[dir].tuple.src.u.all !=
-		       ct->tuplehash[!dir].tuple.dst.u.all
-		    )
-			return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP;
-	}
-#endif
-	return ret;
-}
-
-static unsigned int
-ip_nat_local_fn(unsigned int hooknum,
-		struct sk_buff **pskb,
-		const struct net_device *in,
-		const struct net_device *out,
-		int (*okfn)(struct sk_buff *))
-{
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-	unsigned int ret;
-
-	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || ip_hdrlen(*pskb) < sizeof(struct iphdr))
-		return NF_ACCEPT;
-
-	ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
-	if (ret != NF_DROP && ret != NF_STOLEN
-	    && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
-		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
-		if (ct->tuplehash[dir].tuple.dst.ip !=
-		    ct->tuplehash[!dir].tuple.src.ip) {
-			if (ip_route_me_harder(pskb, RTN_UNSPEC))
-				ret = NF_DROP;
-		}
-#ifdef CONFIG_XFRM
-		else if (ct->tuplehash[dir].tuple.dst.u.all !=
-			 ct->tuplehash[!dir].tuple.src.u.all)
-			if (ip_xfrm_me_harder(pskb))
-				ret = NF_DROP;
-#endif
-
-	}
-	return ret;
-}
-
-static unsigned int
-ip_nat_adjust(unsigned int hooknum,
-	      struct sk_buff **pskb,
-	      const struct net_device *in,
-	      const struct net_device *out,
-	      int (*okfn)(struct sk_buff *))
-{
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-
-	ct = ip_conntrack_get(*pskb, &ctinfo);
-	if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
-		DEBUGP("ip_nat_standalone: adjusting sequence number\n");
-		if (!ip_nat_seq_adjust(pskb, ct, ctinfo))
-			return NF_DROP;
-	}
-	return NF_ACCEPT;
-}
-
-/* We must be after connection tracking and before packet filtering. */
-
-static struct nf_hook_ops ip_nat_ops[] = {
-	/* Before packet filtering, change destination */
-	{
-		.hook		= ip_nat_in,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_PRE_ROUTING,
-		.priority	= NF_IP_PRI_NAT_DST,
-	},
-	/* After packet filtering, change source */
-	{
-		.hook		= ip_nat_out,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
-		.priority	= NF_IP_PRI_NAT_SRC,
-	},
-	/* After conntrack, adjust sequence number */
-	{
-		.hook		= ip_nat_adjust,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_POST_ROUTING,
-		.priority	= NF_IP_PRI_NAT_SEQ_ADJUST,
-	},
-	/* Before packet filtering, change destination */
-	{
-		.hook		= ip_nat_local_fn,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_OUT,
-		.priority	= NF_IP_PRI_NAT_DST,
-	},
-	/* After packet filtering, change source */
-	{
-		.hook		= ip_nat_fn,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
-		.priority	= NF_IP_PRI_NAT_SRC,
-	},
-	/* After conntrack, adjust sequence number */
-	{
-		.hook		= ip_nat_adjust,
-		.owner		= THIS_MODULE,
-		.pf		= PF_INET,
-		.hooknum	= NF_IP_LOCAL_IN,
-		.priority	= NF_IP_PRI_NAT_SEQ_ADJUST,
-	},
-};
-
-static int __init ip_nat_standalone_init(void)
-{
-	int ret = 0;
-
-	need_conntrack();
-
-#ifdef CONFIG_XFRM
-	BUG_ON(ip_nat_decode_session != NULL);
-	ip_nat_decode_session = nat_decode_session;
-#endif
-	ret = ip_nat_rule_init();
-	if (ret < 0) {
-		printk("ip_nat_init: can't setup rules.\n");
-		goto cleanup_decode_session;
-	}
-	ret = nf_register_hooks(ip_nat_ops, ARRAY_SIZE(ip_nat_ops));
-	if (ret < 0) {
-		printk("ip_nat_init: can't register hooks.\n");
-		goto cleanup_rule_init;
-	}
-	return ret;
-
- cleanup_rule_init:
-	ip_nat_rule_cleanup();
- cleanup_decode_session:
-#ifdef CONFIG_XFRM
-	ip_nat_decode_session = NULL;
-	synchronize_net();
-#endif
-	return ret;
-}
-
-static void __exit ip_nat_standalone_fini(void)
-{
-	nf_unregister_hooks(ip_nat_ops, ARRAY_SIZE(ip_nat_ops));
-	ip_nat_rule_cleanup();
-#ifdef CONFIG_XFRM
-	ip_nat_decode_session = NULL;
-	synchronize_net();
-#endif
-}
-
-module_init(ip_nat_standalone_init);
-module_exit(ip_nat_standalone_fini);
-
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
deleted file mode 100644
index 604793536fc..00000000000
--- a/net/ipv4/netfilter/ip_nat_tftp.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Version: 0.0.7
- *
- * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org>
- * 	- Port to newnat API
- *
- * This module currently supports DNAT:
- * iptables -t nat -A PREROUTING -d x.x.x.x -j DNAT --to-dest x.x.x.y
- *
- * and SNAT:
- * iptables -t nat -A POSTROUTING { -j MASQUERADE , -j SNAT --to-source x.x.x.x }
- *
- * It has not been tested with
- * -j SNAT --to-source x.x.x.x-x.x.x.y since I only have one external ip
- * If you do test this please let me know if it works or not.
- *
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/moduleparam.h>
-
-MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
-MODULE_DESCRIPTION("tftp NAT helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int help(struct sk_buff **pskb,
-			 enum ip_conntrack_info ctinfo,
-			 struct ip_conntrack_expect *exp)
-{
-	struct ip_conntrack *ct = exp->master;
-
-	exp->saved_proto.udp.port
-		= ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
-	exp->dir = IP_CT_DIR_REPLY;
-	exp->expectfn = ip_nat_follow_master;
-	if (ip_conntrack_expect_related(exp) != 0)
-		return NF_DROP;
-	return NF_ACCEPT;
-}
-
-static void __exit ip_nat_tftp_fini(void)
-{
-	rcu_assign_pointer(ip_nat_tftp_hook, NULL);
-	synchronize_rcu();
-}
-
-static int __init ip_nat_tftp_init(void)
-{
-	BUG_ON(rcu_dereference(ip_nat_tftp_hook));
-	rcu_assign_pointer(ip_nat_tftp_hook, help);
-	return 0;
-}
-
-module_init(ip_nat_tftp_init);
-module_exit(ip_nat_tftp_fini);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index d3b16817a99..40e27342139 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -21,15 +21,12 @@
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-
-#include <net/checksum.h>
-
 #include <linux/netfilter_arp.h>
-
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/checksum.h>
 
 #define CLUSTERIP_VERSION "0.8"
 
@@ -310,15 +307,16 @@ target(struct sk_buff **pskb,
        const void *targinfo)
 {
 	const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
+	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
-	u_int32_t *mark, hash;
+	u_int32_t hash;
 
 	/* don't need to clusterip_config_get() here, since refcount
 	 * is only decremented by destroy() - and ip_tables guarantees
 	 * that the ->target() function isn't called after ->destroy() */
 
-	mark = nf_ct_get_mark((*pskb), &ctinfo);
-	if (mark == NULL) {
+	ct = nf_ct_get(*pskb, &ctinfo);
+	if (ct == NULL) {
 		printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
 			/* FIXME: need to drop invalid ones, since replies
 			 * to outgoing connections of other nodes will be
@@ -341,7 +339,7 @@ target(struct sk_buff **pskb,
 
 	switch (ctinfo) {
 		case IP_CT_NEW:
-			*mark = hash;
+			ct->mark = hash;
 			break;
 		case IP_CT_RELATED:
 		case IP_CT_RELATED+IP_CT_IS_REPLY:
@@ -358,7 +356,7 @@ target(struct sk_buff **pskb,
 #ifdef DEBUG_CLUSTERP
 	DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 #endif
-	DEBUGP("hash=%u ct_hash=%u ", hash, *mark);
+	DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark);
 	if (!clusterip_responsible(cipinfo->config, hash)) {
 		DEBUGP("not responsible\n");
 		return NF_DROP;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index b5955f3a3f8..d4f2d777533 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -19,12 +19,8 @@
 #include <net/ip.h>
 #include <net/checksum.h>
 #include <net/route.h>
-#include <linux/netfilter_ipv4.h>
-#ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
+#include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/x_tables.h>
 
 MODULE_LICENSE("GPL");
@@ -48,7 +44,7 @@ masquerade_check(const char *tablename,
 		 void *targinfo,
 		 unsigned int hook_mask)
 {
-	const struct ip_nat_multi_range_compat *mr = targinfo;
+	const struct nf_nat_multi_range_compat *mr = targinfo;
 
 	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
 		DEBUGP("masquerade_check: bad MAP_IPS.\n");
@@ -69,33 +65,26 @@ masquerade_target(struct sk_buff **pskb,
 		  const struct xt_target *target,
 		  const void *targinfo)
 {
-#ifdef CONFIG_NF_NAT_NEEDED
+	struct nf_conn *ct;
 	struct nf_conn_nat *nat;
-#endif
-	struct ip_conntrack *ct;
 	enum ip_conntrack_info ctinfo;
-	struct ip_nat_range newrange;
-	const struct ip_nat_multi_range_compat *mr;
+	struct nf_nat_range newrange;
+	const struct nf_nat_multi_range_compat *mr;
 	struct rtable *rt;
 	__be32 newsrc;
 
-	IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
+	NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
 
-	ct = ip_conntrack_get(*pskb, &ctinfo);
-#ifdef CONFIG_NF_NAT_NEEDED
+	ct = nf_ct_get(*pskb, &ctinfo);
 	nat = nfct_nat(ct);
-#endif
-	IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
+
+	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
 			    || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
 
 	/* Source address is 0.0.0.0 - locally generated packet that is
 	 * probably not supposed to be masqueraded.
 	 */
-#ifdef CONFIG_NF_NAT_NEEDED
 	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
-#else
-	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip == 0)
-#endif
 		return NF_ACCEPT;
 
 	mr = targinfo;
@@ -107,40 +96,30 @@ masquerade_target(struct sk_buff **pskb,
 	}
 
 	write_lock_bh(&masq_lock);
-#ifdef CONFIG_NF_NAT_NEEDED
 	nat->masq_index = out->ifindex;
-#else
-	ct->nat.masq_index = out->ifindex;
-#endif
 	write_unlock_bh(&masq_lock);
 
 	/* Transfer from original range. */
-	newrange = ((struct ip_nat_range)
+	newrange = ((struct nf_nat_range)
 		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
 		  newsrc, newsrc,
 		  mr->range[0].min, mr->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return ip_nat_setup_info(ct, &newrange, hooknum);
+	return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
 static inline int
-device_cmp(struct ip_conntrack *i, void *ifindex)
+device_cmp(struct nf_conn *i, void *ifindex)
 {
-	int ret;
-#ifdef CONFIG_NF_NAT_NEEDED
 	struct nf_conn_nat *nat = nfct_nat(i);
+	int ret;
 
 	if (!nat)
 		return 0;
-#endif
 
 	read_lock_bh(&masq_lock);
-#ifdef CONFIG_NF_NAT_NEEDED
 	ret = (nat->masq_index == (int)(long)ifindex);
-#else
-	ret = (i->nat.masq_index == (int)(long)ifindex);
-#endif
 	read_unlock_bh(&masq_lock);
 
 	return ret;
@@ -156,9 +135,9 @@ static int masq_device_event(struct notifier_block *this,
 		/* Device was downed.  Search entire table for
 		   conntracks which were associated with that device,
 		   and forget them. */
-		IP_NF_ASSERT(dev->ifindex != 0);
+		NF_CT_ASSERT(dev->ifindex != 0);
 
-		ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
+		nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
 	}
 
 	return NOTIFY_DONE;
@@ -174,9 +153,9 @@ static int masq_inet_event(struct notifier_block *this,
 		/* IP address was deleted.  Search entire table for
 		   conntracks which were associated with that device,
 		   and forget them. */
-		IP_NF_ASSERT(dev->ifindex != 0);
+		NF_CT_ASSERT(dev->ifindex != 0);
 
-		ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
+		nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
 	}
 
 	return NOTIFY_DONE;
@@ -194,7 +173,7 @@ static struct xt_target masquerade = {
 	.name		= "MASQUERADE",
 	.family		= AF_INET,
 	.target		= masquerade_target,
-	.targetsize	= sizeof(struct ip_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
 	.hooks		= 1 << NF_IP_POST_ROUTING,
 	.checkentry	= masquerade_check,
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index d03f165722d..068c69bce30 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -16,11 +16,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/x_tables.h>
-#ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
 
 #define MODULENAME "NETMAP"
 MODULE_LICENSE("GPL");
@@ -40,7 +36,7 @@ check(const char *tablename,
       void *targinfo,
       unsigned int hook_mask)
 {
-	const struct ip_nat_multi_range_compat *mr = targinfo;
+	const struct nf_nat_multi_range_compat *mr = targinfo;
 
 	if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
 		DEBUGP(MODULENAME":check: bad MAP_IPS.\n");
@@ -61,16 +57,16 @@ target(struct sk_buff **pskb,
        const struct xt_target *target,
        const void *targinfo)
 {
-	struct ip_conntrack *ct;
+	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	__be32 new_ip, netmask;
-	const struct ip_nat_multi_range_compat *mr = targinfo;
-	struct ip_nat_range newrange;
+	const struct nf_nat_multi_range_compat *mr = targinfo;
+	struct nf_nat_range newrange;
 
-	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
 		     || hooknum == NF_IP_POST_ROUTING
 		     || hooknum == NF_IP_LOCAL_OUT);
-	ct = ip_conntrack_get(*pskb, &ctinfo);
+	ct = nf_ct_get(*pskb, &ctinfo);
 
 	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
 
@@ -80,20 +76,20 @@ target(struct sk_buff **pskb,
 		new_ip = ip_hdr(*pskb)->saddr & ~netmask;
 	new_ip |= mr->range[0].min_ip & netmask;
 
-	newrange = ((struct ip_nat_range)
+	newrange = ((struct nf_nat_range)
 		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
 		  new_ip, new_ip,
 		  mr->range[0].min, mr->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return ip_nat_setup_info(ct, &newrange, hooknum);
+	return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
 static struct xt_target target_module = {
 	.name 		= MODULENAME,
 	.family		= AF_INET,
 	.target 	= target,
-	.targetsize	= sizeof(struct ip_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING) |
 			  (1 << NF_IP_LOCAL_OUT),
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index c2b6b80670f..68cc76a198e 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -19,11 +19,7 @@
 #include <net/checksum.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/x_tables.h>
-#ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -43,7 +39,7 @@ redirect_check(const char *tablename,
 	       void *targinfo,
 	       unsigned int hook_mask)
 {
-	const struct ip_nat_multi_range_compat *mr = targinfo;
+	const struct nf_nat_multi_range_compat *mr = targinfo;
 
 	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
 		DEBUGP("redirect_check: bad MAP_IPS.\n");
@@ -64,17 +60,17 @@ redirect_target(struct sk_buff **pskb,
 		const struct xt_target *target,
 		const void *targinfo)
 {
-	struct ip_conntrack *ct;
+	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	__be32 newdst;
-	const struct ip_nat_multi_range_compat *mr = targinfo;
-	struct ip_nat_range newrange;
+	const struct nf_nat_multi_range_compat *mr = targinfo;
+	struct nf_nat_range newrange;
 
-	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
+	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
 		     || hooknum == NF_IP_LOCAL_OUT);
 
-	ct = ip_conntrack_get(*pskb, &ctinfo);
-	IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+	ct = nf_ct_get(*pskb, &ctinfo);
+	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
 
 	/* Local packets: make them go to loopback */
 	if (hooknum == NF_IP_LOCAL_OUT)
@@ -96,20 +92,20 @@ redirect_target(struct sk_buff **pskb,
 	}
 
 	/* Transfer from original range. */
-	newrange = ((struct ip_nat_range)
+	newrange = ((struct nf_nat_range)
 		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
 		  newdst, newdst,
 		  mr->range[0].min, mr->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return ip_nat_setup_info(ct, &newrange, hooknum);
+	return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
 static struct xt_target redirect_reg = {
 	.name		= "REDIRECT",
 	.family		= AF_INET,
 	.target		= redirect_target,
-	.targetsize	= sizeof(struct ip_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT),
 	.checkentry	= redirect_check,
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index bd4404e5c68..fe76ffc0cae 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -35,11 +35,7 @@
 #include <net/checksum.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter/x_tables.h>
-#ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_rule.h>
-#else
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#endif
 #include <linux/netfilter_ipv4/ipt_SAME.h>
 
 MODULE_LICENSE("GPL");
@@ -138,17 +134,17 @@ same_target(struct sk_buff **pskb,
 		const struct xt_target *target,
 		const void *targinfo)
 {
-	struct ip_conntrack *ct;
+	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	u_int32_t tmpip, aindex;
 	__be32 new_ip;
 	const struct ipt_same_info *same = targinfo;
-	struct ip_nat_range newrange;
-	const struct ip_conntrack_tuple *t;
+	struct nf_nat_range newrange;
+	const struct nf_conntrack_tuple *t;
 
-	IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
+	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
 			hooknum == NF_IP_POST_ROUTING);
-	ct = ip_conntrack_get(*pskb, &ctinfo);
+	ct = nf_ct_get(*pskb, &ctinfo);
 
 	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
 
@@ -157,17 +153,10 @@ same_target(struct sk_buff **pskb,
 	   Here we calculate the index in same->iparray which
 	   holds the ipaddress we should use */
 
-#ifdef CONFIG_NF_NAT_NEEDED
 	tmpip = ntohl(t->src.u3.ip);
 
 	if (!(same->info & IPT_SAME_NODST))
 		tmpip += ntohl(t->dst.u3.ip);
-#else
-	tmpip = ntohl(t->src.ip);
-
-	if (!(same->info & IPT_SAME_NODST))
-		tmpip += ntohl(t->dst.ip);
-#endif
 	aindex = tmpip % same->ipnum;
 
 	new_ip = htonl(same->iparray[aindex]);
@@ -178,13 +167,13 @@ same_target(struct sk_buff **pskb,
 			NIPQUAD(new_ip));
 
 	/* Transfer from original range. */
-	newrange = ((struct ip_nat_range)
+	newrange = ((struct nf_nat_range)
 		{ same->range[0].flags, new_ip, new_ip,
 		  /* FIXME: Use ports from correct range! */
 		  same->range[0].min, same->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return ip_nat_setup_info(ct, &newrange, hooknum);
+	return nf_nat_setup_info(ct, &newrange, hooknum);
 }
 
 static struct xt_target same_reg = {
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 3c58fea0d39..fcebc968d37 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -33,7 +33,7 @@ static int set_addr(struct sk_buff **pskb,
 		    unsigned int addroff, __be32 ip, __be16 port)
 {
 	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = ip_conntrack_get(*pskb, &ctinfo);
+	struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo);
 	struct {
 		__be32 ip;
 		__be16 port;
@@ -383,7 +383,7 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct,
 static void ip_nat_q931_expect(struct nf_conn *new,
 			       struct nf_conntrack_expect *this)
 {
-	struct ip_nat_range range;
+	struct nf_nat_range range;
 
 	if (this->tuple.src.u3.ip != 0) {	/* Only accept calls from GK */
 		nf_nat_follow_master(new, this);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 7ba341c22ea..a66888749ce 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -53,7 +53,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 	struct nf_conntrack_tuple t;
 	struct nf_ct_pptp_master *ct_pptp_info;
 	struct nf_nat_pptp *nat_pptp_info;
-	struct ip_nat_range range;
+	struct nf_nat_range range;
 
 	ct_pptp_info = &nfct_help(master)->help.ct_pptp_info;
 	nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 54698af6d0a..c558f321425 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -25,6 +25,7 @@ config NETFILTER_NETLINK_LOG
 	  and is also scheduled to replace the old syslog-based ipt_LOG
 	  and ip6t_LOG modules.
 
+# Rename this to NF_CONNTRACK in a 2.6.25
 config NF_CONNTRACK_ENABLED
 	tristate "Netfilter connection tracking support"
 	help
@@ -39,42 +40,9 @@ config NF_CONNTRACK_ENABLED
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-choice
-	prompt "Netfilter connection tracking support"
-	depends on NF_CONNTRACK_ENABLED
-
-config NF_CONNTRACK_SUPPORT
-	bool "Layer 3 Independent Connection tracking"
-	help
-	  Layer 3 independent connection tracking is experimental scheme
-	  which generalize ip_conntrack to support other layer 3 protocols.
-
-	  This is required to do Masquerading or other kinds of Network
-	  Address Translation (except for Fast NAT).  It can also be used to
-	  enhance packet filtering (see `Connection state match support'
-	  below).
-
-config IP_NF_CONNTRACK_SUPPORT
-	bool "Layer 3 Dependent Connection tracking (OBSOLETE)"
-	help
-	  The old, Layer 3 dependent ip_conntrack subsystem of netfilter.
-
-	  This is required to do Masquerading or other kinds of Network
-	  Address Translation (except for Fast NAT).  It can also be used to
-	  enhance packet filtering (see `Connection state match support'
-	  below).
-
-endchoice
-
 config NF_CONNTRACK
 	tristate
-	default m if NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=m
-	default y if NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=y
-
-config IP_NF_CONNTRACK
-	tristate
-	default m if IP_NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=m
-	default y if IP_NF_CONNTRACK_SUPPORT && NF_CONNTRACK_ENABLED=y
+	default NF_CONNTRACK_ENABLED
 
 config NF_CT_ACCT
 	bool "Connection tracking flow accounting"
@@ -303,9 +271,8 @@ config NETFILTER_XT_TARGET_CONNMARK
 	tristate  '"CONNMARK" target support'
 	depends on NETFILTER_XTABLES
 	depends on IP_NF_MANGLE || IP6_NF_MANGLE
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK
-	select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
-	select NF_CONNTRACK_MARK if NF_CONNTRACK
+	depends on NF_CONNTRACK
+	select NF_CONNTRACK_MARK
 	help
 	  This option adds a `CONNMARK' target, which allows one to manipulate
 	  the connection mark value.  Similar to the MARK target, but
@@ -366,7 +333,7 @@ config NETFILTER_XT_TARGET_NOTRACK
 	tristate  '"NOTRACK" target support'
 	depends on NETFILTER_XTABLES
 	depends on IP_NF_RAW || IP6_NF_RAW
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK
+	depends on NF_CONNTRACK
 	help
 	  The NOTRACK target allows a select rule to specify
 	  which packets *not* to enter the conntrack/NAT
@@ -387,9 +354,7 @@ config NETFILTER_XT_TARGET_SECMARK
 
 config NETFILTER_XT_TARGET_CONNSECMARK
 	tristate '"CONNSECMARK" target support'
-	depends on NETFILTER_XTABLES && \
-		   ((NF_CONNTRACK && NF_CONNTRACK_SECMARK) || \
-		    (IP_NF_CONNTRACK && IP_NF_CONNTRACK_SECMARK))
+	depends on NETFILTER_XTABLES && NF_CONNTRACK && NF_CONNTRACK_SECMARK
 	help
 	  The CONNSECMARK target copies security markings from packets
 	  to connections, and restores security markings from connections
@@ -437,9 +402,8 @@ config NETFILTER_XT_MATCH_COMMENT
 config NETFILTER_XT_MATCH_CONNBYTES
 	tristate  '"connbytes" per-connection counter match support'
 	depends on NETFILTER_XTABLES
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK
-	select IP_NF_CT_ACCT if IP_NF_CONNTRACK
-	select NF_CT_ACCT if NF_CONNTRACK
+	depends on NF_CONNTRACK
+	select NF_CT_ACCT
 	help
 	  This option adds a `connbytes' match, which allows you to match the
 	  number of bytes and/or packets for each direction within a connection.
@@ -450,9 +414,8 @@ config NETFILTER_XT_MATCH_CONNBYTES
 config NETFILTER_XT_MATCH_CONNMARK
 	tristate  '"connmark" connection mark match support'
 	depends on NETFILTER_XTABLES
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK
-	select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK
-	select NF_CONNTRACK_MARK if NF_CONNTRACK
+	depends on NF_CONNTRACK
+	select NF_CONNTRACK_MARK
 	help
 	  This option adds a `connmark' match, which allows you to match the
 	  connection mark value previously set for the session by `CONNMARK'. 
@@ -464,7 +427,7 @@ config NETFILTER_XT_MATCH_CONNMARK
 config NETFILTER_XT_MATCH_CONNTRACK
 	tristate '"conntrack" connection tracking match support'
 	depends on NETFILTER_XTABLES
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK
+	depends on NF_CONNTRACK
 	help
 	  This is a general conntrack match module, a superset of the state match.
 
@@ -508,7 +471,7 @@ config NETFILTER_XT_MATCH_ESP
 config NETFILTER_XT_MATCH_HELPER
 	tristate '"helper" match support'
 	depends on NETFILTER_XTABLES
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK
+	depends on NF_CONNTRACK
 	help
 	  Helper matching allows you to match packets in dynamic connections
 	  tracked by a conntrack-helper, ie. ip_conntrack_ftp
@@ -632,7 +595,7 @@ config NETFILTER_XT_MATCH_SCTP
 config NETFILTER_XT_MATCH_STATE
 	tristate '"state" match support'
 	depends on NETFILTER_XTABLES
-	depends on IP_NF_CONNTRACK || NF_CONNTRACK
+	depends on NF_CONNTRACK
 	help
 	  Connection state matching allows you to match packets based on their
 	  relationship to a tracked connection (ie. previous packets).  This
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 795c058b16a..b03ce009d0b 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -30,10 +30,7 @@ MODULE_ALIAS("ipt_CONNMARK");
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_CONNMARK.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netfilter/nf_conntrack_ecache.h>
-#endif
 
 static unsigned int
 target(struct sk_buff **pskb,
@@ -44,40 +41,33 @@ target(struct sk_buff **pskb,
        const void *targinfo)
 {
 	const struct xt_connmark_target_info *markinfo = targinfo;
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
 	u_int32_t diff;
 	u_int32_t mark;
 	u_int32_t newmark;
-	u_int32_t ctinfo;
-	u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo);
 
-	if (ctmark) {
+	ct = nf_ct_get(*pskb, &ctinfo);
+	if (ct) {
 		switch(markinfo->mode) {
 		case XT_CONNMARK_SET:
-			newmark = (*ctmark & ~markinfo->mask) | markinfo->mark;
-			if (newmark != *ctmark) {
-				*ctmark = newmark;
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-				ip_conntrack_event_cache(IPCT_MARK, *pskb);
-#else
+			newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
+			if (newmark != ct->mark) {
+				ct->mark = newmark;
 				nf_conntrack_event_cache(IPCT_MARK, *pskb);
-#endif
 			}
 			break;
 		case XT_CONNMARK_SAVE:
-			newmark = (*ctmark & ~markinfo->mask) |
+			newmark = (ct->mark & ~markinfo->mask) |
 				  ((*pskb)->mark & markinfo->mask);
-			if (*ctmark != newmark) {
-				*ctmark = newmark;
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-				ip_conntrack_event_cache(IPCT_MARK, *pskb);
-#else
+			if (ct->mark != newmark) {
+				ct->mark = newmark;
 				nf_conntrack_event_cache(IPCT_MARK, *pskb);
-#endif
 			}
 			break;
 		case XT_CONNMARK_RESTORE:
 			mark = (*pskb)->mark;
-			diff = (*ctmark ^ mark) & markinfo->mask;
+			diff = (ct->mark ^ mark) & markinfo->mask;
 			(*pskb)->mark = mark ^ diff;
 			break;
 		}
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 1ab0db641f9..81c0c58bab4 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -19,7 +19,7 @@
 #include <linux/skbuff.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_CONNSECMARK.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
 
 #define PFX "CONNSECMARK: "
 
@@ -36,12 +36,12 @@ MODULE_ALIAS("ip6t_CONNSECMARK");
 static void secmark_save(struct sk_buff *skb)
 {
 	if (skb->secmark) {
-		u32 *connsecmark;
+		struct nf_conn *ct;
 		enum ip_conntrack_info ctinfo;
 
-		connsecmark = nf_ct_get_secmark(skb, &ctinfo);
-		if (connsecmark && !*connsecmark)
-			*connsecmark = skb->secmark;
+		ct = nf_ct_get(skb, &ctinfo);
+		if (ct && !ct->secmark)
+			ct->secmark = skb->secmark;
 	}
 }
 
@@ -52,12 +52,12 @@ static void secmark_save(struct sk_buff *skb)
 static void secmark_restore(struct sk_buff *skb)
 {
 	if (!skb->secmark) {
-		u32 *connsecmark;
+		struct nf_conn *ct;
 		enum ip_conntrack_info ctinfo;
 
-		connsecmark = nf_ct_get_secmark(skb, &ctinfo);
-		if (connsecmark && *connsecmark)
-			skb->secmark = *connsecmark;
+		ct = nf_ct_get(skb, &ctinfo);
+		if (ct && ct->secmark)
+			skb->secmark = ct->secmark;
 	}
 }
 
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index b874a2008b2..5085fb3d1e2 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -5,7 +5,7 @@
 #include <linux/skbuff.h>
 
 #include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
 
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_NOTRACK");
@@ -26,7 +26,7 @@ target(struct sk_buff **pskb,
 	   If there is a real ct entry correspondig to this packet,
 	   it'll hang aroun till timing out. We don't deal with it
 	   for performance reasons. JK */
-	nf_ct_untrack(*pskb);
+	(*pskb)->nfct = &nf_conntrack_untracked.ct_general;
 	(*pskb)->nfctinfo = IP_CT_NEW;
 	nf_conntrack_get((*pskb)->nfct);
 
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 302043bc41b..fec9316a1e1 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -12,9 +12,9 @@
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
-#include <net/netfilter/nf_conntrack_compat.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_connbytes.h>
+#include <net/netfilter/nf_conntrack.h>
 
 #include <asm/div64.h>
 #include <asm/bitops.h>
@@ -35,13 +35,17 @@ match(const struct sk_buff *skb,
       int *hotdrop)
 {
 	const struct xt_connbytes_info *sinfo = matchinfo;
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
 	u_int64_t what = 0;	/* initialize to make gcc happy */
 	u_int64_t bytes = 0;
 	u_int64_t pkts = 0;
 	const struct ip_conntrack_counter *counters;
 
-	if (!(counters = nf_ct_get_counters(skb)))
-		return 0; /* no match */
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
+		return 0;
+	counters = ct->counters;
 
 	switch (sinfo->what) {
 	case XT_CONNBYTES_PKTS:
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 36c2defff23..e1803256c79 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -21,16 +21,15 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_connmark.h>
 
 MODULE_AUTHOR("Henrik Nordstrom <hno@marasytems.com>");
 MODULE_DESCRIPTION("IP tables connmark match module");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_connmark");
 
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter/xt_connmark.h>
-#include <net/netfilter/nf_conntrack_compat.h>
-
 static int
 match(const struct sk_buff *skb,
       const struct net_device *in,
@@ -42,12 +41,14 @@ match(const struct sk_buff *skb,
       int *hotdrop)
 {
 	const struct xt_connmark_info *info = matchinfo;
-	u_int32_t ctinfo;
-	const u_int32_t *ctmark = nf_ct_get_mark(skb, &ctinfo);
-	if (!ctmark)
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (!ct)
 		return 0;
 
-	return (((*ctmark) & info->mask) == info->mark) ^ info->invert;
+	return (((ct->mark) & info->mask) == info->mark) ^ info->invert;
 }
 
 static int
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 2885c378288..f4ea8fe07a5 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -10,121 +10,15 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
-
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-#else
-#include <net/netfilter/nf_conntrack.h>
-#endif
-
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_conntrack.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
 MODULE_DESCRIPTION("iptables connection tracking match module");
 MODULE_ALIAS("ipt_conntrack");
 
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-	const struct xt_conntrack_info *sinfo = matchinfo;
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-	unsigned int statebit;
-
-	ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
-
-#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & invflg))
-
-	if (ct == &ip_conntrack_untracked)
-		statebit = XT_CONNTRACK_STATE_UNTRACKED;
-	else if (ct)
-		statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
-	else
-		statebit = XT_CONNTRACK_STATE_INVALID;
-
-	if (sinfo->flags & XT_CONNTRACK_STATE) {
-		if (ct) {
-			if (test_bit(IPS_SRC_NAT_BIT, &ct->status))
-				statebit |= XT_CONNTRACK_STATE_SNAT;
-			if (test_bit(IPS_DST_NAT_BIT, &ct->status))
-				statebit |= XT_CONNTRACK_STATE_DNAT;
-		}
-		if (FWINV((statebit & sinfo->statemask) == 0,
-			  XT_CONNTRACK_STATE))
-			return 0;
-	}
-
-	if (ct == NULL) {
-		if (sinfo->flags & ~XT_CONNTRACK_STATE)
-			return 0;
-		return 1;
-	}
-
-	if (sinfo->flags & XT_CONNTRACK_PROTO &&
-	    FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum !=
-		  sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum,
-		  XT_CONNTRACK_PROTO))
-		return 0;
-
-	if (sinfo->flags & XT_CONNTRACK_ORIGSRC &&
-	    FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip &
-		   sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
-		  sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip,
-		  XT_CONNTRACK_ORIGSRC))
-		return 0;
-
-	if (sinfo->flags & XT_CONNTRACK_ORIGDST &&
-	    FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip &
-		   sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) !=
-		  sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip,
-		  XT_CONNTRACK_ORIGDST))
-		return 0;
-
-	if (sinfo->flags & XT_CONNTRACK_REPLSRC &&
-	    FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip &
-		   sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) !=
-		  sinfo->tuple[IP_CT_DIR_REPLY].src.ip,
-		  XT_CONNTRACK_REPLSRC))
-		return 0;
-
-	if (sinfo->flags & XT_CONNTRACK_REPLDST &&
-	    FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip &
-		   sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) !=
-		  sinfo->tuple[IP_CT_DIR_REPLY].dst.ip,
-		  XT_CONNTRACK_REPLDST))
-		return 0;
-
-	if (sinfo->flags & XT_CONNTRACK_STATUS &&
-	    FWINV((ct->status & sinfo->statusmask) == 0,
-		  XT_CONNTRACK_STATUS))
-		return 0;
-
-	if (sinfo->flags & XT_CONNTRACK_EXPIRES) {
-		unsigned long expires = timer_pending(&ct->timeout) ?
-					(ct->timeout.expires - jiffies)/HZ : 0;
-
-		if (FWINV(!(expires >= sinfo->expires_min &&
-			    expires <= sinfo->expires_max),
-			  XT_CONNTRACK_EXPIRES))
-			return 0;
-	}
-	return 1;
-}
-
-#else /* CONFIG_IP_NF_CONNTRACK */
 static int
 match(const struct sk_buff *skb,
       const struct net_device *in,
@@ -220,8 +114,6 @@ match(const struct sk_buff *skb,
 	return 1;
 }
 
-#endif /* CONFIG_NF_IP_CONNTRACK */
-
 static int
 checkentry(const char *tablename,
 	   const void *ip,
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 407d1d5da8a..bc70b26ba5b 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -13,18 +13,11 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter.h>
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#else
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_helper.h>
-#endif
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_helper.h>
-#include <net/netfilter/nf_conntrack_compat.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Martin Josefsson <gandalf@netfilter.org>");
@@ -38,55 +31,6 @@ MODULE_ALIAS("ip6t_helper");
 #define DEBUGP(format, args...)
 #endif
 
-#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE)
-static int
-match(const struct sk_buff *skb,
-      const struct net_device *in,
-      const struct net_device *out,
-      const struct xt_match *match,
-      const void *matchinfo,
-      int offset,
-      unsigned int protoff,
-      int *hotdrop)
-{
-	const struct xt_helper_info *info = matchinfo;
-	struct ip_conntrack *ct;
-	enum ip_conntrack_info ctinfo;
-	int ret = info->invert;
-
-	ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
-	if (!ct) {
-		DEBUGP("xt_helper: Eek! invalid conntrack?\n");
-		return ret;
-	}
-
-	if (!ct->master) {
-		DEBUGP("xt_helper: conntrack %p has no master\n", ct);
-		return ret;
-	}
-
-	read_lock_bh(&ip_conntrack_lock);
-	if (!ct->master->helper) {
-		DEBUGP("xt_helper: master ct %p has no helper\n",
-			exp->expectant);
-		goto out_unlock;
-	}
-
-	DEBUGP("master's name = %s , info->name = %s\n",
-		ct->master->helper->name, info->name);
-
-	if (info->name[0] == '\0')
-		ret ^= 1;
-	else
-		ret ^= !strncmp(ct->master->helper->name, info->name,
-				strlen(ct->master->helper->name));
-out_unlock:
-	read_unlock_bh(&ip_conntrack_lock);
-	return ret;
-}
-
-#else /* CONFIG_IP_NF_CONNTRACK */
-
 static int
 match(const struct sk_buff *skb,
       const struct net_device *in,
@@ -134,7 +78,6 @@ out_unlock:
 	read_unlock_bh(&nf_conntrack_lock);
 	return ret;
 }
-#endif
 
 static int check(const char *tablename,
 		 const void *inf,
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index df37b912163..149294f7df7 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -10,7 +10,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_state.h>
 
@@ -36,7 +36,7 @@ match(const struct sk_buff *skb,
 
 	if (nf_ct_is_untracked(skb))
 		statebit = XT_STATE_UNTRACKED;
-	else if (!nf_ct_get_ctinfo(skb, &ctinfo))
+	else if (!nf_ct_get(skb, &ctinfo))
 		statebit = XT_STATE_INVALID;
 	else
 		statebit = XT_STATE_BIT(ctinfo);
-- 
cgit v1.2.3


From b19caa0ca071dce76b0e81e957e7eb7c03d72cf5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 14 Mar 2007 16:37:52 -0700
Subject: [NETFILTER]: nf_conntrack: switch protocol
 registration/unregistration to mutex

The protocol lookups done by nf_conntrack are already protected by RCU,
there is no need to keep taking nf_conntrack_lock for registration
and unregistration. Switch to a mutex.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto.c | 52 +++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 29 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 456155f05c7..e2c4a58603a 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -32,9 +32,9 @@ struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly;
 struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ct_l3protos);
 
-#ifdef CONFIG_SYSCTL
-static DEFINE_MUTEX(nf_ct_proto_sysctl_mutex);
+static DEFINE_MUTEX(nf_ct_proto_mutex);
 
+#ifdef CONFIG_SYSCTL
 static int
 nf_ct_register_sysctl(struct ctl_table_header **header, struct ctl_table *path,
 		      struct ctl_table *table, unsigned int *users)
@@ -164,13 +164,13 @@ static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto)
 	int err = 0;
 
 #ifdef CONFIG_SYSCTL
-	mutex_lock(&nf_ct_proto_sysctl_mutex);
+	mutex_lock(&nf_ct_proto_mutex);
 	if (l3proto->ctl_table != NULL) {
 		err = nf_ct_register_sysctl(&l3proto->ctl_table_header,
 					    l3proto->ctl_table_path,
 					    l3proto->ctl_table, NULL);
 	}
-	mutex_unlock(&nf_ct_proto_sysctl_mutex);
+	mutex_unlock(&nf_ct_proto_mutex);
 #endif
 	return err;
 }
@@ -178,11 +178,11 @@ static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto)
 static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto)
 {
 #ifdef CONFIG_SYSCTL
-	mutex_lock(&nf_ct_proto_sysctl_mutex);
+	mutex_lock(&nf_ct_proto_mutex);
 	if (l3proto->ctl_table_header != NULL)
 		nf_ct_unregister_sysctl(&l3proto->ctl_table_header,
 					l3proto->ctl_table, NULL);
-	mutex_unlock(&nf_ct_proto_sysctl_mutex);
+	mutex_unlock(&nf_ct_proto_mutex);
 #endif
 }
 
@@ -195,13 +195,13 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
 		goto out;
 	}
 
-	write_lock_bh(&nf_conntrack_lock);
+	mutex_lock(&nf_ct_proto_mutex);
 	if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
 	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
-	write_unlock_bh(&nf_conntrack_lock);
+	mutex_unlock(&nf_ct_proto_mutex);
 
 	ret = nf_ct_l3proto_register_sysctl(proto);
 	if (ret < 0)
@@ -209,7 +209,7 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
 	return ret;
 
 out_unlock:
-	write_unlock_bh(&nf_conntrack_lock);
+	mutex_unlock(&nf_ct_proto_mutex);
 out:
 	return ret;
 }
@@ -219,11 +219,11 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 {
 	BUG_ON(proto->l3proto >= AF_MAX);
 
-	write_lock_bh(&nf_conntrack_lock);
+	mutex_lock(&nf_ct_proto_mutex);
 	BUG_ON(nf_ct_l3protos[proto->l3proto] != proto);
 	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
 			   &nf_conntrack_l3proto_generic);
-	write_unlock_bh(&nf_conntrack_lock);
+	mutex_unlock(&nf_ct_proto_mutex);
 	synchronize_rcu();
 
 	nf_ct_l3proto_unregister_sysctl(proto);
@@ -238,7 +238,7 @@ static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto)
 	int err = 0;
 
 #ifdef CONFIG_SYSCTL
-	mutex_lock(&nf_ct_proto_sysctl_mutex);
+	mutex_lock(&nf_ct_proto_mutex);
 	if (l4proto->ctl_table != NULL) {
 		err = nf_ct_register_sysctl(l4proto->ctl_table_header,
 					    nf_net_netfilter_sysctl_path,
@@ -260,7 +260,7 @@ static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto)
 	}
 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 out:
-	mutex_unlock(&nf_ct_proto_sysctl_mutex);
+	mutex_unlock(&nf_ct_proto_mutex);
 #endif /* CONFIG_SYSCTL */
 	return err;
 }
@@ -268,7 +268,7 @@ out:
 static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto)
 {
 #ifdef CONFIG_SYSCTL
-	mutex_lock(&nf_ct_proto_sysctl_mutex);
+	mutex_lock(&nf_ct_proto_mutex);
 	if (l4proto->ctl_table_header != NULL &&
 	    *l4proto->ctl_table_header != NULL)
 		nf_ct_unregister_sysctl(l4proto->ctl_table_header,
@@ -279,7 +279,7 @@ static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto
 		nf_ct_unregister_sysctl(&l4proto->ctl_compat_table_header,
 					l4proto->ctl_compat_table, NULL);
 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
-	mutex_unlock(&nf_ct_proto_sysctl_mutex);
+	mutex_unlock(&nf_ct_proto_mutex);
 #endif /* CONFIG_SYSCTL */
 }
 
@@ -297,8 +297,8 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
 	if (l4proto == &nf_conntrack_l4proto_generic)
 		return nf_ct_l4proto_register_sysctl(l4proto);
 
+	mutex_lock(&nf_ct_proto_mutex);
 retry:
-	write_lock_bh(&nf_conntrack_lock);
 	if (nf_ct_protos[l4proto->l3proto]) {
 		if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto]
 				!= &nf_conntrack_l4proto_generic) {
@@ -310,28 +310,22 @@ retry:
 		struct nf_conntrack_l4proto **proto_array;
 		int i;
 
-		write_unlock_bh(&nf_conntrack_lock);
-
 		proto_array = (struct nf_conntrack_l4proto **)
 				kmalloc(MAX_NF_CT_PROTO *
 					 sizeof(struct nf_conntrack_l4proto *),
 					GFP_KERNEL);
 		if (proto_array == NULL) {
 			ret = -ENOMEM;
-			goto out;
+			goto out_unlock;
 		}
 		for (i = 0; i < MAX_NF_CT_PROTO; i++)
 			proto_array[i] = &nf_conntrack_l4proto_generic;
 
-		write_lock_bh(&nf_conntrack_lock);
-		if (nf_ct_protos[l4proto->l3proto]) {
+		if (nf_ct_protos[l4proto->l3proto])
 			/* bad timing, but no problem */
-			write_unlock_bh(&nf_conntrack_lock);
 			kfree(proto_array);
-		} else {
+		else
 			nf_ct_protos[l4proto->l3proto] = proto_array;
-			write_unlock_bh(&nf_conntrack_lock);
-		}
 
 		/*
 		 * Just once because array is never freed until unloading
@@ -341,7 +335,7 @@ retry:
 	}
 
 	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto);
-	write_unlock_bh(&nf_conntrack_lock);
+	mutex_unlock(&nf_ct_proto_mutex);
 
 	ret = nf_ct_l4proto_register_sysctl(l4proto);
 	if (ret < 0)
@@ -349,7 +343,7 @@ retry:
 	return ret;
 
 out_unlock:
-	write_unlock_bh(&nf_conntrack_lock);
+	mutex_unlock(&nf_ct_proto_mutex);
 out:
 	return ret;
 }
@@ -364,11 +358,11 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
 		return;
 	}
 
-	write_lock_bh(&nf_conntrack_lock);
+	mutex_lock(&nf_ct_proto_mutex);
 	BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
 	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
 			   &nf_conntrack_l4proto_generic);
-	write_unlock_bh(&nf_conntrack_lock);
+	mutex_unlock(&nf_ct_proto_mutex);
 	synchronize_rcu();
 
 	nf_ct_l4proto_unregister_sysctl(l4proto);
-- 
cgit v1.2.3


From ac5357ebac43e191003c2cd0722377dccfa01a84 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 14 Mar 2007 16:38:25 -0700
Subject: [NETFILTER]: nf_conntrack: remove ugly hack in l4proto registration

Remove ugly special-casing of nf_conntrack_l4proto_generic, all it
wants is its sysctl tables registered, so do that explicitly in an
init function and move the remaining protocol initialization and
cleanup code to nf_conntrack_proto.c as well.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_conntrack_core.h |  3 +++
 net/netfilter/nf_conntrack_core.c         | 18 ++--------------
 net/netfilter/nf_conntrack_proto.c        | 34 +++++++++++++++++++++++--------
 3 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 85634e1865c..9fb906688ff 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -27,6 +27,9 @@ extern unsigned int nf_conntrack_in(int pf,
 extern int nf_conntrack_init(void);
 extern void nf_conntrack_cleanup(void);
 
+extern int nf_conntrack_proto_init(void);
+extern void nf_conntrack_proto_fini(void);
+
 struct nf_conntrack_l3proto;
 extern struct nf_conntrack_l3proto *nf_ct_find_l3proto(u_int16_t pf);
 /* Like above, but you already have conntrack read lock. */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 7694c51f125..9858bcb29aa 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1152,14 +1152,7 @@ void nf_conntrack_cleanup(void)
 	free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
 			    nf_conntrack_htable_size);
 
-	nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_generic);
-
-	/* free l3proto protocol tables */
-	for (i = 0; i < PF_MAX; i++)
-		if (nf_ct_protos[i]) {
-			kfree(nf_ct_protos[i]);
-			nf_ct_protos[i] = NULL;
-		}
+	nf_conntrack_proto_fini();
 }
 
 static struct list_head *alloc_hashtable(int size, int *vmalloced)
@@ -1237,7 +1230,6 @@ module_param_call(hashsize, set_hashsize, param_get_uint,
 
 int __init nf_conntrack_init(void)
 {
-	unsigned int i;
 	int ret;
 
 	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
@@ -1279,16 +1271,10 @@ int __init nf_conntrack_init(void)
 		goto err_free_conntrack_slab;
 	}
 
-	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_generic);
+	ret = nf_conntrack_proto_init();
 	if (ret < 0)
 		goto out_free_expect_slab;
 
-	/* Don't NEED lock here, but good form anyway. */
-	write_lock_bh(&nf_conntrack_lock);
-	for (i = 0; i < AF_MAX; i++)
-		nf_ct_l3protos[i] = &nf_conntrack_l3proto_generic;
-	write_unlock_bh(&nf_conntrack_lock);
-
 	/* For use by REJECT target */
 	rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach);
 
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index e2c4a58603a..0ca2f0ba7c7 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -294,9 +294,6 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
 		goto out;
 	}
 
-	if (l4proto == &nf_conntrack_l4proto_generic)
-		return nf_ct_l4proto_register_sysctl(l4proto);
-
 	mutex_lock(&nf_ct_proto_mutex);
 retry:
 	if (nf_ct_protos[l4proto->l3proto]) {
@@ -353,11 +350,6 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
 {
 	BUG_ON(l4proto->l3proto >= PF_MAX);
 
-	if (l4proto == &nf_conntrack_l4proto_generic) {
-		nf_ct_l4proto_unregister_sysctl(l4proto);
-		return;
-	}
-
 	mutex_lock(&nf_ct_proto_mutex);
 	BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
 	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
@@ -371,3 +363,29 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
 	nf_ct_iterate_cleanup(kill_l4proto, l4proto);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
+
+int nf_conntrack_proto_init(void)
+{
+	unsigned int i;
+	int err;
+
+	err = nf_ct_l4proto_register_sysctl(&nf_conntrack_l4proto_generic);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < AF_MAX; i++)
+		rcu_assign_pointer(nf_ct_l3protos[i],
+				   &nf_conntrack_l3proto_generic);
+	return 0;
+}
+
+void nf_conntrack_proto_fini(void)
+{
+	unsigned int i;
+
+	nf_ct_l4proto_unregister_sysctl(&nf_conntrack_l4proto_generic);
+
+	/* free l3proto protocol tables */
+	for (i = 0; i < PF_MAX; i++)
+		kfree(nf_ct_protos[i]);
+}
-- 
cgit v1.2.3


From 0661cca9c216322e77dca7f47df107c02ce4e70c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 14 Mar 2007 16:38:48 -0700
Subject: [NETFILTER]: nf_conntrack: simplify protocol locking

Now that we don't use nf_conntrack_lock anymore but a single mutex for
all protocol handling, no need to release and grab it again for sysctl
registration.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto.c | 45 ++++++++++++--------------------------
 1 file changed, 14 insertions(+), 31 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 0ca2f0ba7c7..509725412d3 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -164,13 +164,11 @@ static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto)
 	int err = 0;
 
 #ifdef CONFIG_SYSCTL
-	mutex_lock(&nf_ct_proto_mutex);
 	if (l3proto->ctl_table != NULL) {
 		err = nf_ct_register_sysctl(&l3proto->ctl_table_header,
 					    l3proto->ctl_table_path,
 					    l3proto->ctl_table, NULL);
 	}
-	mutex_unlock(&nf_ct_proto_mutex);
 #endif
 	return err;
 }
@@ -178,11 +176,9 @@ static int nf_ct_l3proto_register_sysctl(struct nf_conntrack_l3proto *l3proto)
 static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto)
 {
 #ifdef CONFIG_SYSCTL
-	mutex_lock(&nf_ct_proto_mutex);
 	if (l3proto->ctl_table_header != NULL)
 		nf_ct_unregister_sysctl(&l3proto->ctl_table_header,
 					l3proto->ctl_table, NULL);
-	mutex_unlock(&nf_ct_proto_mutex);
 #endif
 }
 
@@ -190,27 +186,23 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
 {
 	int ret = 0;
 
-	if (proto->l3proto >= AF_MAX) {
-		ret = -EBUSY;
-		goto out;
-	}
+	if (proto->l3proto >= AF_MAX)
+		return -EBUSY;
 
 	mutex_lock(&nf_ct_proto_mutex);
 	if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
-	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
-	mutex_unlock(&nf_ct_proto_mutex);
 
 	ret = nf_ct_l3proto_register_sysctl(proto);
 	if (ret < 0)
-		nf_conntrack_l3proto_unregister(proto);
-	return ret;
+		goto out_unlock;
+
+	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
 
 out_unlock:
 	mutex_unlock(&nf_ct_proto_mutex);
-out:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
@@ -223,10 +215,10 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
 	BUG_ON(nf_ct_l3protos[proto->l3proto] != proto);
 	rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
 			   &nf_conntrack_l3proto_generic);
+	nf_ct_l3proto_unregister_sysctl(proto);
 	mutex_unlock(&nf_ct_proto_mutex);
-	synchronize_rcu();
 
-	nf_ct_l3proto_unregister_sysctl(proto);
+	synchronize_rcu();
 
 	/* Remove all contrack entries for this protocol */
 	nf_ct_iterate_cleanup(kill_l3proto, proto);
@@ -238,7 +230,6 @@ static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto)
 	int err = 0;
 
 #ifdef CONFIG_SYSCTL
-	mutex_lock(&nf_ct_proto_mutex);
 	if (l4proto->ctl_table != NULL) {
 		err = nf_ct_register_sysctl(l4proto->ctl_table_header,
 					    nf_net_netfilter_sysctl_path,
@@ -260,7 +251,6 @@ static int nf_ct_l4proto_register_sysctl(struct nf_conntrack_l4proto *l4proto)
 	}
 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 out:
-	mutex_unlock(&nf_ct_proto_mutex);
 #endif /* CONFIG_SYSCTL */
 	return err;
 }
@@ -268,7 +258,6 @@ out:
 static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto)
 {
 #ifdef CONFIG_SYSCTL
-	mutex_lock(&nf_ct_proto_mutex);
 	if (l4proto->ctl_table_header != NULL &&
 	    *l4proto->ctl_table_header != NULL)
 		nf_ct_unregister_sysctl(l4proto->ctl_table_header,
@@ -279,7 +268,6 @@ static void nf_ct_l4proto_unregister_sysctl(struct nf_conntrack_l4proto *l4proto
 		nf_ct_unregister_sysctl(&l4proto->ctl_compat_table_header,
 					l4proto->ctl_compat_table, NULL);
 #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
-	mutex_unlock(&nf_ct_proto_mutex);
 #endif /* CONFIG_SYSCTL */
 }
 
@@ -289,10 +277,8 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
 {
 	int ret = 0;
 
-	if (l4proto->l3proto >= PF_MAX) {
-		ret = -EBUSY;
-		goto out;
-	}
+	if (l4proto->l3proto >= PF_MAX)
+		return -EBUSY;
 
 	mutex_lock(&nf_ct_proto_mutex);
 retry:
@@ -331,17 +317,14 @@ retry:
 		goto retry;
 	}
 
-	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto);
-	mutex_unlock(&nf_ct_proto_mutex);
-
 	ret = nf_ct_l4proto_register_sysctl(l4proto);
 	if (ret < 0)
-		nf_conntrack_l4proto_unregister(l4proto);
-	return ret;
+		goto out_unlock;
+
+	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto);
 
 out_unlock:
 	mutex_unlock(&nf_ct_proto_mutex);
-out:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
@@ -354,10 +337,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
 	BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
 	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
 			   &nf_conntrack_l4proto_generic);
+	nf_ct_l4proto_unregister_sysctl(l4proto);
 	mutex_unlock(&nf_ct_proto_mutex);
-	synchronize_rcu();
 
-	nf_ct_l4proto_unregister_sysctl(l4proto);
+	synchronize_rcu();
 
 	/* Remove all contrack entries for this protocol */
 	nf_ct_iterate_cleanup(kill_l4proto, l4proto);
-- 
cgit v1.2.3


From c6a1e615d1ba942b9e783079d53f741e4a8e1c89 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 14 Mar 2007 16:39:07 -0700
Subject: [NETFILTER]: nf_conntrack: simplify l4 protocol array allocation

The retrying after an allocation failure is not necessary anymore
since we're holding the mutex the entire time, for the same
reason the double allocation race can't happen anymore.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto.c | 37 ++++++++++++-------------------------
 1 file changed, 12 insertions(+), 25 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 509725412d3..e4aad2087f4 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -281,47 +281,34 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
 		return -EBUSY;
 
 	mutex_lock(&nf_ct_proto_mutex);
-retry:
-	if (nf_ct_protos[l4proto->l3proto]) {
-		if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto]
-				!= &nf_conntrack_l4proto_generic) {
-			ret = -EBUSY;
-			goto out_unlock;
-		}
-	} else {
+	if (!nf_ct_protos[l4proto->l3proto]) {
 		/* l3proto may be loaded latter. */
 		struct nf_conntrack_l4proto **proto_array;
 		int i;
 
-		proto_array = (struct nf_conntrack_l4proto **)
-				kmalloc(MAX_NF_CT_PROTO *
-					 sizeof(struct nf_conntrack_l4proto *),
-					GFP_KERNEL);
+		proto_array = kmalloc(MAX_NF_CT_PROTO *
+				      sizeof(struct nf_conntrack_l4proto *),
+				      GFP_KERNEL);
 		if (proto_array == NULL) {
 			ret = -ENOMEM;
 			goto out_unlock;
 		}
+
 		for (i = 0; i < MAX_NF_CT_PROTO; i++)
 			proto_array[i] = &nf_conntrack_l4proto_generic;
-
-		if (nf_ct_protos[l4proto->l3proto])
-			/* bad timing, but no problem */
-			kfree(proto_array);
-		else
-			nf_ct_protos[l4proto->l3proto] = proto_array;
-
-		/*
-		 * Just once because array is never freed until unloading
-		 * nf_conntrack.ko
-		 */
-		goto retry;
+		nf_ct_protos[l4proto->l3proto] = proto_array;
+	} else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
+					&nf_conntrack_l4proto_generic) {
+		ret = -EBUSY;
+		goto out_unlock;
 	}
 
 	ret = nf_ct_l4proto_register_sysctl(l4proto);
 	if (ret < 0)
 		goto out_unlock;
 
-	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto);
+	rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+			   l4proto);
 
 out_unlock:
 	mutex_unlock(&nf_ct_proto_mutex);
-- 
cgit v1.2.3


From a3c5029cf7a96da3acdf6884a21581b5bef310c3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 14 Mar 2007 16:39:25 -0700
Subject: [NETFILTER]: nfnetlink: use mutex instead of semaphore

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nfnetlink.h          | 13 -----------
 include/net/netfilter/nf_conntrack_l3proto.h |  5 +---
 net/netfilter/nfnetlink.c                    | 35 +++++++++++++++++++---------
 3 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 1e9c821f152..6179648a014 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -129,19 +129,6 @@ extern void __nfa_fill(struct sk_buff *skb, int attrtype,
 ({ if (skb_tailroom(skb) < (int)NFA_SPACE(attrlen)) goto nfattr_failure; \
    __nfa_fill(skb, attrtype, attrlen, data); })
 
-extern struct semaphore nfnl_sem;
-
-#define nfnl_shlock()		down(&nfnl_sem)
-#define nfnl_shlock_nowait()	down_trylock(&nfnl_sem)
-
-#define nfnl_shunlock()		do { up(&nfnl_sem); \
-				     if(nfnl && nfnl->sk_receive_queue.qlen) \
-					    nfnl->sk_data_ready(nfnl, 0); \
-                        	} while(0)
-
-extern void nfnl_lock(void);
-extern void nfnl_unlock(void);
-
 extern int nfnetlink_subsys_register(struct nfnetlink_subsystem *n);
 extern int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n);
 
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index eb575cbd4c9..f32f714e5d9 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -90,10 +90,7 @@ extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX];
 /* Protocol registration. */
 extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);
 extern void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto);
-
-extern struct nf_conntrack_l3proto *
-nf_ct_l3proto_find_get(u_int16_t l3proto);
-
+extern struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto);
 extern void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p);
 
 /* Existing built-in protocols */
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index bf23e489e4c..7865a47c981 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -28,6 +28,7 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 #include <net/sock.h>
+#include <net/netlink.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
 
@@ -51,16 +52,28 @@ static char __initdata nfversion[] = "0.30";
 
 static struct sock *nfnl = NULL;
 static struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
-DECLARE_MUTEX(nfnl_sem);
+static DEFINE_MUTEX(nfnl_mutex);
 
-void nfnl_lock(void)
+static void nfnl_lock(void)
 {
-	nfnl_shlock();
+	mutex_lock(&nfnl_mutex);
 }
 
-void nfnl_unlock(void)
+static int nfnl_trylock(void)
 {
-	nfnl_shunlock();
+	return !mutex_trylock(&nfnl_mutex);
+}
+
+static void __nfnl_unlock(void)
+{
+	mutex_unlock(&nfnl_mutex);
+}
+
+static void nfnl_unlock(void)
+{
+	mutex_unlock(&nfnl_mutex);
+	if (nfnl->sk_receive_queue.qlen)
+		nfnl->sk_data_ready(nfnl, 0);
 }
 
 int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
@@ -248,11 +261,11 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb,
 	ss = nfnetlink_get_subsys(type);
 	if (!ss) {
 #ifdef CONFIG_KMOD
-		/* don't call nfnl_shunlock, since it would reenter
+		/* don't call nfnl_unlock, since it would reenter
 		 * with further packet processing */
-		up(&nfnl_sem);
+		__nfnl_unlock();
 		request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type));
-		nfnl_shlock();
+		nfnl_lock();
 		ss = nfnetlink_get_subsys(type);
 		if (!ss)
 #endif
@@ -322,7 +335,7 @@ static void nfnetlink_rcv(struct sock *sk, int len)
 	do {
 		struct sk_buff *skb;
 
-		if (nfnl_shlock_nowait())
+		if (nfnl_trylock())
 			return;
 
 		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
@@ -337,9 +350,9 @@ static void nfnetlink_rcv(struct sock *sk, int len)
 			kfree_skb(skb);
 		}
 
-		/* don't call nfnl_shunlock, since it would reenter
+		/* don't call nfnl_unlock, since it would reenter
 		 * with further packet processing */
-		up(&nfnl_sem);
+		__nfnl_unlock();
 	} while(nfnl && nfnl->sk_receive_queue.qlen);
 }
 
-- 
cgit v1.2.3


From 73c361862c2be2e4ed6019da283fe1b422107f16 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 14 Mar 2007 16:39:45 -0700
Subject: [NETFILTER]: nfnetlink: use netlink_run_queue()

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink.c | 51 ++++-------------------------------------------
 1 file changed, 4 insertions(+), 47 deletions(-)

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 7865a47c981..5be6ac478fd 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -301,59 +301,16 @@ err_inval:
 	return -1;
 }
 
-/* Process one packet of messages. */
-static inline int nfnetlink_rcv_skb(struct sk_buff *skb)
-{
-	int err;
-	struct nlmsghdr *nlh;
-
-	while (skb->len >= NLMSG_SPACE(0)) {
-		u32 rlen;
-
-		nlh = (struct nlmsghdr *)skb->data;
-		if (nlh->nlmsg_len < sizeof(struct nlmsghdr)
-		    || skb->len < nlh->nlmsg_len)
-			return 0;
-		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-		if (rlen > skb->len)
-			rlen = skb->len;
-		if (nfnetlink_rcv_msg(skb, nlh, &err)) {
-			if (!err)
-				return -1;
-			netlink_ack(skb, nlh, err);
-		} else
-			if (nlh->nlmsg_flags & NLM_F_ACK)
-				netlink_ack(skb, nlh, 0);
-		skb_pull(skb, rlen);
-	}
-
-	return 0;
-}
-
 static void nfnetlink_rcv(struct sock *sk, int len)
 {
-	do {
-		struct sk_buff *skb;
+	unsigned int qlen = 0;
 
+	do {
 		if (nfnl_trylock())
 			return;
-
-		while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
-			if (nfnetlink_rcv_skb(skb)) {
-				if (skb->len)
-					skb_queue_head(&sk->sk_receive_queue,
-						       skb);
-				else
-					kfree_skb(skb);
-				break;
-			}
-			kfree_skb(skb);
-		}
-
-		/* don't call nfnl_unlock, since it would reenter
-		 * with further packet processing */
+		netlink_run_queue(sk, &qlen, nfnetlink_rcv_msg);
 		__nfnl_unlock();
-	} while(nfnl && nfnl->sk_receive_queue.qlen);
+	} while (qlen);
 }
 
 static void __exit nfnetlink_exit(void)
-- 
cgit v1.2.3


From 010c7d6f867e98c86723f420d485583464fbab45 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 14 Mar 2007 16:40:10 -0700
Subject: [NETFILTER]: nf_conntrack: uninline notifier registration functions

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_conntrack_ecache.h | 30 ++++++-----------------------
 net/netfilter/nf_conntrack_ecache.c         | 23 ++++++++++++++++++++++
 2 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index b62a8a9ec9d..811c9073c53 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -20,30 +20,8 @@ DECLARE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);
 #define CONNTRACK_ECACHE(x)	(__get_cpu_var(nf_conntrack_ecache).x)
 
 extern struct atomic_notifier_head nf_conntrack_chain;
-extern struct atomic_notifier_head nf_conntrack_expect_chain;
-
-static inline int nf_conntrack_register_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
-}
-
-static inline int nf_conntrack_unregister_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb);
-}
-
-static inline int
-nf_conntrack_expect_register_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_register(&nf_conntrack_expect_chain, nb);
-}
-
-static inline int
-nf_conntrack_expect_unregister_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_unregister(&nf_conntrack_expect_chain,
-			nb);
-}
+extern int nf_conntrack_register_notifier(struct notifier_block *nb);
+extern int nf_conntrack_unregister_notifier(struct notifier_block *nb);
 
 extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
 extern void __nf_ct_event_cache_init(struct nf_conn *ct);
@@ -71,6 +49,10 @@ static inline void nf_conntrack_event(enum ip_conntrack_events event,
 		atomic_notifier_call_chain(&nf_conntrack_chain, event, ct);
 }
 
+extern struct atomic_notifier_head nf_conntrack_expect_chain;
+extern int nf_conntrack_expect_register_notifier(struct notifier_block *nb);
+extern int nf_conntrack_expect_unregister_notifier(struct notifier_block *nb);
+
 static inline void
 nf_conntrack_expect_event(enum ip_conntrack_expect_events event,
 			  struct nf_conntrack_expect *exp)
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 1a223e0c085..6bd421df2db 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -91,3 +91,26 @@ void nf_ct_event_cache_flush(void)
 	}
 }
 
+int nf_conntrack_register_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
+
+int nf_conntrack_unregister_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
+
+int nf_conntrack_expect_register_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&nf_conntrack_expect_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_register_notifier);
+
+int nf_conntrack_expect_unregister_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&nf_conntrack_expect_chain, nb);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_expect_unregister_notifier);
-- 
cgit v1.2.3


From 67ca396606432aae3b747d5e6bb61d0c297eb782 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 14 Mar 2007 16:40:38 -0700
Subject: [NETFILTER]: nfnetlink: remove early debugging messages from
 nfnetlink

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink.c | 42 ++++++------------------------------------
 1 file changed, 6 insertions(+), 36 deletions(-)

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 5be6ac478fd..c8b4f0d29df 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -3,7 +3,7 @@
  *
  * (C) 2001 by Jay Schulist <jschlst@samba.org>,
  * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
- * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net>
+ * (C) 2005,2007 by Pablo Neira Ayuso <pablo@netfilter.org>
  *
  * Initial netfilter messages via netlink development funded and
  * generally made possible by Network Robots, Inc. (www.networkrobots.com)
@@ -42,14 +42,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
 
 static char __initdata nfversion[] = "0.30";
 
-#if 0
-#define DEBUGP(format, args...)	\
-		printk(KERN_DEBUG "%s(%d):%s(): " format, __FILE__, \
-			__LINE__, __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
 static struct sock *nfnl = NULL;
 static struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT];
 static DEFINE_MUTEX(nfnl_mutex);
@@ -78,8 +70,6 @@ static void nfnl_unlock(void)
 
 int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
 {
-	DEBUGP("registering subsystem ID %u\n", n->subsys_id);
-
 	nfnl_lock();
 	if (subsys_table[n->subsys_id]) {
 		nfnl_unlock();
@@ -93,8 +83,6 @@ int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
 
 int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n)
 {
-	DEBUGP("unregistering subsystem ID %u\n", n->subsys_id);
-
 	nfnl_lock();
 	subsys_table[n->subsys_id] = NULL;
 	nfnl_unlock();
@@ -118,10 +106,8 @@ nfnetlink_find_client(u_int16_t type, struct nfnetlink_subsystem *ss)
 {
 	u_int8_t cb_id = NFNL_MSG_TYPE(type);
 
-	if (cb_id >= ss->cb_count) {
-		DEBUGP("msgtype %u >= %u, returning\n", type, ss->cb_count);
+	if (cb_id >= ss->cb_count)
 		return NULL;
-	}
 
 	return &ss->cb[cb_id];
 }
@@ -167,11 +153,8 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys,
 	u_int16_t attr_count;
 	u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
 
-	if (unlikely(cb_id >= subsys->cb_count)) {
-		DEBUGP("msgtype %u >= %u, returning\n",
-			cb_id, subsys->cb_count);
+	if (unlikely(cb_id >= subsys->cb_count))
 		return -EINVAL;
-	}
 
 	min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
 	if (unlikely(nlh->nlmsg_len < min_len))
@@ -235,27 +218,18 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb,
 	struct nfnetlink_subsystem *ss;
 	int type, err = 0;
 
-	DEBUGP("entered; subsys=%u, msgtype=%u\n",
-		 NFNL_SUBSYS_ID(nlh->nlmsg_type),
-		 NFNL_MSG_TYPE(nlh->nlmsg_type));
-
 	if (security_netlink_recv(skb, CAP_NET_ADMIN)) {
-		DEBUGP("missing CAP_NET_ADMIN\n");
 		*errp = -EPERM;
 		return -1;
 	}
 
 	/* Only requests are handled by kernel now. */
-	if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
-		DEBUGP("received non-request message\n");
+	if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
 		return 0;
-	}
 
 	/* All the messages must at least contain nfgenmsg */
-	if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg))) {
-		DEBUGP("received message was too short\n");
+	if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg)))
 		return 0;
-	}
 
 	type = nlh->nlmsg_type;
 	ss = nfnetlink_get_subsys(type);
@@ -273,10 +247,8 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb,
 	}
 
 	nc = nfnetlink_find_client(type, ss);
-	if (!nc) {
-		DEBUGP("unable to find client for type %d\n", type);
+	if (!nc)
 		goto err_inval;
-	}
 
 	{
 		u_int16_t attr_count =
@@ -289,14 +261,12 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb,
 		if (err < 0)
 			goto err_inval;
 
-		DEBUGP("calling handler\n");
 		err = nc->call(nfnl, skb, nlh, cda, errp);
 		*errp = err;
 		return err;
 	}
 
 err_inval:
-	DEBUGP("returning -EINVAL\n");
 	*errp = -EINVAL;
 	return -1;
 }
-- 
cgit v1.2.3


From d9e6d029498ab9e943c70f24c027aeda5602196d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 14 Mar 2007 16:41:03 -0700
Subject: [NETFILTER]: nfnetlink: remove duplicate checks in
 nfnetlink_check_attributes

Remove nfnetlink_check_attributes duplicates message size and callback
id checks. nfnetlink_find_client and nfnetlink_rcv_msg already do
such checks.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index c8b4f0d29df..9d33807ec16 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -149,17 +149,10 @@ static int
 nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys,
 			   struct nlmsghdr *nlh, struct nfattr *cda[])
 {
-	int min_len;
+	int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
 	u_int16_t attr_count;
 	u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
 
-	if (unlikely(cb_id >= subsys->cb_count))
-		return -EINVAL;
-
-	min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
-	if (unlikely(nlh->nlmsg_len < min_len))
-		return -EINVAL;
-
 	attr_count = subsys->cb[cb_id].attr_count;
 	memset(cda, 0, sizeof(struct nfattr *) * attr_count);
 
-- 
cgit v1.2.3


From ac0f1d9894650d900af99bdaed83e110d9dce025 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 14 Mar 2007 16:41:28 -0700
Subject: [NETFILTER]: nfnetlink: remove unrequired check in
 nfnetlink_get_subsys

subsys_table is initialized to NULL, therefore just returns NULL in case
that it is not set.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 9d33807ec16..0b0a9666f6f 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -94,8 +94,7 @@ static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type)
 {
 	u_int8_t subsys_id = NFNL_SUBSYS_ID(type);
 
-	if (subsys_id >= NFNL_SUBSYS_COUNT
-	    || subsys_table[subsys_id] == NULL)
+	if (subsys_id >= NFNL_SUBSYS_COUNT)
 		return NULL;
 
 	return subsys_table[subsys_id];
-- 
cgit v1.2.3


From 8a2e89533a9b06bc960445dd6034eeab76117424 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 14 Mar 2007 16:41:47 -0700
Subject: [NETFILTER]: nfnetlink: remove unused includes in nfnetlink.c

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 0b0a9666f6f..6dd66546d16 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -30,9 +30,7 @@
 #include <net/sock.h>
 #include <net/netlink.h>
 #include <linux/init.h>
-#include <linux/spinlock.h>
 
-#include <linux/netfilter.h>
 #include <linux/netlink.h>
 #include <linux/netfilter/nfnetlink.h>
 
-- 
cgit v1.2.3


From f4bc177f0ff0bf41b178452877762a9f0184d1a1 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 14 Mar 2007 16:42:11 -0700
Subject: [NETFILTER]: nfnetlink: move EXPORT_SYMBOL declarations next to the
 exported symbol

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 6dd66546d16..9ba6aa486f3 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -78,6 +78,7 @@ int nfnetlink_subsys_register(struct nfnetlink_subsystem *n)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
 
 int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n)
 {
@@ -87,6 +88,7 @@ int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
 
 static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type)
 {
@@ -121,6 +123,7 @@ void __nfa_fill(struct sk_buff *skb, int attrtype, int attrlen,
 	memcpy(NFA_DATA(nfa), data, attrlen);
 	memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size);
 }
+EXPORT_SYMBOL_GPL(__nfa_fill);
 
 void nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len)
 {
@@ -133,6 +136,7 @@ void nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len)
 		nfa = NFA_NEXT(nfa, len);
 	}
 }
+EXPORT_SYMBOL_GPL(nfattr_parse);
 
 /**
  * nfnetlink_check_attributes - check and parse nfnetlink attributes
@@ -194,11 +198,13 @@ int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
 
 	return err;
 }
+EXPORT_SYMBOL_GPL(nfnetlink_send);
 
 int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags)
 {
 	return netlink_unicast(nfnl, skb, pid, flags);
 }
+EXPORT_SYMBOL_GPL(nfnetlink_unicast);
 
 /* Process one complete nfnetlink message. */
 static int nfnetlink_rcv_msg(struct sk_buff *skb,
@@ -296,10 +302,3 @@ static int __init nfnetlink_init(void)
 
 module_init(nfnetlink_init);
 module_exit(nfnetlink_exit);
-
-EXPORT_SYMBOL_GPL(nfnetlink_subsys_register);
-EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister);
-EXPORT_SYMBOL_GPL(nfnetlink_send);
-EXPORT_SYMBOL_GPL(nfnetlink_unicast);
-EXPORT_SYMBOL_GPL(nfattr_parse);
-EXPORT_SYMBOL_GPL(__nfa_fill);
-- 
cgit v1.2.3


From 8e87e014ec881ce353e1f43340157f519b5d9f30 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 14 Mar 2007 16:42:29 -0700
Subject: [JHASH]: Use const in jhash2

Use const to avoid forcing users to cast const data.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/jhash.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/jhash.h b/include/linux/jhash.h
index 82c7ae412ee..2a2f99fbcb1 100644
--- a/include/linux/jhash.h
+++ b/include/linux/jhash.h
@@ -84,7 +84,7 @@ static inline u32 jhash(const void *key, u32 length, u32 initval)
 /* A special optimized version that handles 1 or more of u32s.
  * The length parameter here is the number of u32s in the key.
  */
-static inline u32 jhash2(u32 *k, u32 length, u32 initval)
+static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
 {
 	u32 a, b, c, len;
 
-- 
cgit v1.2.3


From 9b88790972498d235a2a4d2b66640c3c5b70bb7c Mon Sep 17 00:00:00 2001
From: Sami Farin <safari-netfilter@safari.iki.fi>
Date: Wed, 14 Mar 2007 16:43:00 -0700
Subject: [NETFILTER]: nf_conntrack: use jhash2 in __hash_conntrack

Now it uses jhash, but using jhash2 would be around 3-4 times faster
(on P4).

Signed-off-by: Sami Farin <safari-netfilter@safari.iki.fi>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_core.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9858bcb29aa..2b0cc7a1771 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -128,10 +128,11 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
 				  unsigned int size, unsigned int rnd)
 {
 	unsigned int a, b;
-	a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all),
-		  ((tuple->src.l3num) << 16) | tuple->dst.protonum);
-	b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all),
-			(tuple->src.u.all << 16) | tuple->dst.u.all);
+
+	a = jhash2(tuple->src.u3.all, ARRAY_SIZE(tuple->src.u3.all),
+		   (tuple->src.l3num << 16) | tuple->dst.protonum);
+	b = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
+		   (tuple->src.u.all << 16) | tuple->dst.u.all);
 
 	return jhash_2words(a, b, rnd) % size;
 }
-- 
cgit v1.2.3


From edda553c324bdc5bb5c2d553b524cab37058a855 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Wed, 14 Mar 2007 16:43:37 -0700
Subject: [NETFILTER]: nf_conntrack: add __nf_copy() to copy members in skb

This unifies the codes to copy netfilter related datas. Note that
__nf_copy() assumes destination skb doesn't have any netfilter
related members.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 17 +++++++++++++++++
 net/core/skbuff.c      | 28 ++--------------------------
 2 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 39a6da243b2..62ab1ab0702 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1498,8 +1498,25 @@ static inline void nf_reset(struct sk_buff *skb)
 #endif
 }
 
+/* Note: This doesn't put any conntrack and bridge info in dst. */
+static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
+{
+	dst->nfct = src->nfct;
+	nf_conntrack_get(src->nfct);
+	dst->nfctinfo = src->nfctinfo;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	dst->nfct_reasm = src->nfct_reasm;
+	nf_conntrack_get_reasm(src->nfct_reasm);
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+	dst->nf_bridge  = src->nf_bridge;
+	nf_bridge_get(src->nf_bridge);
+#endif
+}
+
 #else /* CONFIG_NETFILTER */
 static inline void nf_reset(struct sk_buff *skb) {}
+static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) {}
 #endif /* CONFIG_NETFILTER */
 
 #ifdef CONFIG_NETWORK_SECMARK
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b242020c02f..408cc99af6b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -422,19 +422,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	C(protocol);
 	n->destructor = NULL;
 	C(mark);
-#ifdef CONFIG_NETFILTER
-	C(nfct);
-	nf_conntrack_get(skb->nfct);
-	C(nfctinfo);
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-	C(nfct_reasm);
-	nf_conntrack_get_reasm(skb->nfct_reasm);
-#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
-	C(nf_bridge);
-	nf_bridge_get(skb->nf_bridge);
-#endif
-#endif /*CONFIG_NETFILTER*/
+	__nf_copy(n, skb);
 #ifdef CONFIG_NET_SCHED
 	C(tc_index);
 #ifdef CONFIG_NET_CLS_ACT
@@ -483,22 +471,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->tstamp	= old->tstamp;
 	new->destructor = NULL;
 	new->mark	= old->mark;
-#ifdef CONFIG_NETFILTER
-	new->nfct	= old->nfct;
-	nf_conntrack_get(old->nfct);
-	new->nfctinfo	= old->nfctinfo;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-	new->nfct_reasm = old->nfct_reasm;
-	nf_conntrack_get_reasm(old->nfct_reasm);
-#endif
+	__nf_copy(new, old);
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 	new->ipvs_property = old->ipvs_property;
 #endif
-#ifdef CONFIG_BRIDGE_NETFILTER
-	new->nf_bridge	= old->nf_bridge;
-	nf_bridge_get(old->nf_bridge);
-#endif
-#endif
 #ifdef CONFIG_NET_SCHED
 #ifdef CONFIG_NET_CLS_ACT
 	new->tc_verd = old->tc_verd;
-- 
cgit v1.2.3


From e7ac05f3407a3fb5a1b2ff5d5554899eaa0a10a3 Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Wed, 14 Mar 2007 16:44:01 -0700
Subject: [NETFILTER]: nf_conntrack: add nf_copy() to safely copy members in
 skb

This unifies the codes to copy netfilter related datas. Before copying,
nf_copy() puts original members in destination skb.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 13 +++++++++++++
 net/ipv4/ip_output.c   | 13 +------------
 net/ipv6/ip6_output.c  | 18 +-----------------
 3 files changed, 15 insertions(+), 29 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 62ab1ab0702..47c57be97d4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1514,9 +1514,22 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 #endif
 }
 
+static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
+{
+	nf_conntrack_put(dst->nfct);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	nf_conntrack_put_reasm(dst->nfct_reasm);
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+	nf_bridge_put(dst->nf_bridge);
+#endif
+	__nf_copy(dst, src);
+}
+
 #else /* CONFIG_NETFILTER */
 static inline void nf_reset(struct sk_buff *skb) {}
 static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) {}
+static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) {}
 #endif /* CONFIG_NETFILTER */
 
 #ifdef CONFIG_NETWORK_SECMARK
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 11029b9d4cf..11ab100d6c6 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -384,20 +384,9 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 #ifdef CONFIG_NET_SCHED
 	to->tc_index = from->tc_index;
 #endif
-#ifdef CONFIG_NETFILTER
-	/* Connection association is same as pre-frag packet */
-	nf_conntrack_put(to->nfct);
-	to->nfct = from->nfct;
-	nf_conntrack_get(to->nfct);
-	to->nfctinfo = from->nfctinfo;
+	nf_copy(to, from);
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 	to->ipvs_property = from->ipvs_property;
-#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
-	nf_bridge_put(to->nf_bridge);
-	to->nf_bridge = from->nf_bridge;
-	nf_bridge_get(to->nf_bridge);
-#endif
 #endif
 	skb_copy_secmark(to, from);
 }
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0f4434eff66..49523c2a9f1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -504,23 +504,7 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 #ifdef CONFIG_NET_SCHED
 	to->tc_index = from->tc_index;
 #endif
-#ifdef CONFIG_NETFILTER
-	/* Connection association is same as pre-frag packet */
-	nf_conntrack_put(to->nfct);
-	to->nfct = from->nfct;
-	nf_conntrack_get(to->nfct);
-	to->nfctinfo = from->nfctinfo;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-	nf_conntrack_put_reasm(to->nfct_reasm);
-	to->nfct_reasm = from->nfct_reasm;
-	nf_conntrack_get_reasm(to->nfct_reasm);
-#endif
-#ifdef CONFIG_BRIDGE_NETFILTER
-	nf_bridge_put(to->nf_bridge);
-	to->nf_bridge = from->nf_bridge;
-	nf_bridge_get(to->nf_bridge);
-#endif
-#endif
+	nf_copy(to, from);
 	skb_copy_secmark(to, from);
 }
 
-- 
cgit v1.2.3


From 8f5bd99071212cd16b3449d16639971a44540d51 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Wed, 14 Mar 2007 16:44:31 -0700
Subject: [NETFILTER]: TCP conntrack: accept RST|PSH as valid

This combination has been encountered on an IBM AS/400 in response
to packets sent to a closed session. There is no particular reason
to mark it invalid.

Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_tcp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 9e496319f60..926e302494f 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -774,6 +774,7 @@ static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
 	[TH_SYN|TH_ACK]			= 1,
 	[TH_SYN|TH_ACK|TH_PUSH]		= 1,
 	[TH_RST]			= 1,
+	[TH_RST|TH_PUSH]		= 1,
 	[TH_RST|TH_ACK]			= 1,
 	[TH_RST|TH_ACK|TH_PUSH]		= 1,
 	[TH_FIN|TH_ACK]			= 1,
-- 
cgit v1.2.3


From 5c8ce7c92106434d2bdc9d5dfa5f62bf4546b296 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Wed, 14 Mar 2007 16:44:53 -0700
Subject: [NETFILTER]: TCP conntrack: factorize out the PUSH flag

The PUSH flag is accepted with every other valid combination.
Let's get it out of the tcp_valid_flags table and reduce the
number of combinations we have to handle. This does not
significantly reduce the table size however (8 bytes).

Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_proto_tcp.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 926e302494f..a1363626bcc 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -764,27 +764,18 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tcp_update);
 #define	TH_ECE	0x40
 #define	TH_CWR	0x80
 
-/* table of valid flag combinations - ECE and CWR are always valid */
-static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
+/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
+static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
 {
 	[TH_SYN]			= 1,
-	[TH_SYN|TH_PUSH]		= 1,
 	[TH_SYN|TH_URG]			= 1,
-	[TH_SYN|TH_PUSH|TH_URG]		= 1,
 	[TH_SYN|TH_ACK]			= 1,
-	[TH_SYN|TH_ACK|TH_PUSH]		= 1,
 	[TH_RST]			= 1,
-	[TH_RST|TH_PUSH]		= 1,
 	[TH_RST|TH_ACK]			= 1,
-	[TH_RST|TH_ACK|TH_PUSH]		= 1,
 	[TH_FIN|TH_ACK]			= 1,
+	[TH_FIN|TH_ACK|TH_URG]		= 1,
 	[TH_ACK]			= 1,
-	[TH_ACK|TH_PUSH]		= 1,
 	[TH_ACK|TH_URG]			= 1,
-	[TH_ACK|TH_URG|TH_PUSH]		= 1,
-	[TH_FIN|TH_ACK|TH_PUSH]		= 1,
-	[TH_FIN|TH_ACK|TH_URG]		= 1,
-	[TH_FIN|TH_ACK|TH_URG|TH_PUSH]	= 1,
 };
 
 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
@@ -831,7 +822,7 @@ static int tcp_error(struct sk_buff *skb,
 	}
 
 	/* Check TCP flags. */
-	tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
+	tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
 	if (!tcp_valid_flags[tcpflags]) {
 		if (LOG_INVALID(IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
-- 
cgit v1.2.3


From c8e2078cfe414a99cf6f2f2f1d78c7e75392e9d4 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 14 Mar 2007 16:45:19 -0700
Subject: [NETFILTER]: ctnetlink: add support for internal tcp connection
 tracking flags handling

This patch let userspace programs set the IP_CT_TCP_BE_LIBERAL flag to
force the pickup of established connections.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nf_conntrack_tcp.h    |  5 +++
 include/linux/netfilter/nfnetlink_conntrack.h |  4 +++
 net/netfilter/nf_conntrack_proto_tcp.c        | 45 ++++++++++++++++++++++++++-
 3 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h
index 007af4c2770..22ce29995f1 100644
--- a/include/linux/netfilter/nf_conntrack_tcp.h
+++ b/include/linux/netfilter/nf_conntrack_tcp.h
@@ -30,6 +30,11 @@ enum tcp_conntrack {
 /* Be liberal in window checking */
 #define IP_CT_TCP_FLAG_BE_LIBERAL		0x08
 
+struct nf_ct_tcp_flags {
+	u_int8_t flags;
+	u_int8_t mask;
+};
+
 #ifdef __KERNEL__
 
 struct ip_ct_tcp_state {
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index b5883ccee29..d7c35039721 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -83,6 +83,10 @@ enum ctattr_protoinfo {
 enum ctattr_protoinfo_tcp {
 	CTA_PROTOINFO_TCP_UNSPEC,
 	CTA_PROTOINFO_TCP_STATE,
+	CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
+	CTA_PROTOINFO_TCP_WSCALE_REPLY,
+	CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
+	CTA_PROTOINFO_TCP_FLAGS_REPLY,
 	__CTA_PROTOINFO_TCP_MAX
 };
 #define CTA_PROTOINFO_TCP_MAX (__CTA_PROTOINFO_TCP_MAX - 1)
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index a1363626bcc..8439768f9d1 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1101,11 +1101,26 @@ static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
 			 const struct nf_conn *ct)
 {
 	struct nfattr *nest_parms;
+	struct nf_ct_tcp_flags tmp = {};
 
 	read_lock_bh(&tcp_lock);
 	nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
 	NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
 		&ct->proto.tcp.state);
+
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL, sizeof(u_int8_t),
+		&ct->proto.tcp.seen[0].td_scale);
+
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY, sizeof(u_int8_t),
+		&ct->proto.tcp.seen[1].td_scale);
+
+	tmp.flags = ct->proto.tcp.seen[0].flags;
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
+		sizeof(struct nf_ct_tcp_flags), &tmp);
+
+	tmp.flags = ct->proto.tcp.seen[1].flags;
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
+		sizeof(struct nf_ct_tcp_flags), &tmp);
 	read_unlock_bh(&tcp_lock);
 
 	NFA_NEST_END(skb, nest_parms);
@@ -1118,7 +1133,11 @@ nfattr_failure:
 }
 
 static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
-	[CTA_PROTOINFO_TCP_STATE-1]	= sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_STATE-1]	      = sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] = sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]    = sizeof(u_int8_t),
+	[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]  = sizeof(struct nf_ct_tcp_flags),
+	[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]     = sizeof(struct nf_ct_tcp_flags)
 };
 
 static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
@@ -1142,6 +1161,30 @@ static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
 	write_lock_bh(&tcp_lock);
 	ct->proto.tcp.state =
 		*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
+
+	if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]) {
+		struct nf_ct_tcp_flags *attr =
+			NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL-1]);
+		ct->proto.tcp.seen[0].flags &= ~attr->mask;
+		ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
+	}
+
+	if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]) {
+		struct nf_ct_tcp_flags *attr =
+			NFA_DATA(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY-1]);
+		ct->proto.tcp.seen[1].flags &= ~attr->mask;
+		ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
+	}
+
+	if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1] &&
+	    tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1] &&
+	    ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+	    ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
+		ct->proto.tcp.seen[0].td_scale = *(u_int8_t *)
+			NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL-1]);
+		ct->proto.tcp.seen[1].td_scale = *(u_int8_t *)
+			NFA_DATA(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY-1]);
+	}
 	write_unlock_bh(&tcp_lock);
 
 	return 0;
-- 
cgit v1.2.3


From ac6d141dc7d1d0eeec850d1b451dca83ce649684 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 14 Mar 2007 16:45:39 -0700
Subject: [NETFILTER]: nfnetlink: parse attributes with nfattr_parse in
 nfnetlink_check_attribute

Use nfattr_parse to parse attributes, this patch also modifies the default
behaviour since unknown attributes will be ignored instead of returning
EINVAL. This ensure backward compatibility: new libraries with new
attributes and old kernels can work.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 9ba6aa486f3..dec36abdf94 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -151,26 +151,14 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys,
 			   struct nlmsghdr *nlh, struct nfattr *cda[])
 {
 	int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
-	u_int16_t attr_count;
 	u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
-
-	attr_count = subsys->cb[cb_id].attr_count;
-	memset(cda, 0, sizeof(struct nfattr *) * attr_count);
+	u_int16_t attr_count = subsys->cb[cb_id].attr_count;
 
 	/* check attribute lengths. */
 	if (likely(nlh->nlmsg_len > min_len)) {
 		struct nfattr *attr = NFM_NFA(NLMSG_DATA(nlh));
 		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
-
-		while (NFA_OK(attr, attrlen)) {
-			unsigned flavor = NFA_TYPE(attr);
-			if (flavor) {
-				if (flavor > attr_count)
-					return -EINVAL;
-				cda[flavor - 1] = attr;
-			}
-			attr = NFA_NEXT(attr, attrlen);
-		}
+		nfattr_parse(cda, attr_count, attr, attrlen);
 	}
 
 	/* implicit: if nlmsg_len == min_len, we return 0, and an empty
-- 
cgit v1.2.3


From eeeb03745bf9ea352df2373b9cb5fa14e60a2de0 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 14 Mar 2007 21:04:34 -0300
Subject: [SK_BUFF]: More skb_put related conversions to
 skb_reset_transport_header

This is similar to the skb_reset_network_header(), i.e. at the point we reset
the transport header pointer/offset skb->tail is equal to skb->data.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/af_irda.c     | 19 +++++++++----------
 net/netrom/af_netrom.c |  7 +++----
 net/rose/af_rose.c     |  5 +++--
 net/x25/af_x25.c       |  5 +++--
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index c3cd2ba123e..bf994c85e45 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1274,7 +1274,6 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
 	struct sock *sk = sock->sk;
 	struct irda_sock *self;
 	struct sk_buff *skb;
-	unsigned char *asmptr;
 	int err;
 
 	IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
@@ -1317,9 +1316,9 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
 		return -ENOBUFS;
 
 	skb_reserve(skb, self->max_header_size + 16);
-
-	asmptr = skb->h.raw = skb_put(skb, len);
-	err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+	skb_reset_transport_header(skb);
+	skb_put(skb, len);
+	err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
 	if (err) {
 		kfree_skb(skb);
 		return err;
@@ -1530,7 +1529,6 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
 	struct sock *sk = sock->sk;
 	struct irda_sock *self;
 	struct sk_buff *skb;
-	unsigned char *asmptr;
 	int err;
 
 	IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
@@ -1566,10 +1564,11 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
 		return -ENOBUFS;
 
 	skb_reserve(skb, self->max_header_size);
+	skb_reset_transport_header(skb);
 
 	IRDA_DEBUG(4, "%s(), appending user data\n", __FUNCTION__);
-	asmptr = skb->h.raw = skb_put(skb, len);
-	err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+	skb_put(skb, len);
+	err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
 	if (err) {
 		kfree_skb(skb);
 		return err;
@@ -1602,7 +1601,6 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
 	__u8 pid = 0;
 	int bound = 0;
 	struct sk_buff *skb;
-	unsigned char *asmptr;
 	int err;
 
 	IRDA_DEBUG(4, "%s(), len=%zd\n", __FUNCTION__, len);
@@ -1662,10 +1660,11 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
 		return -ENOBUFS;
 
 	skb_reserve(skb, self->max_header_size);
+	skb_reset_transport_header(skb);
 
 	IRDA_DEBUG(4, "%s(), appending user data\n", __FUNCTION__);
-	asmptr = skb->h.raw = skb_put(skb, len);
-	err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+	skb_put(skb, len);
+	err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
 	if (err) {
 		kfree_skb(skb);
 		return err;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 8d0f30a015d..053fa26ff90 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1074,6 +1074,7 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
 		goto out;
 
 	skb_reserve(skb, size - len);
+	skb_reset_transport_header(skb);
 
 	/*
 	 *	Push down the NET/ROM header
@@ -1094,14 +1095,12 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
 	/*
 	 *	Put the data on the end
 	 */
+	skb_put(skb, len);
 
-	skb->h.raw = skb_put(skb, len);
-
-	asmptr = skb->h.raw;
 	SOCK_DEBUG(sk, "NET/ROM: Appending user data\n");
 
 	/* User data follows immediately after the NET/ROM transport header */
-	if (memcpy_fromiovec(asmptr, msg->msg_iov, len)) {
+	if (memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len)) {
 		kfree_skb(skb);
 		err = -EFAULT;
 		goto out;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 6d8684a11ac..1511697b22b 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1105,9 +1105,10 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
 	 */
 	SOCK_DEBUG(sk, "ROSE: Appending user data\n");
 
-	asmptr = skb->h.raw = skb_put(skb, len);
+	skb_reset_transport_header(skb);
+	skb_put(skb, len);
 
-	err = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+	err = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
 	if (err) {
 		kfree_skb(skb);
 		return err;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index fc713059ccd..adcda8ebee9 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1058,9 +1058,10 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
 	 */
 	SOCK_DEBUG(sk, "x25_sendmsg: Copying user data\n");
 
-	asmptr = skb->h.raw = skb_put(skb, len);
+	skb_reset_transport_header(skb);
+	skb_put(skb, len);
 
-	rc = memcpy_fromiovec(asmptr, msg->msg_iov, len);
+	rc = memcpy_fromiovec(skb_transport_header(skb), msg->msg_iov, len);
 	if (rc)
 		goto out_kfree_skb;
 
-- 
cgit v1.2.3


From 55f79cc0c02f9ce8f85e965e9679796f62b790f5 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 14 Mar 2007 21:05:03 -0300
Subject: [IPV6]: Reset the network header in ip6_nd_hdr

ip6_nd_hdr is always called immediately after a alloc_skb + skb_reserve
sequence, i.e. when skb->tail is equal to skb->data, making it correct to use
skb_reset_network_header().

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 49523c2a9f1..32e8c3f73c7 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -263,7 +263,8 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
 
 	totlen = len + sizeof(struct ipv6hdr);
 
-	skb->nh.raw = skb_put(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	skb_put(skb, sizeof(struct ipv6hdr));
 	hdr = ipv6_hdr(skb);
 
 	*(__be32*)hdr = htonl(0x60000000);
-- 
cgit v1.2.3


From d10ba34b001944a8d1c8adb5646140ef089c432b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 14 Mar 2007 21:05:37 -0300
Subject: [SK_BUFF]: More skb_put related skb_reset_transport_header

This time we have to set it to skb->tail that is not anymore equal to
skb->data, so we either add a new helper or just add the skb->tail - skb->data
offset, for now do the later.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c            |  3 ++-
 net/ipv4/ipvs/ip_vs_xmit.c |  3 ++-
 net/ipv6/mcast.c           |  5 +++--
 net/ipv6/ndisc.c           | 20 ++++++++++++--------
 4 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 8f0df7b4dfe..b0efd279ddb 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -333,7 +333,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	((u8*)&pip[1])[2] = 0;
 	((u8*)&pip[1])[3] = 0;
 
-	skb->h.raw = skb_put(skb, sizeof(*pig));
+	skb->h.raw = skb->nh.raw + sizeof(struct iphdr) + 4;
+	skb_put(skb, sizeof(*pig));
 	pig = igmpv3_report_hdr(skb);
 	pig->type = IGMPV3_HOST_MEMBERSHIP_REPORT;
 	pig->resv1 = 0;
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index a7aee682203..c6276d08b31 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -323,6 +323,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	struct iphdr  *old_iph = ip_hdr(skb);
 	u8     tos = old_iph->tos;
 	__be16 df = old_iph->frag_off;
+	unsigned char *old_h = skb_transport_header(skb);
 	struct iphdr  *iph;			/* Our new IP header */
 	int    max_headroom;			/* The extra header space needed */
 	int    mtu;
@@ -380,7 +381,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		old_iph = ip_hdr(skb);
 	}
 
-	skb->h.raw = (void *) old_iph;
+	skb->h.raw = old_h;
 
 	/* fix old IP header checksum */
 	ip_send_check(old_iph);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index c6436f5e3e9..07e86ebb46b 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1423,8 +1423,9 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
 
 	memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
 
-	pmr =(struct mld2_report *)skb_put(skb, sizeof(*pmr));
-	skb->h.raw = (unsigned char *)pmr;
+	skb_set_transport_header(skb, skb->tail - skb->data);
+	skb_put(skb, sizeof(*pmr));
+	pmr = (struct mld2_report *)skb_transport_header(skb);
 	pmr->type = ICMPV6_MLD2_REPORT;
 	pmr->resv1 = 0;
 	pmr->csum = 0;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f9a85ab594d..f8e619772fb 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -492,8 +492,9 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 	ip6_nd_hdr(sk, skb, dev, src_addr, daddr, IPPROTO_ICMPV6, len);
 
-	msg = (struct nd_msg *)skb_put(skb, len);
-	skb->h.raw = (unsigned char*)msg;
+	skb_set_transport_header(skb, skb->tail - skb->data);
+	skb_put(skb, len);
+	msg = (struct nd_msg *)skb_transport_header(skb);
 
 	msg->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
 	msg->icmph.icmp6_code = 0;
@@ -583,8 +584,9 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
 
-	msg = (struct nd_msg *)skb_put(skb, len);
-	skb->h.raw = (unsigned char*)msg;
+	skb_set_transport_header(skb, skb->tail - skb->data);
+	skb_put(skb, len);
+	msg = (struct nd_msg *)skb_transport_header(skb);
 	msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION;
 	msg->icmph.icmp6_code = 0;
 	msg->icmph.icmp6_cksum = 0;
@@ -683,8 +685,9 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
 
-	hdr = (struct icmp6hdr *)skb_put(skb, len);
-	skb->h.raw = (unsigned char*)hdr;
+	skb_set_transport_header(skb, skb->tail - skb->data);
+	skb_put(skb, len);
+	hdr = icmp6_hdr(skb);
 	hdr->icmp6_type = NDISC_ROUTER_SOLICITATION;
 	hdr->icmp6_code = 0;
 	hdr->icmp6_cksum = 0;
@@ -1519,8 +1522,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 	ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
 		   IPPROTO_ICMPV6, len);
 
-	icmph = (struct icmp6hdr *)skb_put(buff, len);
-	buff->h.raw = (unsigned char*)icmph;
+	skb_set_transport_header(buff, buff->tail - buff->data);
+	skb_put(buff, len);
+	icmph = icmp6_hdr(buff);
 
 	memset(icmph, 0, sizeof(struct icmp6hdr));
 	icmph->icmp6_type = NDISC_REDIRECT;
-- 
cgit v1.2.3


From 0a6114d94b6d6f82e81cb8e0d8b0d4cf50739fec Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 15 Mar 2007 21:08:55 -0300
Subject: [KBUILD]: Unifdef headers changed by the skb layer header
 refactorings

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index e81e301a4d7..ea86f2e0271 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -69,9 +69,7 @@ header-y += hdsmart.h
 header-y += hysdn_if.h
 header-y += i2c-dev.h
 header-y += i8k.h
-header-y += icmp.h
 header-y += if_arcnet.h
-header-y += if_arp.h
 header-y += if_bonding.h
 header-y += if_cablemodem.h
 header-y += if_fc.h
@@ -88,7 +86,6 @@ header-y += if_tunnel.h
 header-y += in6.h
 header-y += in_route.h
 header-y += ioctl.h
-header-y += ip.h
 header-y += ipmi_msgdefs.h
 header-y += ip_mp_alg.h
 header-y += ipsec.h
@@ -210,8 +207,10 @@ unifdef-y += hiddev.h
 unifdef-y += hpet.h
 unifdef-y += i2c.h
 unifdef-y += i2o-dev.h
+unifdef-y += icmp.h
 unifdef-y += icmpv6.h
 unifdef-y += if_addr.h
+unifdef-y += if_arp.h
 unifdef-y += if_bridge.h
 unifdef-y += if_ec.h
 unifdef-y += if_eql.h
@@ -231,6 +230,7 @@ unifdef-y += inet_diag.h
 unifdef-y += in.h
 unifdef-y += inotify.h
 unifdef-y += input.h
+unifdef-y += ip.h
 unifdef-y += ipc.h
 unifdef-y += ipmi.h
 unifdef-y += ipv6.h
-- 
cgit v1.2.3


From ddc7b8e32b22fe8b45d306b7d99472d4b560add6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 15 Mar 2007 21:42:27 -0300
Subject: [SK_BUFF]: Some more layer header conversions

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c               | 30 ++++++++++++++++++------------
 net/core/skbuff.c               |  2 +-
 net/ipv4/ipmr.c                 |  3 ++-
 net/ipv6/xfrm6_mode_beet.c      |  3 ++-
 net/ipv6/xfrm6_mode_ro.c        |  3 ++-
 net/ipv6/xfrm6_mode_transport.c |  3 ++-
 6 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 160d4f01c46..ae8cf9a285f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2357,8 +2357,12 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 		*vlan_encapsulated_proto = htons(ETH_P_IP);
 	}
 
-	iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr));
-	udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
+	skb_set_network_header(skb, skb->tail - skb->data);
+	skb->h.raw = skb->nh.raw + sizeof(struct iphdr);
+	skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
+
+	iph = ip_hdr(skb);
+	udph = udp_hdr(skb);
 
 	memcpy(eth, pkt_dev->hh, 12);
 	*(__be16 *) & eth[12] = protocol;
@@ -2387,12 +2391,11 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	iph->check = 0;
 	iph->check = ip_fast_csum((void *)iph, iph->ihl);
 	skb->protocol = protocol;
-	skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32) -
-		VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev);
+	skb->mac.raw = (skb->nh.raw - ETH_HLEN -
+			pkt_dev->nr_labels * sizeof(u32) -
+			VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
-	skb->nh.raw = (unsigned char *)iph;
-	skb->h.raw = (unsigned char *)udph;
 
 	if (pkt_dev->nfrags <= 0)
 		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
@@ -2693,8 +2696,12 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 		*vlan_encapsulated_proto = htons(ETH_P_IPV6);
 	}
 
-	iph = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr));
-	udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
+	skb_set_network_header(skb, skb->tail - skb->data);
+	skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
+	skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
+
+	iph = ipv6_hdr(skb);
+	udph = udp_hdr(skb);
 
 	memcpy(eth, pkt_dev->hh, 12);
 	*(__be16 *) & eth[12] = protocol;
@@ -2731,13 +2738,12 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr);
 	ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr);
 
-	skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32) -
-		VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev);
+	skb->mac.raw = (skb->nh.raw - ETH_HLEN -
+			pkt_dev->nr_labels * sizeof(u32) -
+			VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
 	skb->protocol = protocol;
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
-	skb->nh.raw = (unsigned char *)iph;
-	skb->h.raw = (unsigned char *)udph;
 
 	if (pkt_dev->nfrags <= 0)
 		pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 408cc99af6b..87e000633f4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1905,7 +1905,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 
 		skb_reserve(nskb, headroom);
 		skb_reset_mac_header(nskb);
-		nskb->nh.raw = nskb->data + skb->mac_len;
+		skb_set_network_header(nskb, skb->mac_len);
 		nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
 		memcpy(skb_put(nskb, doffset), skb->data, doffset);
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8f45c95db45..357894259f8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -580,7 +580,8 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 	 *	Copy the IP header
 	 */
 
-	skb->nh.raw = skb_put(skb, ihl);
+	skb_set_network_header(skb, skb->tail - skb->data);
+	skb_put(skb, ihl);
 	memcpy(skb->data,pkt->data,ihl);
 	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
 	msg = (struct igmpmsg *)skb_network_header(skb);
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 0cc96ece003..8a01b0da2dd 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -41,7 +41,8 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 	iph = ipv6_hdr(skb);
 
 	hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
-	skb->nh.raw = prevhdr - x->props.header_len;
+	skb_set_network_header(skb,
+			       (prevhdr - x->props.header_len) - skb->data);
 	skb_set_transport_header(skb, hdr_len);
 	memmove(skb->data, iph, hdr_len);
 
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index da48ecf3fe9..6ad6d7ac6bd 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -53,7 +53,8 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
 	iph = ipv6_hdr(skb);
 
 	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
-	skb->nh.raw = prevhdr - x->props.header_len;
+	skb_set_network_header(skb,
+			       (prevhdr - x->props.header_len) - skb->data);
 	skb_set_transport_header(skb, hdr_len);
 	memmove(skb->data, iph, hdr_len);
 	return 0;
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index d526f4e9c65..eb1864b5aae 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -35,7 +35,8 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 	iph = ipv6_hdr(skb);
 
 	hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
-	skb->nh.raw = prevhdr - x->props.header_len;
+	skb_set_network_header(skb,
+			       (prevhdr - x->props.header_len) - skb->data);
 	skb_set_transport_header(skb, hdr_len);
 	memmove(skb->data, iph, hdr_len);
 	return 0;
-- 
cgit v1.2.3


From 641b9e0e8b7f96425da6ce98f3361e3af0baee29 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 16 Mar 2007 01:18:42 -0700
Subject: [NET_SCHED]: Use ktime as clocksource

Get rid of the manual clock source selection mess and use ktime. Also
use a scalar representation, which allows to clean up pkt_sched.h a bit
more and results in less ktime_to_ns() calls in most cases.

The PSCHED_US2JIFFIE/PSCHED_JIFFIE2US macros are implemented quite
inefficient by this patch, following patches will convert all qdiscs
to hrtimers and get rid of them entirely.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 169 ++++--------------------------------------------
 kernel/hrtimer.c        |   1 +
 net/sched/Kconfig       |  56 ----------------
 net/sched/sch_api.c     |  77 +---------------------
 net/sched/sch_hfsc.c    |  31 +--------
 5 files changed, 19 insertions(+), 315 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index f6afee73235..1c12afd113d 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -2,6 +2,7 @@
 #define __NET_PKT_SCHED_H
 
 #include <linux/jiffies.h>
+#include <linux/ktime.h>
 #include <net/sch_generic.h>
 
 struct qdisc_walker
@@ -37,176 +38,32 @@ static inline void *qdisc_priv(struct Qdisc *q)
    The things are not so bad, because we may use artifical
    clock evaluated by integration of network data flow
    in the most critical places.
-
-   Note: we do not use fastgettimeofday.
-   The reason is that, when it is not the same thing as
-   gettimeofday, it returns invalid timestamp, which is
-   not updated, when net_bh is active.
  */
 
-/* General note about internal clock.
-
-   Any clock source returns time intervals, measured in units
-   close to 1usec. With source CONFIG_NET_SCH_CLK_GETTIMEOFDAY it is precisely
-   microseconds, otherwise something close but different chosen to minimize
-   arithmetic cost. Ratio usec/internal untis in form nominator/denominator
-   may be read from /proc/net/psched.
- */
-
-
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-
-typedef struct timeval	psched_time_t;
-typedef long		psched_tdiff_t;
-
-#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp))
-#define PSCHED_US2JIFFIE(usecs) usecs_to_jiffies(usecs)
-#define PSCHED_JIFFIE2US(delay) jiffies_to_usecs(delay)
-
-#else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
-
 typedef u64	psched_time_t;
 typedef long	psched_tdiff_t;
 
-#ifdef CONFIG_NET_SCH_CLK_JIFFIES
-
-#if HZ < 96
-#define PSCHED_JSCALE 14
-#elif HZ >= 96 && HZ < 192
-#define PSCHED_JSCALE 13
-#elif HZ >= 192 && HZ < 384
-#define PSCHED_JSCALE 12
-#elif HZ >= 384 && HZ < 768
-#define PSCHED_JSCALE 11
-#elif HZ >= 768
-#define PSCHED_JSCALE 10
-#endif
-
-#define PSCHED_GET_TIME(stamp) ((stamp) = (get_jiffies_64()<<PSCHED_JSCALE))
-#define PSCHED_US2JIFFIE(delay) (((delay)+(1<<PSCHED_JSCALE)-1)>>PSCHED_JSCALE)
-#define PSCHED_JIFFIE2US(delay) ((delay)<<PSCHED_JSCALE)
-
-#endif /* CONFIG_NET_SCH_CLK_JIFFIES */
-#ifdef CONFIG_NET_SCH_CLK_CPU
-#include <asm/timex.h>
-
-extern psched_tdiff_t psched_clock_per_hz;
-extern int psched_clock_scale;
-extern psched_time_t psched_time_base;
-extern cycles_t psched_time_mark;
-
-#define PSCHED_GET_TIME(stamp)						\
-do {									\
-	cycles_t cur = get_cycles();					\
-	if (sizeof(cycles_t) == sizeof(u32)) {				\
-		if (cur <= psched_time_mark)				\
-			psched_time_base += 0x100000000ULL;		\
-		psched_time_mark = cur;					\
-		(stamp) = (psched_time_base + cur)>>psched_clock_scale;	\
-	} else {							\
-		(stamp) = cur>>psched_clock_scale;			\
-	}								\
-} while (0)
-#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz)
-#define PSCHED_JIFFIE2US(delay) ((delay)*psched_clock_per_hz)
-
-#endif /* CONFIG_NET_SCH_CLK_CPU */
-
-#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
-
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-#define PSCHED_TDIFF(tv1, tv2) \
-({ \
-	   int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
-	   int __delta = (tv1).tv_usec - (tv2).tv_usec; \
-	   if (__delta_sec) { \
-	           switch (__delta_sec) { \
-		   default: \
-			   __delta = 0; \
-		   case 2: \
-			   __delta += USEC_PER_SEC; \
-		   case 1: \
-			   __delta += USEC_PER_SEC; \
-	           } \
-	   } \
-	   __delta; \
-})
-
-static inline int
-psched_tod_diff(int delta_sec, int bound)
-{
-	int delta;
-
-	if (bound <= USEC_PER_SEC || delta_sec > (0x7FFFFFFF/USEC_PER_SEC)-1)
-		return bound;
-	delta = delta_sec * USEC_PER_SEC;
-	if (delta > bound || delta < 0)
-		delta = bound;
-	return delta;
-}
-
-#define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
-({ \
-	   int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
-	   int __delta = (tv1).tv_usec - (tv2).tv_usec; \
-	   switch (__delta_sec) { \
-	   default: \
-		   __delta = psched_tod_diff(__delta_sec, bound);  break; \
-	   case 2: \
-		   __delta += USEC_PER_SEC; \
-	   case 1: \
-		   __delta += USEC_PER_SEC; \
-	   case 0: \
- 		   if (__delta > bound || __delta < 0) \
- 			__delta = bound; \
-	   } \
-	   __delta; \
-})
-
-#define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \
-				(tv1).tv_sec <= (tv2).tv_sec) || \
-				 (tv1).tv_sec < (tv2).tv_sec)
-
-#define PSCHED_TADD2(tv, delta, tv_res) \
-({ \
-	   int __delta = (tv).tv_usec + (delta); \
-	   (tv_res).tv_sec = (tv).tv_sec; \
-	   while (__delta >= USEC_PER_SEC) { (tv_res).tv_sec++; __delta -= USEC_PER_SEC; } \
-	   (tv_res).tv_usec = __delta; \
-})
-
-#define PSCHED_TADD(tv, delta) \
-({ \
-	   (tv).tv_usec += (delta); \
-	   while ((tv).tv_usec >= USEC_PER_SEC) { (tv).tv_sec++; \
-		 (tv).tv_usec -= USEC_PER_SEC; } \
-})
-
-/* Set/check that time is in the "past perfect";
-   it depends on concrete representation of system time
- */
-
-#define PSCHED_SET_PASTPERFECT(t)	((t).tv_sec = 0)
-#define PSCHED_IS_PASTPERFECT(t)	((t).tv_sec == 0)
+/* Avoid doing 64 bit divide by 1000 */
+#define PSCHED_US2NS(x)			((s64)(x) << 10)
+#define PSCHED_NS2US(x)			((x) >> 10)
 
-#define	PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; })
+#define PSCHED_TICKS_PER_SEC		PSCHED_NS2US(NSEC_PER_SEC)
+#define PSCHED_GET_TIME(stamp) \
+	((stamp) = PSCHED_NS2US(ktime_to_ns(ktime_get())))
 
-#else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
+#define PSCHED_US2JIFFIE(usecs)		usecs_to_jiffies(PSCHED_US2NS((usecs)) / NSEC_PER_USEC)
+#define PSCHED_JIFFIE2US(delay)		PSCHED_NS2US(jiffies_to_usecs((delay)) * NSEC_PER_USEC)
 
-#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2))
+#define PSCHED_TDIFF(tv1, tv2)		(long)((tv1) - (tv2))
 #define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
-	min_t(long long, (tv1) - (tv2), bound)
-
-
-#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2))
+					min_t(long long, (tv1) - (tv2), bound)
+#define PSCHED_TLESS(tv1, tv2)		((tv1) < (tv2))
 #define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta))
-#define PSCHED_TADD(tv, delta) ((tv) += (delta))
+#define PSCHED_TADD(tv, delta)		((tv) += (delta))
 #define PSCHED_SET_PASTPERFECT(t)	((t) = 0)
 #define PSCHED_IS_PASTPERFECT(t)	((t) == 0)
 #define	PSCHED_AUDIT_TDIFF(t)
 
-#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
-
 extern struct Qdisc_ops pfifo_qdisc_ops;
 extern struct Qdisc_ops bfifo_qdisc_ops;
 
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index b74860aaf5f..f5cfde8c902 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -59,6 +59,7 @@ ktime_t ktime_get(void)
 
 	return timespec_to_ktime(now);
 }
+EXPORT_SYMBOL_GPL(ktime_get);
 
 /**
  * ktime_get_real - get the real (wall-) time in ktime_t format
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f4544dd8647..475df8449be 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -46,62 +46,6 @@ config NET_SCH_FIFO
 
 if NET_SCHED
 
-choice
-	prompt "Packet scheduler clock source"
-	default NET_SCH_CLK_GETTIMEOFDAY
-	---help---
-	  Packet schedulers need a monotonic clock that increments at a static
-	  rate. The kernel provides several suitable interfaces, each with
-	  different properties:
-	  
-	  - high resolution (us or better)
-	  - fast to read (minimal locking, no i/o access)
-	  - synchronized on all processors
-	  - handles cpu clock frequency changes
-
-	  but nothing provides all of the above.
-
-config NET_SCH_CLK_JIFFIES
-	bool "Timer interrupt"
-	---help---
-	  Say Y here if you want to use the timer interrupt (jiffies) as clock
-	  source. This clock source is fast, synchronized on all processors and
-	  handles cpu clock frequency changes, but its resolution is too low
-	  for accurate shaping except at very low speed.
-
-config NET_SCH_CLK_GETTIMEOFDAY
-	bool "gettimeofday"
-	---help---
-	  Say Y here if you want to use gettimeofday as clock source. This clock
-	  source has high resolution, is synchronized on all processors and
-	  handles cpu clock frequency changes, but it is slow.
-
-	  Choose this if you need a high resolution clock source but can't use
-	  the CPU's cycle counter.
-
-# don't allow on SMP x86 because they can have unsynchronized TSCs.
-# gettimeofday is a good alternative
-config NET_SCH_CLK_CPU
-	bool "CPU cycle counter"
-	depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64
-	---help---
-	  Say Y here if you want to use the CPU's cycle counter as clock source.
-	  This is a cheap and high resolution clock source, but on some
-	  architectures it is not synchronized on all processors and doesn't
-	  handle cpu clock frequency changes.
-
-	  The useable cycle counters are:
-
-	  	x86/x86_64	- Timestamp Counter
-		alpha		- Cycle Counter
-		sparc64		- %ticks register
-		ppc64		- Time base
-		ia64		- Interval Time Counter
-
-	  Choose this if your CPU's cycle counter is working properly.
-
-endchoice
-
 comment "Queueing/Scheduling"
 
 config NET_SCH_CBQ
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 4a927a5e1fa..d71bf79eb80 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1175,15 +1175,12 @@ reclassify:
 	return -1;
 }
 
-static int psched_us_per_tick = 1;
-static int psched_tick_per_us = 1;
-
 #ifdef CONFIG_PROC_FS
 static int psched_show(struct seq_file *seq, void *v)
 {
 	seq_printf(seq, "%08x %08x %08x %08x\n",
-		      psched_tick_per_us, psched_us_per_tick,
-		      1000000, HZ);
+		   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
+		   1000000, HZ);
 
 	return 0;
 }
@@ -1202,80 +1199,10 @@ static const struct file_operations psched_fops = {
 };
 #endif
 
-#ifdef CONFIG_NET_SCH_CLK_CPU
-psched_tdiff_t psched_clock_per_hz;
-int psched_clock_scale;
-EXPORT_SYMBOL(psched_clock_per_hz);
-EXPORT_SYMBOL(psched_clock_scale);
-
-psched_time_t psched_time_base;
-cycles_t psched_time_mark;
-EXPORT_SYMBOL(psched_time_mark);
-EXPORT_SYMBOL(psched_time_base);
-
-/*
- * Periodically adjust psched_time_base to avoid overflow
- * with 32-bit get_cycles(). Safe up to 4GHz CPU.
- */
-static void psched_tick(unsigned long);
-static DEFINE_TIMER(psched_timer, psched_tick, 0, 0);
-
-static void psched_tick(unsigned long dummy)
-{
-	if (sizeof(cycles_t) == sizeof(u32)) {
-		psched_time_t dummy_stamp;
-		PSCHED_GET_TIME(dummy_stamp);
-		psched_timer.expires = jiffies + 1*HZ;
-		add_timer(&psched_timer);
-	}
-}
-
-int __init psched_calibrate_clock(void)
-{
-	psched_time_t stamp, stamp1;
-	struct timeval tv, tv1;
-	psched_tdiff_t delay;
-	long rdelay;
-	unsigned long stop;
-
-	psched_tick(0);
-	stop = jiffies + HZ/10;
-	PSCHED_GET_TIME(stamp);
-	do_gettimeofday(&tv);
-	while (time_before(jiffies, stop)) {
-		barrier();
-		cpu_relax();
-	}
-	PSCHED_GET_TIME(stamp1);
-	do_gettimeofday(&tv1);
-
-	delay = PSCHED_TDIFF(stamp1, stamp);
-	rdelay = tv1.tv_usec - tv.tv_usec;
-	rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
-	if (rdelay > delay)
-		return -1;
-	delay /= rdelay;
-	psched_tick_per_us = delay;
-	while ((delay>>=1) != 0)
-		psched_clock_scale++;
-	psched_us_per_tick = 1<<psched_clock_scale;
-	psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
-	return 0;
-}
-#endif
-
 static int __init pktsched_init(void)
 {
 	struct rtnetlink_link *link_p;
 
-#ifdef CONFIG_NET_SCH_CLK_CPU
-	if (psched_calibrate_clock() < 0)
-		return -1;
-#elif defined(CONFIG_NET_SCH_CLK_JIFFIES)
-	psched_tick_per_us = HZ<<PSCHED_JSCALE;
-	psched_us_per_tick = 1000000;
-#endif
-
 	link_p = rtnetlink_links[PF_UNSPEC];
 
 	/* Setup rtnetlink links. It is made here to avoid
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 407c6fb1ba1..f85cfba647f 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -195,20 +195,6 @@ struct hfsc_sched
 	struct timer_list wd_timer;		/* watchdog timer */
 };
 
-/*
- * macros
- */
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-#include <linux/time.h>
-#undef PSCHED_GET_TIME
-#define PSCHED_GET_TIME(stamp)						\
-do {									\
-	struct timeval tv;						\
-	do_gettimeofday(&tv);						\
-	(stamp) = 1ULL * USEC_PER_SEC * tv.tv_sec + tv.tv_usec;		\
-} while (0)
-#endif
-
 #define	HT_INFINITY	0xffffffffffffffffULL	/* infinite time value */
 
 
@@ -394,28 +380,17 @@ cftree_update(struct hfsc_class *cl)
  *	ism: (psched_us/byte) << ISM_SHIFT
  *	dx: psched_us
  *
- * Clock source resolution (CONFIG_NET_SCH_CLK_*)
- *  JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us.
- *  CPU: resolution is between 0.5us and 1us.
- *  GETTIMEOFDAY: resolution is exactly 1us.
+ * The clock source resolution with ktime is 1.024us.
  *
  * sm and ism are scaled in order to keep effective digits.
  * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective
  * digits in decimal using the following table.
  *
- * Note: We can afford the additional accuracy (altq hfsc keeps at most
- * 3 effective digits) thanks to the fact that linux clock is bounded
- * much more tightly.
- *
  *  bits/sec      100Kbps     1Mbps     10Mbps     100Mbps    1Gbps
  *  ------------+-------------------------------------------------------
- *  bytes/0.5us   6.25e-3    62.5e-3    625e-3     6250e-e    62500e-3
- *  bytes/us      12.5e-3    125e-3     1250e-3    12500e-3   125000e-3
- *  bytes/1.27us  15.875e-3  158.75e-3  1587.5e-3  15875e-3   158750e-3
+ *  bytes/1.024us 12.8e-3    128e-3     1280e-3    12800e-3   128000e-3
  *
- *  0.5us/byte    160        16         1.6        0.16       0.016
- *  us/byte       80         8          0.8        0.08       0.008
- *  1.27us/byte   63         6.3        0.63       0.063      0.0063
+ *  1.024us/byte  78.125     7.8125     0.78125    0.078125   0.0078125
  */
 #define	SM_SHIFT	20
 #define	ISM_SHIFT	18
-- 
cgit v1.2.3


From 4179477f637caa730626bd597fdf28c5bad73565 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 16 Mar 2007 01:19:15 -0700
Subject: [NET_SCHED]: Add hrtimer based qdisc watchdog

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 10 ++++++++++
 net/sched/sch_api.c     | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 1c12afd113d..b090d55d5eb 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -64,6 +64,16 @@ typedef long	psched_tdiff_t;
 #define PSCHED_IS_PASTPERFECT(t)	((t) == 0)
 #define	PSCHED_AUDIT_TDIFF(t)
 
+struct qdisc_watchdog {
+	struct hrtimer	timer;
+	struct Qdisc	*qdisc;
+};
+
+extern void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc);
+extern void qdisc_watchdog_schedule(struct qdisc_watchdog *wd,
+				    psched_time_t expires);
+extern void qdisc_watchdog_cancel(struct qdisc_watchdog *wd);
+
 extern struct Qdisc_ops pfifo_qdisc_ops;
 extern struct Qdisc_ops bfifo_qdisc_ops;
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index d71bf79eb80..6bc395cb235 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -34,6 +34,7 @@
 #include <linux/kmod.h>
 #include <linux/list.h>
 #include <linux/bitops.h>
+#include <linux/hrtimer.h>
 
 #include <net/sock.h>
 #include <net/pkt_sched.h>
@@ -291,6 +292,41 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
 	}
 }
 
+static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
+{
+	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
+						 timer);
+
+	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+	netif_schedule(wd->qdisc->dev);
+	return HRTIMER_NORESTART;
+}
+
+void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
+{
+	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	wd->timer.function = qdisc_watchdog;
+	wd->qdisc = qdisc;
+}
+EXPORT_SYMBOL(qdisc_watchdog_init);
+
+void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
+{
+	ktime_t time;
+
+	wd->qdisc->flags |= TCQ_F_THROTTLED;
+	time = ktime_set(0, 0);
+	time = ktime_add_ns(time, PSCHED_US2NS(expires));
+	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
+}
+EXPORT_SYMBOL(qdisc_watchdog_schedule);
+
+void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
+{
+	hrtimer_cancel(&wd->timer);
+	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+}
+EXPORT_SYMBOL(qdisc_watchdog_cancel);
 
 /* Allocate an unique handle from space managed by kernel */
 
-- 
cgit v1.2.3


From ed2b229a97fd537857ad8441ab8b5996b15eadfd Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 16 Mar 2007 01:19:33 -0700
Subject: [NET_SCHED]: sch_hfsc: use hrtimer based watchdog

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_hfsc.c | 32 +++++++-------------------------
 1 file changed, 7 insertions(+), 25 deletions(-)

diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index f85cfba647f..3cc2714fd5a 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -59,7 +59,6 @@
 #include <linux/skbuff.h>
 #include <linux/string.h>
 #include <linux/slab.h>
-#include <linux/timer.h>
 #include <linux/list.h>
 #include <linux/rbtree.h>
 #include <linux/init.h>
@@ -192,7 +191,7 @@ struct hfsc_sched
 	struct list_head droplist;		/* active leaf class list (for
 						   dropping) */
 	struct sk_buff_head requeue;		/* requeued packet */
-	struct timer_list wd_timer;		/* watchdog timer */
+	struct qdisc_watchdog watchdog;		/* watchdog timer */
 };
 
 #define	HT_INFINITY	0xffffffffffffffffULL	/* infinite time value */
@@ -1434,21 +1433,11 @@ hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 }
 
 static void
-hfsc_watchdog(unsigned long arg)
-{
-	struct Qdisc *sch = (struct Qdisc *)arg;
-
-	sch->flags &= ~TCQ_F_THROTTLED;
-	netif_schedule(sch->dev);
-}
-
-static void
-hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time)
+hfsc_schedule_watchdog(struct Qdisc *sch)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 	struct hfsc_class *cl;
 	u64 next_time = 0;
-	long delay;
 
 	if ((cl = eltree_get_minel(q)) != NULL)
 		next_time = cl->cl_e;
@@ -1457,11 +1446,7 @@ hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time)
 			next_time = q->root.cl_cfmin;
 	}
 	WARN_ON(next_time == 0);
-	delay = next_time - cur_time;
-	delay = PSCHED_US2JIFFIE(delay);
-
-	sch->flags |= TCQ_F_THROTTLED;
-	mod_timer(&q->wd_timer, jiffies + delay);
+	qdisc_watchdog_schedule(&q->watchdog, next_time);
 }
 
 static int
@@ -1498,9 +1483,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
 
 	list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]);
 
-	init_timer(&q->wd_timer);
-	q->wd_timer.function = hfsc_watchdog;
-	q->wd_timer.data = (unsigned long)sch;
+	qdisc_watchdog_init(&q->watchdog, sch);
 
 	return 0;
 }
@@ -1570,8 +1553,7 @@ hfsc_reset_qdisc(struct Qdisc *sch)
 	__skb_queue_purge(&q->requeue);
 	q->eligible = RB_ROOT;
 	INIT_LIST_HEAD(&q->droplist);
-	del_timer(&q->wd_timer);
-	sch->flags &= ~TCQ_F_THROTTLED;
+	qdisc_watchdog_cancel(&q->watchdog);
 	sch->q.qlen = 0;
 }
 
@@ -1587,7 +1569,7 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
 			hfsc_destroy_class(sch, cl);
 	}
 	__skb_queue_purge(&q->requeue);
-	del_timer(&q->wd_timer);
+	qdisc_watchdog_cancel(&q->watchdog);
 }
 
 static int
@@ -1673,7 +1655,7 @@ hfsc_dequeue(struct Qdisc *sch)
 		cl = vttree_get_minvt(&q->root, cur_time);
 		if (cl == NULL) {
 			sch->qstats.overlimits++;
-			hfsc_schedule_watchdog(sch, cur_time);
+			hfsc_schedule_watchdog(sch);
 			return NULL;
 		}
 	}
-- 
cgit v1.2.3


From f7f593e383145931cb2a65df62c31ce1bcc0cffc Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 16 Mar 2007 01:20:07 -0700
Subject: [NET_SCHED]: sch_tbf: use hrtimer based watchdog

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_tbf.c | 31 +++++++------------------------
 1 file changed, 7 insertions(+), 24 deletions(-)

diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 85da8daa61d..f14692f3a14 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -127,8 +127,8 @@ struct tbf_sched_data
 	long	tokens;			/* Current number of B tokens */
 	long	ptokens;		/* Current number of P tokens */
 	psched_time_t	t_c;		/* Time check-point */
-	struct timer_list wd_timer;	/* Watchdog timer */
 	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
+	struct qdisc_watchdog watchdog;	/* Watchdog timer */
 };
 
 #define L2T(q,L)   ((q)->R_tab->data[(L)>>(q)->R_tab->rate.cell_log])
@@ -185,14 +185,6 @@ static unsigned int tbf_drop(struct Qdisc* sch)
 	return len;
 }
 
-static void tbf_watchdog(unsigned long arg)
-{
-	struct Qdisc *sch = (struct Qdisc*)arg;
-
-	sch->flags &= ~TCQ_F_THROTTLED;
-	netif_schedule(sch->dev);
-}
-
 static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
@@ -202,7 +194,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 
 	if (skb) {
 		psched_time_t now;
-		long toks, delay;
+		long toks;
 		long ptoks = 0;
 		unsigned int len = skb->len;
 
@@ -230,12 +222,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 			return skb;
 		}
 
-		delay = PSCHED_US2JIFFIE(max_t(long, -toks, -ptoks));
-
-		if (delay == 0)
-			delay = 1;
-
-		mod_timer(&q->wd_timer, jiffies+delay);
+		qdisc_watchdog_schedule(&q->watchdog,
+					now + max_t(long, -toks, -ptoks));
 
 		/* Maybe we have a shorter packet in the queue,
 		   which can be sent now. It sounds cool,
@@ -254,7 +242,6 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 			sch->qstats.drops++;
 		}
 
-		sch->flags |= TCQ_F_THROTTLED;
 		sch->qstats.overlimits++;
 	}
 	return NULL;
@@ -269,8 +256,7 @@ static void tbf_reset(struct Qdisc* sch)
 	PSCHED_GET_TIME(q->t_c);
 	q->tokens = q->buffer;
 	q->ptokens = q->mtu;
-	sch->flags &= ~TCQ_F_THROTTLED;
-	del_timer(&q->wd_timer);
+	qdisc_watchdog_cancel(&q->watchdog);
 }
 
 static struct Qdisc *tbf_create_dflt_qdisc(struct Qdisc *sch, u32 limit)
@@ -378,10 +364,7 @@ static int tbf_init(struct Qdisc* sch, struct rtattr *opt)
 		return -EINVAL;
 
 	PSCHED_GET_TIME(q->t_c);
-	init_timer(&q->wd_timer);
-	q->wd_timer.function = tbf_watchdog;
-	q->wd_timer.data = (unsigned long)sch;
-
+	qdisc_watchdog_init(&q->watchdog, sch);
 	q->qdisc = &noop_qdisc;
 
 	return tbf_change(sch, opt);
@@ -391,7 +374,7 @@ static void tbf_destroy(struct Qdisc *sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
 
-	del_timer(&q->wd_timer);
+	qdisc_watchdog_cancel(&q->watchdog);
 
 	if (q->P_tab)
 		qdisc_put_rtab(q->P_tab);
-- 
cgit v1.2.3


From 59cb5c6734021acc68590c7c2e0e92ad9a4952c6 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 16 Mar 2007 01:20:31 -0700
Subject: [NET_SCHED]: sch_netem: use hrtimer based watchdog

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 25 +++++--------------------
 1 file changed, 5 insertions(+), 20 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 1ccbfb55b0b..915f82a2cc3 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -54,7 +54,7 @@
 
 struct netem_sched_data {
 	struct Qdisc	*qdisc;
-	struct timer_list timer;
+	struct qdisc_watchdog watchdog;
 
 	u32 latency;
 	u32 loss;
@@ -284,7 +284,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 			sch->flags &= ~TCQ_F_THROTTLED;
 			return skb;
 		} else {
-			psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now);
+			qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
 
 			if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
 				qdisc_tree_decrease_qlen(q->qdisc, 1);
@@ -292,32 +292,19 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 				printk(KERN_ERR "netem: queue discpline %s could not requeue\n",
 				       q->qdisc->ops->id);
 			}
-
-			mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay));
-			sch->flags |= TCQ_F_THROTTLED;
 		}
 	}
 
 	return NULL;
 }
 
-static void netem_watchdog(unsigned long arg)
-{
-	struct Qdisc *sch = (struct Qdisc *)arg;
-
-	pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen);
-	sch->flags &= ~TCQ_F_THROTTLED;
-	netif_schedule(sch->dev);
-}
-
 static void netem_reset(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 
 	qdisc_reset(q->qdisc);
 	sch->q.qlen = 0;
-	sch->flags &= ~TCQ_F_THROTTLED;
-	del_timer_sync(&q->timer);
+	qdisc_watchdog_cancel(&q->watchdog);
 }
 
 /* Pass size change message down to embedded FIFO */
@@ -567,9 +554,7 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt)
 	if (!opt)
 		return -EINVAL;
 
-	init_timer(&q->timer);
-	q->timer.function = netem_watchdog;
-	q->timer.data = (unsigned long) sch;
+	qdisc_watchdog_init(&q->watchdog, sch);
 
 	q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops,
 				     TC_H_MAKE(sch->handle, 1));
@@ -590,7 +575,7 @@ static void netem_destroy(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 
-	del_timer_sync(&q->timer);
+	qdisc_watchdog_cancel(&q->watchdog);
 	qdisc_destroy(q->qdisc);
 	kfree(q->delay_dist);
 }
-- 
cgit v1.2.3


From 88a993540a65c38865f83961520494b4ad5d0363 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 16 Mar 2007 01:21:11 -0700
Subject: [NET_SCHED]: sch_cbq: use hrtimer based watchdog

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_cbq.c | 28 +++++++---------------------
 1 file changed, 7 insertions(+), 21 deletions(-)

diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 76c92e710a3..d29d1212934 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -181,11 +181,11 @@ struct cbq_sched_data
 	unsigned		pmask;
 
 	struct timer_list	delay_timer;
-	struct timer_list	wd_timer;	/* Watchdog timer,
+	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
 						   started when CBQ has
 						   backlog, but cannot
 						   transmit just now */
-	long			wd_expires;
+	psched_tdiff_t		wd_expires;
 	int			toplevel;
 	u32			hgenerator;
 };
@@ -604,14 +604,6 @@ static void cbq_ovl_drop(struct cbq_class *cl)
 	cbq_ovl_classic(cl);
 }
 
-static void cbq_watchdog(unsigned long arg)
-{
-	struct Qdisc *sch = (struct Qdisc*)arg;
-
-	sch->flags &= ~TCQ_F_THROTTLED;
-	netif_schedule(sch->dev);
-}
-
 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
 {
 	struct cbq_class *cl;
@@ -1063,13 +1055,9 @@ cbq_dequeue(struct Qdisc *sch)
 
 	if (sch->q.qlen) {
 		sch->qstats.overlimits++;
-		if (q->wd_expires) {
-			long delay = PSCHED_US2JIFFIE(q->wd_expires);
-			if (delay <= 0)
-				delay = 1;
-			mod_timer(&q->wd_timer, jiffies + delay);
-			sch->flags |= TCQ_F_THROTTLED;
-		}
+		if (q->wd_expires)
+			qdisc_watchdog_schedule(&q->watchdog,
+						q->now + q->wd_expires);
 	}
 	return NULL;
 }
@@ -1276,7 +1264,7 @@ cbq_reset(struct Qdisc* sch)
 	q->pmask = 0;
 	q->tx_class = NULL;
 	q->tx_borrowed = NULL;
-	del_timer(&q->wd_timer);
+	qdisc_watchdog_cancel(&q->watchdog);
 	del_timer(&q->delay_timer);
 	q->toplevel = TC_CBQ_MAXLEVEL;
 	PSCHED_GET_TIME(q->now);
@@ -1446,9 +1434,7 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
 	q->link.minidle = -0x7FFFFFFF;
 	q->link.stats_lock = &sch->dev->queue_lock;
 
-	init_timer(&q->wd_timer);
-	q->wd_timer.data = (unsigned long)sch;
-	q->wd_timer.function = cbq_watchdog;
+	qdisc_watchdog_init(&q->watchdog, sch);
 	init_timer(&q->delay_timer);
 	q->delay_timer.data = (unsigned long)sch;
 	q->delay_timer.function = cbq_undelay;
-- 
cgit v1.2.3


From e9054a339eb275c756efeeaee42af484ac72a3f4 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 16 Mar 2007 01:21:40 -0700
Subject: [NET_SCHED]: sch_cbq: fix cbq_undelay_prio for non-active priorites

cbq_undelay_prio is supposed to return a time delta, but returns the
current time for non-active priorities, causing cbq_undelay to mark
the priority as active and schedule a timer for twice the current
time.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_cbq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index d29d1212934..32f6a308bad 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -612,7 +612,7 @@ static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
 	unsigned long sched = now;
 
 	if (cl_prev == NULL)
-		return now;
+		return 0;
 
 	do {
 		cl = cl_prev->next_alive;
-- 
cgit v1.2.3


From 1a13cb63d679da328cfa339c89b8b2d0eba3b81e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 16 Mar 2007 01:22:20 -0700
Subject: [NET_SCHED]: sch_cbq: use hrtimer for delay_timer

Switch delay_timer to hrtimer.

The class penalty parameter is changed to use psched ticks as units.
Since iproute never supported using this and the only existing user
(libnl) incorrectly assumes psched ticks as units anyway, this
shouldn't break anything.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_cbq.c | 70 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 41 insertions(+), 29 deletions(-)

diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 32f6a308bad..0491fad97c0 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -112,7 +112,7 @@ struct cbq_class
 
 	/* Overlimit strategy parameters */
 	void			(*overlimit)(struct cbq_class *cl);
-	long			penalty;
+	psched_tdiff_t		penalty;
 
 	/* General scheduler (WRR) parameters */
 	long			allot;
@@ -143,7 +143,7 @@ struct cbq_class
 	psched_time_t		undertime;
 	long			avgidle;
 	long			deficit;	/* Saved deficit for WRR */
-	unsigned long		penalized;
+	psched_time_t		penalized;
 	struct gnet_stats_basic bstats;
 	struct gnet_stats_queue qstats;
 	struct gnet_stats_rate_est rate_est;
@@ -180,7 +180,7 @@ struct cbq_sched_data
 	psched_time_t		now_rt;		/* Cached real time */
 	unsigned		pmask;
 
-	struct timer_list	delay_timer;
+	struct hrtimer		delay_timer;
 	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
 						   started when CBQ has
 						   backlog, but cannot
@@ -549,7 +549,8 @@ static void cbq_ovl_delay(struct cbq_class *cl)
 	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
 
 	if (!cl->delayed) {
-		unsigned long sched = jiffies;
+		psched_time_t sched = q->now;
+		ktime_t expires;
 
 		delay += cl->offtime;
 		if (cl->avgidle < 0)
@@ -559,14 +560,18 @@ static void cbq_ovl_delay(struct cbq_class *cl)
 		PSCHED_TADD2(q->now, delay, cl->undertime);
 
 		if (delay > 0) {
-			sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
+			sched += delay + cl->penalty;
 			cl->penalized = sched;
 			cl->cpriority = TC_CBQ_MAXPRIO;
 			q->pmask |= (1<<TC_CBQ_MAXPRIO);
-			if (del_timer(&q->delay_timer) &&
-			    (long)(q->delay_timer.expires - sched) > 0)
-				q->delay_timer.expires = sched;
-			add_timer(&q->delay_timer);
+
+			expires = ktime_set(0, 0);
+			expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
+			if (hrtimer_try_to_cancel(&q->delay_timer) &&
+			    ktime_to_ns(ktime_sub(q->delay_timer.expires,
+						  expires)) > 0)
+				q->delay_timer.expires = expires;
+			hrtimer_restart(&q->delay_timer);
 			cl->delayed = 1;
 			cl->xstats.overactions++;
 			return;
@@ -583,7 +588,7 @@ static void cbq_ovl_lowprio(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 
-	cl->penalized = jiffies + cl->penalty;
+	cl->penalized = q->now + cl->penalty;
 
 	if (cl->cpriority != cl->priority2) {
 		cl->cpriority = cl->priority2;
@@ -604,19 +609,19 @@ static void cbq_ovl_drop(struct cbq_class *cl)
 	cbq_ovl_classic(cl);
 }
 
-static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
+static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
+				       psched_time_t now)
 {
 	struct cbq_class *cl;
 	struct cbq_class *cl_prev = q->active[prio];
-	unsigned long now = jiffies;
-	unsigned long sched = now;
+	psched_time_t sched = now;
 
 	if (cl_prev == NULL)
 		return 0;
 
 	do {
 		cl = cl_prev->next_alive;
-		if ((long)(now - cl->penalized) > 0) {
+		if (now - cl->penalized > 0) {
 			cl_prev->next_alive = cl->next_alive;
 			cl->next_alive = NULL;
 			cl->cpriority = cl->priority;
@@ -632,30 +637,34 @@ static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
 			}
 
 			cl = cl_prev->next_alive;
-		} else if ((long)(sched - cl->penalized) > 0)
+		} else if (sched - cl->penalized > 0)
 			sched = cl->penalized;
 	} while ((cl_prev = cl) != q->active[prio]);
 
-	return (long)(sched - now);
+	return sched - now;
 }
 
-static void cbq_undelay(unsigned long arg)
+static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 {
-	struct Qdisc *sch = (struct Qdisc*)arg;
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	long delay = 0;
+	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
+						delay_timer);
+	struct Qdisc *sch = q->watchdog.qdisc;
+	psched_time_t now;
+	psched_tdiff_t delay = 0;
 	unsigned pmask;
 
+	PSCHED_GET_TIME(now);
+
 	pmask = q->pmask;
 	q->pmask = 0;
 
 	while (pmask) {
 		int prio = ffz(~pmask);
-		long tmp;
+		psched_tdiff_t tmp;
 
 		pmask &= ~(1<<prio);
 
-		tmp = cbq_undelay_prio(q, prio);
+		tmp = cbq_undelay_prio(q, prio, now);
 		if (tmp > 0) {
 			q->pmask |= 1<<prio;
 			if (tmp < delay || delay == 0)
@@ -664,12 +673,16 @@ static void cbq_undelay(unsigned long arg)
 	}
 
 	if (delay) {
-		q->delay_timer.expires = jiffies + delay;
-		add_timer(&q->delay_timer);
+		ktime_t time;
+
+		time = ktime_set(0, 0);
+		time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
+		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 	}
 
 	sch->flags &= ~TCQ_F_THROTTLED;
 	netif_schedule(sch->dev);
+	return HRTIMER_NORESTART;
 }
 
 
@@ -1265,7 +1278,7 @@ cbq_reset(struct Qdisc* sch)
 	q->tx_class = NULL;
 	q->tx_borrowed = NULL;
 	qdisc_watchdog_cancel(&q->watchdog);
-	del_timer(&q->delay_timer);
+	hrtimer_cancel(&q->delay_timer);
 	q->toplevel = TC_CBQ_MAXLEVEL;
 	PSCHED_GET_TIME(q->now);
 	q->now_rt = q->now;
@@ -1367,7 +1380,7 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
 	default:
 		return -EINVAL;
 	}
-	cl->penalty = (ovl->penalty*HZ)/1000;
+	cl->penalty = ovl->penalty;
 	return 0;
 }
 
@@ -1435,8 +1448,7 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
 	q->link.stats_lock = &sch->dev->queue_lock;
 
 	qdisc_watchdog_init(&q->watchdog, sch);
-	init_timer(&q->delay_timer);
-	q->delay_timer.data = (unsigned long)sch;
+	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	q->delay_timer.function = cbq_undelay;
 	q->toplevel = TC_CBQ_MAXLEVEL;
 	PSCHED_GET_TIME(q->now);
@@ -1514,7 +1526,7 @@ static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
 	opt.strategy = cl->ovl_strategy;
 	opt.priority2 = cl->priority2+1;
 	opt.pad = 0;
-	opt.penalty = (cl->penalty*1000)/HZ;
+	opt.penalty = cl->penalty;
 	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
 	return skb->len;
 
-- 
cgit v1.2.3


From fb983d4578e238b7f483b4f8f39f3a0f35d34d16 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 16 Mar 2007 01:22:39 -0700
Subject: [NET_SCHED]: sch_htb: use hrtimer based watchdog

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 91 ++++++++++++++++++-----------------------------------
 1 file changed, 31 insertions(+), 60 deletions(-)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 3c3294d0104..4d84200f097 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -128,7 +128,7 @@ struct htb_class {
 	} un;
 	struct rb_node node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
 	struct rb_node pq_node;	/* node for event queue */
-	unsigned long pq_key;	/* the same type as jiffies global */
+	psched_time_t pq_key;
 
 	int prio_activity;	/* for which prios are we active */
 	enum htb_cmode cmode;	/* current mode of the class */
@@ -179,10 +179,7 @@ struct htb_sched {
 	struct rb_root wait_pq[TC_HTB_MAXDEPTH];
 
 	/* time of nearest event per level (row) */
-	unsigned long near_ev_cache[TC_HTB_MAXDEPTH];
-
-	/* cached value of jiffies in dequeue */
-	unsigned long jiffies;
+	psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
 
 	/* whether we hit non-work conserving class during this dequeue; we use */
 	int nwc_hit;		/* this to disable mindelay complaint in dequeue */
@@ -195,7 +192,7 @@ struct htb_sched {
 
 	int rate2quantum;	/* quant = rate / rate2quantum */
 	psched_time_t now;	/* cached dequeue time */
-	struct timer_list timer;	/* send delay timer */
+	struct qdisc_watchdog watchdog;
 #ifdef HTB_RATECM
 	struct timer_list rttim;	/* rate computer timer */
 	int recmp_bucket;	/* which hash bucket to recompute next */
@@ -342,19 +339,19 @@ static void htb_add_to_wait_tree(struct htb_sched *q,
 {
 	struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
 
-	cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
-	if (cl->pq_key == q->jiffies)
+	cl->pq_key = q->now + delay;
+	if (cl->pq_key == q->now)
 		cl->pq_key++;
 
 	/* update the nearest event cache */
-	if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
+	if (q->near_ev_cache[cl->level] > cl->pq_key)
 		q->near_ev_cache[cl->level] = cl->pq_key;
 
 	while (*p) {
 		struct htb_class *c;
 		parent = *p;
 		c = rb_entry(parent, struct htb_class, pq_node);
-		if (time_after_eq(cl->pq_key, c->pq_key))
+		if (cl->pq_key >= c->pq_key)
 			p = &parent->rb_right;
 		else
 			p = &parent->rb_left;
@@ -679,14 +676,6 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 	return NET_XMIT_SUCCESS;
 }
 
-static void htb_timer(unsigned long arg)
-{
-	struct Qdisc *sch = (struct Qdisc *)arg;
-	sch->flags &= ~TCQ_F_THROTTLED;
-	wmb();
-	netif_schedule(sch->dev);
-}
-
 #ifdef HTB_RATECM
 #define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
 static void htb_rate_timer(unsigned long arg)
@@ -778,11 +767,11 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 /**
  * htb_do_events - make mode changes to classes at the level
  *
- * Scans event queue for pending events and applies them. Returns jiffies to
+ * Scans event queue for pending events and applies them. Returns time of
  * next pending event (0 for no event in pq).
- * Note: Aplied are events whose have cl->pq_key <= jiffies.
+ * Note: Applied are events whose have cl->pq_key <= q->now.
  */
-static long htb_do_events(struct htb_sched *q, int level)
+static psched_time_t htb_do_events(struct htb_sched *q, int level)
 {
 	int i;
 
@@ -795,9 +784,9 @@ static long htb_do_events(struct htb_sched *q, int level)
 			return 0;
 
 		cl = rb_entry(p, struct htb_class, pq_node);
-		if (time_after(cl->pq_key, q->jiffies)) {
-			return cl->pq_key - q->jiffies;
-		}
+		if (cl->pq_key > q->now)
+			return cl->pq_key;
+
 		htb_safe_rb_erase(p, q->wait_pq + level);
 		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
 		htb_change_class_mode(q, cl, &diff);
@@ -806,7 +795,7 @@ static long htb_do_events(struct htb_sched *q, int level)
 	}
 	if (net_ratelimit())
 		printk(KERN_WARNING "htb: too many events !\n");
-	return HZ / 10;
+	return q->now + PSCHED_TICKS_PER_SEC / 10;
 }
 
 /* Returns class->node+prio from id-tree where classe's id is >= id. NULL
@@ -958,30 +947,12 @@ next:
 	return skb;
 }
 
-static void htb_delay_by(struct Qdisc *sch, long delay)
-{
-	struct htb_sched *q = qdisc_priv(sch);
-	if (delay <= 0)
-		delay = 1;
-	if (unlikely(delay > 5 * HZ)) {
-		if (net_ratelimit())
-			printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
-		delay = 5 * HZ;
-	}
-	/* why don't use jiffies here ? because expires can be in past */
-	mod_timer(&q->timer, q->jiffies + delay);
-	sch->flags |= TCQ_F_THROTTLED;
-	sch->qstats.overlimits++;
-}
-
 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 {
 	struct sk_buff *skb = NULL;
 	struct htb_sched *q = qdisc_priv(sch);
 	int level;
-	long min_delay;
-
-	q->jiffies = jiffies;
+	psched_time_t next_event;
 
 	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
 	skb = __skb_dequeue(&q->direct_queue);
@@ -995,21 +966,23 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 		goto fin;
 	PSCHED_GET_TIME(q->now);
 
-	min_delay = LONG_MAX;
+	next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
 	q->nwc_hit = 0;
 	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
 		/* common case optimization - skip event handler quickly */
 		int m;
-		long delay;
-		if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
-			delay = htb_do_events(q, level);
-			q->near_ev_cache[level] =
-			    q->jiffies + (delay ? delay : HZ);
+		psched_time_t event;
+
+		if (q->now >= q->near_ev_cache[level]) {
+			event = htb_do_events(q, level);
+			q->near_ev_cache[level] = event ? event :
+							  PSCHED_TICKS_PER_SEC;
 		} else
-			delay = q->near_ev_cache[level] - q->jiffies;
+			event = q->near_ev_cache[level];
+
+		if (event && next_event > event)
+			next_event = event;
 
-		if (delay && min_delay > delay)
-			min_delay = delay;
 		m = ~q->row_mask[level];
 		while (m != (int)(-1)) {
 			int prio = ffz(m);
@@ -1022,7 +995,8 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 			}
 		}
 	}
-	htb_delay_by(sch, min_delay > 5 * HZ ? 5 * HZ : min_delay);
+	sch->qstats.overlimits++;
+	qdisc_watchdog_schedule(&q->watchdog, next_event);
 fin:
 	return skb;
 }
@@ -1075,8 +1049,7 @@ static void htb_reset(struct Qdisc *sch)
 
 		}
 	}
-	sch->flags &= ~TCQ_F_THROTTLED;
-	del_timer(&q->timer);
+	qdisc_watchdog_cancel(&q->watchdog);
 	__skb_queue_purge(&q->direct_queue);
 	sch->q.qlen = 0;
 	memset(q->row, 0, sizeof(q->row));
@@ -1113,14 +1086,12 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
 	for (i = 0; i < TC_HTB_NUMPRIO; i++)
 		INIT_LIST_HEAD(q->drops + i);
 
-	init_timer(&q->timer);
+	qdisc_watchdog_init(&q->watchdog, sch);
 	skb_queue_head_init(&q->direct_queue);
 
 	q->direct_qlen = sch->dev->tx_queue_len;
 	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
 		q->direct_qlen = 2;
-	q->timer.function = htb_timer;
-	q->timer.data = (unsigned long)sch;
 
 #ifdef HTB_RATECM
 	init_timer(&q->rttim);
@@ -1341,7 +1312,7 @@ static void htb_destroy(struct Qdisc *sch)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 
-	del_timer_sync(&q->timer);
+	qdisc_watchdog_cancel(&q->watchdog);
 #ifdef HTB_RATECM
 	del_timer_sync(&q->rttim);
 #endif
-- 
cgit v1.2.3


From 00c04af9df3d26e5a8093da850e982a7b6aeada7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 16 Mar 2007 01:23:02 -0700
Subject: [NET_SCHED]: kill jiffie conversion macros

Now that all packet schedulers have been converted to hrtimers most users
of PSCHED_JIFFIE2US and PSCHED_US2JIFFIE are gone. The remaining users use
it to convert external time units to packet scheduler clock ticks, so use
PSCHED_TICKS_PER_SEC instead.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h |  3 ---
 net/sched/sch_hfsc.c    | 12 ++++++------
 net/sched/sch_htb.c     |  2 +-
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index b090d55d5eb..6555e57ff6c 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -51,9 +51,6 @@ typedef long	psched_tdiff_t;
 #define PSCHED_GET_TIME(stamp) \
 	((stamp) = PSCHED_NS2US(ktime_to_ns(ktime_get())))
 
-#define PSCHED_US2JIFFIE(usecs)		usecs_to_jiffies(PSCHED_US2NS((usecs)) / NSEC_PER_USEC)
-#define PSCHED_JIFFIE2US(delay)		PSCHED_NS2US(jiffies_to_usecs((delay)) * NSEC_PER_USEC)
-
 #define PSCHED_TDIFF(tv1, tv2)		(long)((tv1) - (tv2))
 #define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
 					min_t(long long, (tv1) - (tv2), bound)
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3cc2714fd5a..5197b6caaf2 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -434,8 +434,8 @@ m2sm(u32 m)
 	u64 sm;
 
 	sm = ((u64)m << SM_SHIFT);
-	sm += PSCHED_JIFFIE2US(HZ) - 1;
-	do_div(sm, PSCHED_JIFFIE2US(HZ));
+	sm += PSCHED_TICKS_PER_SEC - 1;
+	do_div(sm, PSCHED_TICKS_PER_SEC);
 	return sm;
 }
 
@@ -448,7 +448,7 @@ m2ism(u32 m)
 	if (m == 0)
 		ism = HT_INFINITY;
 	else {
-		ism = ((u64)PSCHED_JIFFIE2US(HZ) << ISM_SHIFT);
+		ism = ((u64)PSCHED_TICKS_PER_SEC << ISM_SHIFT);
 		ism += m - 1;
 		do_div(ism, m);
 	}
@@ -461,7 +461,7 @@ d2dx(u32 d)
 {
 	u64 dx;
 
-	dx = ((u64)d * PSCHED_JIFFIE2US(HZ));
+	dx = ((u64)d * PSCHED_TICKS_PER_SEC);
 	dx += USEC_PER_SEC - 1;
 	do_div(dx, USEC_PER_SEC);
 	return dx;
@@ -473,7 +473,7 @@ sm2m(u64 sm)
 {
 	u64 m;
 
-	m = (sm * PSCHED_JIFFIE2US(HZ)) >> SM_SHIFT;
+	m = (sm * PSCHED_TICKS_PER_SEC) >> SM_SHIFT;
 	return (u32)m;
 }
 
@@ -484,7 +484,7 @@ dx2d(u64 dx)
 	u64 d;
 
 	d = dx * USEC_PER_SEC;
-	do_div(d, PSCHED_JIFFIE2US(HZ));
+	do_div(d, PSCHED_TICKS_PER_SEC);
 	return (u32)d;
 }
 
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 4d84200f097..f76c20c0a10 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1469,7 +1469,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		/* set class to be in HTB_CAN_SEND state */
 		cl->tokens = hopt->buffer;
 		cl->ctokens = hopt->cbuffer;
-		cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60);	/* 1min */
+		cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;	/* 1min */
 		PSCHED_GET_TIME(cl->t_c);
 		cl->cmode = HTB_CAN_SEND;
 
-- 
cgit v1.2.3


From 4361cb17f0df5491fe6e2c3ae1defc98e9a64a79 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 16 Mar 2007 01:23:28 -0700
Subject: [NET_SCHED]: Export real timer resolution in /proc/net/psched

The timer resolution exported in /proc/net/psched is used by userspace to
calculate HTB's burst values. Currently it is set to HZ, since we're now
using hrtimers, use KTIME_MONOTONIC_RES, which makes HTB use smaller burst
values.

This patch also affects libnl, which incorrectly uses this value for
the SFQ perturbation parameter, which is always in seconds, and some
routing cache values, which are in USER_HZ, so both cases are broken
anyway.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6bc395cb235..047ae62ff4f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1216,7 +1216,7 @@ static int psched_show(struct seq_file *seq, void *v)
 {
 	seq_printf(seq, "%08x %08x %08x %08x\n",
 		   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
-		   1000000, HZ);
+		   1000000, (u32)NSEC_PER_SEC/ktime_to_ns(KTIME_MONOTONIC_RES));
 
 	return 0;
 }
-- 
cgit v1.2.3


From bb239acf5679ee1936f6b1b034ad260c4fec89c8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 16 Mar 2007 12:31:28 -0700
Subject: [NET_SCHED]: sch_cbq: fix watchdog scheduled too late

q->now is increased during dequeue and doesn't contain the current time
afterwards, resulting in a too large timeout value for the qdisc watchdog.
Use "now" instead, which still contains the current time.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_cbq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 0491fad97c0..d83414d828d 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1070,7 +1070,7 @@ cbq_dequeue(struct Qdisc *sch)
 		sch->qstats.overlimits++;
 		if (q->wd_expires)
 			qdisc_watchdog_schedule(&q->watchdog,
-						q->now + q->wd_expires);
+						now + q->wd_expires);
 	}
 	return NULL;
 }
-- 
cgit v1.2.3


From 514bca322cb9220308d22691ac1e74038bfabac3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 16 Mar 2007 12:34:52 -0700
Subject: [NET_SCHED]: Fix warning

net/sched/sch_api.c: In function 'psched_show':
net/sched/sch_api.c:1219: warning: format '%08x' expects type 'unsigned int', but argument 6 has type 's64'

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 047ae62ff4f..ecbdc6b42a9 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1216,7 +1216,8 @@ static int psched_show(struct seq_file *seq, void *v)
 {
 	seq_printf(seq, "%08x %08x %08x %08x\n",
 		   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
-		   1000000, (u32)NSEC_PER_SEC/ktime_to_ns(KTIME_MONOTONIC_RES));
+		   1000000,
+		   (u32)NSEC_PER_SEC/(u32)ktime_to_ns(KTIME_MONOTONIC_RES));
 
 	return 0;
 }
-- 
cgit v1.2.3


From bff9b61ce330df04c6830d823c30c04203543f01 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 16 Mar 2007 17:19:57 -0300
Subject: [SK_BUFF]: Use the helpers to get the layer header pointer

Some more cases...

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c                         | 2 +-
 net/ipv4/ip_output.c                    | 2 +-
 net/ipv6/exthdrs.c                      | 2 +-
 net/ipv6/netfilter/nf_conntrack_reasm.c | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index b0efd279ddb..4695ada1d9b 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -348,7 +348,7 @@ static int igmpv3_sendpack(struct sk_buff *skb)
 {
 	struct iphdr *pip = ip_hdr(skb);
 	struct igmphdr *pig = igmp_hdr(skb);
-	const int iplen = skb->tail - skb->nh.raw;
+	const int iplen = skb->tail - skb_network_header(skb);
 	const int igmplen = skb->tail - skb_transport_header(skb);
 
 	pip->tot_len = htons(iplen);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 11ab100d6c6..11a6ac756f8 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -601,7 +601,7 @@ slow_path:
 		/*
 		 *	Copy a block of the IP datagram.
 		 */
-		if (skb_copy_bits(skb, ptr, skb2->h.raw, len))
+		if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
 			BUG();
 		left -= len;
 
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index f25ee773f52..f763409ea74 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -506,7 +506,7 @@ looped_back:
 		kfree_skb(skb);
 		*skbp = skb = skb2;
 		opt = IP6CB(skb2);
-		hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
+		hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb2);
 	}
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 015950522c8..84ce5b3c4b2 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -627,7 +627,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 
 	/* We have to remove fragment header from datagram and to relocate
 	 * header in order to calculate ICV correctly. */
-	skb_network_header(head)[fq->nhoffset] = head->h.raw[0];
+	skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
 	memmove(head->head + sizeof(struct frag_hdr), head->head,
 		(head->data - head->head) - sizeof(struct frag_hdr));
 	head->mac.raw += sizeof(struct frag_hdr);
@@ -787,7 +787,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 
 	skb_set_transport_header(clone, fhoff);
 	hdr = ipv6_hdr(clone);
-	fhdr = (struct frag_hdr *)clone->h.raw;
+	fhdr = (struct frag_hdr *)skb_transport_header(clone);
 
 	if (!(fhdr->frag_off & htons(0xFFF9))) {
 		DEBUGP("Invalid fragment offset\n");
-- 
cgit v1.2.3


From cfe1fc7759fdacb0c650b575daed1692bf3eaece Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 16 Mar 2007 17:26:39 -0300
Subject: [SK_BUFF]: Introduce skb_network_header_len

For the common sequence "skb->h.raw - skb->nh.raw", similar to skb->mac_len,
that is precalculated tho, don't think we need to bloat skb with one more
member, so just use this new helper, reducing the number of non-skbuff.h
references to the layer headers even more.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/gianfar.c                   | 2 +-
 drivers/net/pasemi_mac.c                | 4 ++--
 include/linux/skbuff.h                  | 5 +++++
 net/core/skbuff.c                       | 2 +-
 net/ipv4/ip_output.c                    | 2 +-
 net/ipv6/esp6.c                         | 3 +--
 net/ipv6/exthdrs.c                      | 8 ++++----
 net/ipv6/ip6_input.c                    | 2 +-
 net/ipv6/ip6_output.c                   | 4 ++--
 net/ipv6/mcast.c                        | 2 +-
 net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +-
 net/ipv6/raw.c                          | 2 +-
 net/ipv6/reassembly.c                   | 8 +++++---
 net/ipv6/xfrm6_policy.c                 | 2 +-
 14 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index b9f44602c5e..b666a0cc064 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -953,7 +953,7 @@ static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb)
 	 * l4os is the distance between the start of the
 	 * l3 hdr and the l4 hdr */
 	fcb->l3os = (u16)(skb_network_offset(skb) - GMAC_FCB_LEN);
-	fcb->l4os = (u16)(skb->h.raw - skb->nh.raw);
+	fcb->l4os = skb_network_header_len(skb);
 
 	fcb->flags = flags;
 }
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index 1d8129986cc..76fe9dd8e84 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -734,12 +734,12 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev)
 		switch (ip_hdr(skb)->protocol) {
 		case IPPROTO_TCP:
 			dflags |= XCT_MACTX_CSUM_TCP;
-			dflags |= XCT_MACTX_IPH((skb->h.raw - skb->nh.raw) >> 2);
+			dflags |= XCT_MACTX_IPH(skb_network_header_len(skb) >> 2);
 			dflags |= XCT_MACTX_IPO(nh - skb->data);
 			break;
 		case IPPROTO_UDP:
 			dflags |= XCT_MACTX_CSUM_UDP;
-			dflags |= XCT_MACTX_IPH((skb->h.raw - skb->nh.raw) >> 2);
+			dflags |= XCT_MACTX_IPH(skb_network_header_len(skb) >> 2);
 			dflags |= XCT_MACTX_IPO(nh - skb->data);
 			break;
 		}
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 47c57be97d4..230dd43fc9b 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -992,6 +992,11 @@ static inline int skb_network_offset(const struct sk_buff *skb)
 	return skb->nh.raw - skb->data;
 }
 
+static inline u32 skb_network_header_len(const struct sk_buff *skb)
+{
+	return skb->h.raw - skb->nh.raw;
+}
+
 static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
 {
 	return skb->mac.raw;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 87e000633f4..f38af6c01b1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1906,7 +1906,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		skb_reserve(nskb, headroom);
 		skb_reset_mac_header(nskb);
 		skb_set_network_header(nskb, skb->mac_len);
-		nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
+		nskb->h.raw = nskb->nh.raw + skb_network_header_len(skb);
 		memcpy(skb_put(nskb, doffset), skb->data, doffset);
 
 		if (!sg) {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 11a6ac756f8..02988fb262d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1187,7 +1187,7 @@ int ip_push_pending_frames(struct sock *sk)
 	if (skb->data < skb_network_header(skb))
 		__skb_pull(skb, skb_network_offset(skb));
 	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
-		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
+		__skb_pull(tmp_skb, skb_network_header_len(skb));
 		*tail_skb = tmp_skb;
 		tail_skb = &(tmp_skb->next);
 		skb->len += tmp_skb->len;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 436eb9e6a6c..7fdf84dee73 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -147,8 +147,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
 	int alen = esp->auth.icv_trunc_len;
 	int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen;
-
-	int hdr_len = skb->h.raw - skb->nh.raw;
+	int hdr_len = skb_network_header_len(skb);
 	int nfrags;
 	int ret = 0;
 
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index f763409ea74..f34cc2bd489 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -143,7 +143,7 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 	struct sk_buff *skb = *skbp;
 	struct tlvtype_proc *curr;
 	const unsigned char *nh = skb_network_header(skb);
-	int off = skb->h.raw - skb->nh.raw;
+	int off = skb_network_header_len(skb);
 	int len = (skb_transport_header(skb)[1] + 1) << 3;
 
 	if (skb_transport_offset(skb) + len > skb_headlen(skb))
@@ -297,7 +297,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 		return -1;
 	}
 
-	opt->lastopt = opt->dst1 = skb->h.raw - skb->nh.raw;
+	opt->lastopt = opt->dst1 = skb_network_header_len(skb);
 #ifdef CONFIG_IPV6_MIP6
 	dstbuf = opt->dst1;
 #endif
@@ -443,7 +443,7 @@ looped_back:
 			break;
 		}
 
-		opt->lastopt = opt->srcrt = skb->h.raw - skb->nh.raw;
+		opt->lastopt = opt->srcrt = skb_network_header_len(skb);
 		skb->h.raw += (hdr->hdrlen + 1) << 3;
 		opt->dst0 = opt->dst1;
 		opt->dst1 = 0;
@@ -738,7 +738,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
 
 	/*
 	 * skb_network_header(skb) is equal to skb->data, and
-	 * skb->h.raw - skb->nh.raw is always equal to
+	 * skb_network_header_len(skb) is always equal to
 	 * sizeof(struct ipv6hdr) by definition of
 	 * hop-by-hop options.
 	 */
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 44275411d1a..cf0c4406b59 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -182,7 +182,7 @@ resubmit:
 			nf_reset(skb);
 
 			skb_postpull_rcsum(skb, skb_network_header(skb),
-					   skb->h.raw - skb->nh.raw);
+					   skb_network_header_len(skb));
 			hdr = ipv6_hdr(skb);
 			if (ipv6_addr_is_multicast(&hdr->daddr) &&
 			    !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 32e8c3f73c7..57a32608075 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1325,7 +1325,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	if (skb->data < skb_network_header(skb))
 		__skb_pull(skb, skb_network_offset(skb));
 	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
-		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
+		__skb_pull(tmp_skb, skb_network_header_len(skb));
 		*tail_skb = tmp_skb;
 		tail_skb = &(tmp_skb->next);
 		skb->len += tmp_skb->len;
@@ -1337,7 +1337,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	}
 
 	ipv6_addr_copy(final_dst, &fl->fl6_dst);
-	__skb_pull(skb, skb->h.raw - skb->nh.raw);
+	__skb_pull(skb, skb_network_header_len(skb));
 	if (opt && opt->opt_flen)
 		ipv6_push_frag_opts(skb, opt, &proto);
 	if (opt && opt->opt_nflen)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 07e86ebb46b..4c45bcce75e 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1168,7 +1168,7 @@ int igmp6_event_query(struct sk_buff *skb)
 
 	/* compute payload length excluding extension headers */
 	len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
-	len -= skb->h.raw - skb->nh.raw;
+	len -= skb_network_header_len(skb);
 
 	/* Drop queries with not link local source */
 	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 84ce5b3c4b2..490e7e151f2 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -657,7 +657,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 	/* Yes, and fold redundant checksum back. 8) */
 	if (head->ip_summed == CHECKSUM_COMPLETE)
 		head->csum = csum_partial(skb_network_header(head),
-					  head->h.raw - head->nh.raw,
+					  skb_network_header_len(head),
 					  head->csum);
 
 	fq->fragments = NULL;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 116257d59a3..f925ca7c1a5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -362,7 +362,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		skb_postpull_rcsum(skb, skb_network_header(skb),
-				   skb->h.raw - skb->nh.raw);
+				   skb_network_header_len(skb));
 		if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 				     &ipv6_hdr(skb)->daddr,
 				     skb->len, inet->num, skb->csum))
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 31d4271ea54..6dfacfa7a59 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -679,7 +679,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 	/* Yes, and fold redundant checksum back. 8) */
 	if (head->ip_summed == CHECKSUM_COMPLETE)
 		head->csum = csum_partial(skb_network_header(head),
-					  head->h.raw - head->nh.raw,
+					  skb_network_header_len(head),
 					  head->csum);
 
 	rcu_read_lock();
@@ -715,13 +715,15 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
 	/* Jumbo payload inhibits frag. header */
 	if (hdr->payload_len==0) {
 		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  skb_network_header_len(skb));
 		return -1;
 	}
 	if (!pskb_may_pull(skb, (skb_transport_offset(skb) +
 				 sizeof(struct frag_hdr)))) {
 		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+				  skb_network_header_len(skb));
 		return -1;
 	}
 
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index b93bfb87f49..ef746d4f313 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -270,7 +270,7 @@ error:
 static inline void
 _decode_session6(struct sk_buff *skb, struct flowi *fl)
 {
-	u16 offset = skb->h.raw - skb->nh.raw;
+	u16 offset = skb_network_header_len(skb);
 	struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct ipv6_opt_hdr *exthdr;
 	const unsigned char *nh = skb_network_header(skb);
-- 
cgit v1.2.3


From b0e380b1d8a8e0aca215df97702f99815f05c094 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 10 Apr 2007 21:21:55 -0700
Subject: [SK_BUFF]: unions of just one member don't get anything done, kill
 them

Renaming skb->h to skb->transport_header, skb->nh to skb->network_header and
skb->mac to skb->mac_header, to match the names of the associated helpers
(skb[_[re]set]_{transport,network,mac}_header).

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/arcnet/arcnet.c                   | 17 ++++++----
 drivers/net/bonding/bond_3ad.c                |  4 +--
 drivers/net/bonding/bond_alb.c                |  2 +-
 drivers/net/wireless/hostap/hostap_80211_rx.c |  2 +-
 include/linux/if_vlan.h                       |  4 +--
 include/linux/skbuff.h                        | 49 +++++++++++----------------
 net/802/psnap.c                               |  2 +-
 net/8021q/vlan_dev.c                          |  2 +-
 net/appletalk/ddp.c                           |  8 ++---
 net/bridge/br_netfilter.c                     | 24 ++++++-------
 net/core/dev.c                                |  6 ++--
 net/core/pktgen.c                             | 16 ++++-----
 net/core/skbuff.c                             | 21 ++++++------
 net/ieee80211/ieee80211_rx.c                  |  2 +-
 net/ipv4/ah4.c                                |  4 +--
 net/ipv4/igmp.c                               |  2 +-
 net/ipv4/ip_gre.c                             |  2 +-
 net/ipv4/ip_output.c                          | 11 +++---
 net/ipv4/ipcomp.c                             |  2 +-
 net/ipv4/ipip.c                               |  4 +--
 net/ipv4/ipmr.c                               |  8 ++---
 net/ipv4/ipvs/ip_vs_xmit.c                    |  4 +--
 net/ipv4/netfilter/ipt_LOG.c                  |  2 +-
 net/ipv4/netfilter/ipt_ULOG.c                 |  2 +-
 net/ipv4/raw.c                                |  2 +-
 net/ipv4/xfrm4_mode_beet.c                    |  4 +--
 net/ipv4/xfrm4_mode_transport.c               |  8 ++---
 net/ipv4/xfrm4_mode_tunnel.c                  |  2 +-
 net/ipv6/ah6.c                                |  8 ++---
 net/ipv6/exthdrs.c                            |  6 ++--
 net/ipv6/ip6_input.c                          |  2 +-
 net/ipv6/ip6_output.c                         | 11 +++---
 net/ipv6/ip6_tunnel.c                         |  4 +--
 net/ipv6/ipcomp6.c                            |  2 +-
 net/ipv6/netfilter/ip6t_LOG.c                 |  2 +-
 net/ipv6/netfilter/nf_conntrack_reasm.c       |  4 +--
 net/ipv6/raw.c                                |  2 +-
 net/ipv6/reassembly.c                         |  8 ++---
 net/ipv6/sit.c                                |  4 +--
 net/ipv6/xfrm6_mode_beet.c                    |  4 +--
 net/ipv6/xfrm6_mode_transport.c               |  4 +--
 net/ipv6/xfrm6_mode_tunnel.c                  |  4 +--
 net/llc/llc_input.c                           |  2 +-
 net/packet/af_packet.c                        | 28 +++++++--------
 net/sctp/input.c                              |  8 ++---
 net/sctp/ipv6.c                               |  8 ++---
 46 files changed, 162 insertions(+), 165 deletions(-)

diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index 83004fdab0a..681e20b8466 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -519,9 +519,12 @@ static int arcnet_header(struct sk_buff *skb, struct net_device *dev,
 		 * real header when we do rebuild_header.
 		 */
 		*(uint16_t *) skb_push(skb, 2) = type;
-		if (skb->nh.raw - skb->mac.raw != 2)
+		/*
+		 * XXX: Why not use skb->mac_len?
+		 */
+		if (skb->network_header - skb->mac_header != 2)
 			BUGMSG(D_NORMAL, "arcnet_header: Yikes!  diff (%d) is not 2!\n",
-			       (int)(skb->nh.raw - skb->mac.raw));
+			       (int)(skb->network_header - skb->mac_header));
 		return -2;	/* return error -- can't transmit yet! */
 	}
 	else {
@@ -554,11 +557,13 @@ static int arcnet_rebuild_header(struct sk_buff *skb)
 	unsigned short type;
 	uint8_t daddr=0;
 	struct ArcProto *proto;
-
-	if (skb->nh.raw - skb->mac.raw != 2) {
+	/*
+	 * XXX: Why not use skb->mac_len?
+	 */
+	if (skb->network_header - skb->mac_header != 2) {
 		BUGMSG(D_NORMAL,
-		     "rebuild_header: shouldn't be here! (hdrsize=%d)\n",
-		     (int)(skb->nh.raw - skb->mac.raw));
+		       "rebuild_header: shouldn't be here! (hdrsize=%d)\n",
+		       (int)(skb->network_header - skb->mac_header));
 		return 0;
 	}
 	type = *(uint16_t *) skb_pull(skb, 2);
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 05c870d6f6c..7e03f41ae2c 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -885,7 +885,7 @@ static int ad_lacpdu_send(struct port *port)
 
 	skb->dev = slave->dev;
 	skb_reset_mac_header(skb);
-	skb->nh.raw = skb->mac.raw + ETH_HLEN;
+	skb->network_header = skb->mac_header + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 	skb->priority = TC_PRIO_CONTROL;
 
@@ -929,7 +929,7 @@ static int ad_marker_send(struct port *port, struct marker *marker)
 
 	skb->dev = slave->dev;
 	skb_reset_mac_header(skb);
-	skb->nh.raw = skb->mac.raw + ETH_HLEN;
+	skb->network_header = skb->mac_header + ETH_HLEN;
 	skb->protocol = PKT_TYPE_LACPDU;
 
 	marker_header = (struct marker_header *)skb_put(skb, length);
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index b8cf777542f..92c3b6f6a8e 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -896,7 +896,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
 		memcpy(data, &pkt, size);
 
 		skb_reset_mac_header(skb);
-		skb->nh.raw = skb->mac.raw + ETH_HLEN;
+		skb->network_header = skb->mac_header + ETH_HLEN;
 		skb->protocol = pkt.type;
 		skb->priority = TC_PRIO_CONTROL;
 		skb->dev = slave->dev;
diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c
index 7b7c1ca8f1f..35a3a50724f 100644
--- a/drivers/net/wireless/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_rx.c
@@ -1077,7 +1077,7 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 		skb2->protocol = __constant_htons(ETH_P_802_3);
 		skb_reset_mac_header(skb2);
 		skb_reset_network_header(skb2);
-		/* skb2->nh.raw += ETH_HLEN; */
+		/* skb2->network_header += ETH_HLEN; */
 		dev_queue_xmit(skb2);
 	}
 
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 544490d9d0b..81e9bc93569 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -275,8 +275,8 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, unsigned short
 	veth->h_vlan_TCI = htons(tag);
 
 	skb->protocol = __constant_htons(ETH_P_8021Q);
-	skb->mac.raw -= VLAN_HLEN;
-	skb->nh.raw -= VLAN_HLEN;
+	skb->mac_header -= VLAN_HLEN;
+	skb->network_header -= VLAN_HLEN;
 
 	return skb;
 }
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 230dd43fc9b..c45ad126327 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -69,8 +69,8 @@
  *	NONE: skb is checksummed by protocol or csum is not required.
  *
  *	PARTIAL: device is required to csum packet as seen by hard_start_xmit
- *	from skb->h.raw to the end and to record the checksum
- *	at skb->h.raw+skb->csum.
+ *	from skb->transport_header to the end and to record the checksum
+ *	at skb->transport_header + skb->csum.
  *
  *	Device must show its capabilities in dev->features, set
  *	at device setup time.
@@ -188,8 +188,8 @@ enum {
  *	@dev: Device we arrived on/are leaving by
  *	@iif: ifindex of device we arrived on
  *	@h: Transport layer header
- *	@nh: Network layer header
- *	@mac: Link layer header
+ *	@network_header: Network layer header
+ *	@mac_header: Link layer header
  *	@dst: destination entry
  *	@sp: the security path, used for xfrm
  *	@cb: Control buffer. Free for use by every layer. Put private vars here
@@ -236,18 +236,9 @@ struct sk_buff {
 	int			iif;
 	/* 4 byte hole on 64 bit*/
 
-	union {
-		unsigned char	*raw;
-	} h;
-
-	union {
-		unsigned char	*raw;
-	} nh;
-
-	union {
-	  	unsigned char 	*raw;
-	} mac;
-
+	unsigned char		*transport_header;
+	unsigned char		*network_header;
+	unsigned char		*mac_header;
 	struct  dst_entry	*dst;
 	struct	sec_path	*sp;
 
@@ -953,68 +944,68 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 
 static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
 {
-	return skb->h.raw;
+	return skb->transport_header;
 }
 
 static inline void skb_reset_transport_header(struct sk_buff *skb)
 {
-	skb->h.raw = skb->data;
+	skb->transport_header = skb->data;
 }
 
 static inline void skb_set_transport_header(struct sk_buff *skb,
 					    const int offset)
 {
-	skb->h.raw = skb->data + offset;
+	skb->transport_header = skb->data + offset;
 }
 
 static inline int skb_transport_offset(const struct sk_buff *skb)
 {
-	return skb->h.raw - skb->data;
+	return skb->transport_header - skb->data;
 }
 
 static inline unsigned char *skb_network_header(const struct sk_buff *skb)
 {
-	return skb->nh.raw;
+	return skb->network_header;
 }
 
 static inline void skb_reset_network_header(struct sk_buff *skb)
 {
-	skb->nh.raw = skb->data;
+	skb->network_header = skb->data;
 }
 
 static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
 {
-	skb->nh.raw = skb->data + offset;
+	skb->network_header = skb->data + offset;
 }
 
 static inline int skb_network_offset(const struct sk_buff *skb)
 {
-	return skb->nh.raw - skb->data;
+	return skb->network_header - skb->data;
 }
 
 static inline u32 skb_network_header_len(const struct sk_buff *skb)
 {
-	return skb->h.raw - skb->nh.raw;
+	return skb->transport_header - skb->network_header;
 }
 
 static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
 {
-	return skb->mac.raw;
+	return skb->mac_header;
 }
 
 static inline int skb_mac_header_was_set(const struct sk_buff *skb)
 {
-	return skb->mac.raw != NULL;
+	return skb->mac_header != NULL;
 }
 
 static inline void skb_reset_mac_header(struct sk_buff *skb)
 {
-	skb->mac.raw = skb->data;
+	skb->mac_header = skb->data;
 }
 
 static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
 {
-	skb->mac.raw = skb->data + offset;
+	skb->mac_header = skb->data + offset;
 }
 
 /*
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 7cba1f42608..04ee43e7538 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -59,7 +59,7 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev,
 	proto = find_snap_client(skb_transport_header(skb));
 	if (proto) {
 		/* Pass the frame on. */
-		skb->h.raw  += 5;
+		skb->transport_header += 5;
 		skb_pull_rcsum(skb, 5);
 		rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev);
 	} else {
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 0991e293940..42a35bed088 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -83,7 +83,7 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
 			/* Lifted from Gleb's VLAN code... */
 			memmove(skb->data - ETH_HLEN,
 				skb->data - VLAN_ETH_HLEN, 12);
-			skb->mac.raw += VLAN_HLEN;
+			skb->mac_header += VLAN_HLEN;
 		}
 	}
 
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 137341b4d83..f6a92a0b7aa 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1383,10 +1383,10 @@ free_it:
  *	@pt - packet type
  *
  *	Receive a packet (in skb) from device dev. This has come from the SNAP
- *	decoder, and on entry skb->h.raw is the DDP header, skb->len is the DDP
- *	header, skb->len is the DDP length. The physical headers have been
- *	extracted. PPP should probably pass frames marked as for this layer.
- *	[ie ARPHRD_ETHERTALK]
+ *	decoder, and on entry skb->transport_header is the DDP header, skb->len
+ *	is the DDP header, skb->len is the DDP length. The physical headers
+ *	have been extracted. PPP should probably pass frames marked as for this
+ *	layer.  [ie ARPHRD_ETHERTALK]
  */
 static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		     struct packet_type *pt, struct net_device *orig_dev)
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index f2796c97b4a..8cee7fdc16c 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -174,7 +174,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 	skb->dev = nf_bridge->physindev;
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_push(skb, VLAN_HLEN);
-		skb->nh.raw -= VLAN_HLEN;
+		skb->network_header -= VLAN_HLEN;
 	}
 	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
 		       br_handle_frame_finish, 1);
@@ -255,7 +255,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
 	else {
 		if (skb->protocol == htons(ETH_P_8021Q)) {
 			skb_pull(skb, VLAN_HLEN);
-			skb->nh.raw += VLAN_HLEN;
+			skb->network_header += VLAN_HLEN;
 		}
 		skb->dst->output(skb);
 	}
@@ -325,7 +325,7 @@ bridged_dnat:
 				if (skb->protocol ==
 				    htons(ETH_P_8021Q)) {
 					skb_push(skb, VLAN_HLEN);
-					skb->nh.raw -= VLAN_HLEN;
+					skb->network_header -= VLAN_HLEN;
 				}
 				NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING,
 					       skb, skb->dev, NULL,
@@ -344,7 +344,7 @@ bridged_dnat:
 	skb->dev = nf_bridge->physindev;
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_push(skb, VLAN_HLEN);
-		skb->nh.raw -= VLAN_HLEN;
+		skb->network_header -= VLAN_HLEN;
 	}
 	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
 		       br_handle_frame_finish, 1);
@@ -497,7 +497,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 
 		if (skb->protocol == htons(ETH_P_8021Q)) {
 			skb_pull_rcsum(skb, VLAN_HLEN);
-			skb->nh.raw += VLAN_HLEN;
+			skb->network_header += VLAN_HLEN;
 		}
 		return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
 	}
@@ -514,7 +514,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_pull_rcsum(skb, VLAN_HLEN);
-		skb->nh.raw += VLAN_HLEN;
+		skb->network_header += VLAN_HLEN;
 	}
 
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
@@ -595,7 +595,7 @@ static int br_nf_forward_finish(struct sk_buff *skb)
 	}
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_push(skb, VLAN_HLEN);
-		skb->nh.raw -= VLAN_HLEN;
+		skb->network_header -= VLAN_HLEN;
 	}
 	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in,
 		       skb->dev, br_forward_finish, 1);
@@ -631,7 +631,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
 
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_pull(*pskb, VLAN_HLEN);
-		(*pskb)->nh.raw += VLAN_HLEN;
+		(*pskb)->network_header += VLAN_HLEN;
 	}
 
 	nf_bridge = skb->nf_bridge;
@@ -667,13 +667,13 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
 		if (!IS_VLAN_ARP(skb))
 			return NF_ACCEPT;
 		skb_pull(*pskb, VLAN_HLEN);
-		(*pskb)->nh.raw += VLAN_HLEN;
+		(*pskb)->network_header += VLAN_HLEN;
 	}
 
 	if (arp_hdr(skb)->ar_pln != 4) {
 		if (IS_VLAN_ARP(skb)) {
 			skb_push(*pskb, VLAN_HLEN);
-			(*pskb)->nh.raw -= VLAN_HLEN;
+			(*pskb)->network_header -= VLAN_HLEN;
 		}
 		return NF_ACCEPT;
 	}
@@ -723,7 +723,7 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
 	}
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_push(skb, VLAN_HLEN);
-		skb->nh.raw -= VLAN_HLEN;
+		skb->network_header -= VLAN_HLEN;
 	}
 
 	NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
@@ -790,7 +790,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
 
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_pull(skb, VLAN_HLEN);
-		skb->nh.raw += VLAN_HLEN;
+		skb->network_header += VLAN_HLEN;
 	}
 
 	nf_bridge_save_header(skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 30fcc7f9d4e..6562e5736e2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1077,7 +1077,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 				skb_reset_network_header(skb2);
 			}
 
-			skb2->h.raw = skb2->nh.raw;
+			skb2->transport_header = skb2->network_header;
 			skb2->pkt_type = PACKET_OUTGOING;
 			ptype->func(skb2, skb->dev, ptype, skb->dev);
 		}
@@ -1207,7 +1207,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	BUG_ON(skb_shinfo(skb)->frag_list);
 
 	skb_reset_mac_header(skb);
-	skb->mac_len = skb->nh.raw - skb->mac.raw;
+	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
 
 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
@@ -1774,7 +1774,7 @@ int netif_receive_skb(struct sk_buff *skb)
 
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
-	skb->mac_len = skb->nh.raw - skb->mac.raw;
+	skb->mac_len = skb->network_header - skb->mac_header;
 
 	pt_prev = NULL;
 
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index ae8cf9a285f..9da8357addc 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2358,7 +2358,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	}
 
 	skb_set_network_header(skb, skb->tail - skb->data);
-	skb->h.raw = skb->nh.raw + sizeof(struct iphdr);
+	skb->transport_header = skb->network_header + sizeof(struct iphdr);
 	skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
 
 	iph = ip_hdr(skb);
@@ -2391,9 +2391,9 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	iph->check = 0;
 	iph->check = ip_fast_csum((void *)iph, iph->ihl);
 	skb->protocol = protocol;
-	skb->mac.raw = (skb->nh.raw - ETH_HLEN -
-			pkt_dev->nr_labels * sizeof(u32) -
-			VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
+	skb->mac_header = (skb->network_header - ETH_HLEN -
+			   pkt_dev->nr_labels * sizeof(u32) -
+			   VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
 
@@ -2697,7 +2697,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	}
 
 	skb_set_network_header(skb, skb->tail - skb->data);
-	skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
+	skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
 	skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
 
 	iph = ipv6_hdr(skb);
@@ -2738,9 +2738,9 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr);
 	ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr);
 
-	skb->mac.raw = (skb->nh.raw - ETH_HLEN -
-			pkt_dev->nr_labels * sizeof(u32) -
-			VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
+	skb->mac_header = (skb->network_header - ETH_HLEN -
+			   pkt_dev->nr_labels * sizeof(u32) -
+			   VLAN_TAG_SIZE(pkt_dev) - SVLAN_TAG_SIZE(pkt_dev));
 	skb->protocol = protocol;
 	skb->dev = odev;
 	skb->pkt_type = PACKET_HOST;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f38af6c01b1..1e71764be4a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -396,9 +396,9 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	n->sk = NULL;
 	C(tstamp);
 	C(dev);
-	C(h);
-	C(nh);
-	C(mac);
+	C(transport_header);
+	C(network_header);
+	C(mac_header);
 	C(dst);
 	dst_clone(skb->dst);
 	C(sp);
@@ -461,9 +461,9 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #ifdef CONFIG_INET
 	new->sp		= secpath_get(old->sp);
 #endif
-	new->h.raw	= old->h.raw + offset;
-	new->nh.raw	= old->nh.raw + offset;
-	new->mac.raw	= old->mac.raw + offset;
+	new->transport_header = old->transport_header + offset;
+	new->network_header   = old->network_header + offset;
+	new->mac_header	      = old->mac_header + offset;
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	new->local_df	= old->local_df;
 	new->fclone	= SKB_FCLONE_UNAVAILABLE;
@@ -639,9 +639,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	skb->end      = data + size;
 	skb->data    += off;
 	skb->tail    += off;
-	skb->mac.raw += off;
-	skb->h.raw   += off;
-	skb->nh.raw  += off;
+	skb->transport_header += off;
+	skb->network_header   += off;
+	skb->mac_header	      += off;
 	skb->cloned   = 0;
 	skb->nohdr    = 0;
 	atomic_set(&skb_shinfo(skb)->dataref, 1);
@@ -1906,7 +1906,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		skb_reserve(nskb, headroom);
 		skb_reset_mac_header(nskb);
 		skb_set_network_header(nskb, skb->mac_len);
-		nskb->h.raw = nskb->nh.raw + skb_network_header_len(skb);
+		nskb->transport_header = (nskb->network_header +
+					  skb_network_header_len(skb));
 		memcpy(skb_put(nskb, doffset), skb->data, doffset);
 
 		if (!sg) {
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 7f5a352800a..59a765c49cf 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -793,7 +793,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 		skb2->protocol = __constant_htons(ETH_P_802_3);
 		skb_reset_mac_header(skb2);
 		skb_reset_network_header(skb2);
-		/* skb2->nh.raw += ETH_HLEN; */
+		/* skb2->network_header += ETH_HLEN; */
 		dev_queue_xmit(skb2);
 	}
 #endif
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index e1bb9e0aa5f..6da8ff597ad 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -181,9 +181,9 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 		}
 	}
 	((struct iphdr*)work_buf)->protocol = ah->nexthdr;
-	skb->nh.raw += ah_hlen;
+	skb->network_header += ah_hlen;
 	memcpy(skb_network_header(skb), work_buf, ihl);
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 	__skb_pull(skb, ah_hlen + ihl);
 
 	return 0;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 4695ada1d9b..1fc637fb675 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -333,7 +333,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	((u8*)&pip[1])[2] = 0;
 	((u8*)&pip[1])[3] = 0;
 
-	skb->h.raw = skb->nh.raw + sizeof(struct iphdr) + 4;
+	skb->transport_header = skb->network_header + sizeof(struct iphdr) + 4;
 	skb_put(skb, sizeof(*pig));
 	pig = igmpv3_report_hdr(skb);
 	pig->type = IGMPV3_HOST_MEMBERSHIP_REPORT;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index e6a9e452fd6..f49afaa8129 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -828,7 +828,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		old_iph = ip_hdr(skb);
 	}
 
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 	skb_push(skb, gre_hlen);
 	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 02988fb262d..875da382d9b 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -582,7 +582,7 @@ slow_path:
 		skb_reserve(skb2, ll_rs);
 		skb_put(skb2, len + hlen);
 		skb_reset_network_header(skb2);
-		skb2->h.raw = skb2->nh.raw + hlen;
+		skb2->transport_header = skb2->network_header + hlen;
 
 		/*
 		 *	Charge the memory for the fragment to any owner
@@ -713,7 +713,7 @@ static inline int ip_ufo_append_data(struct sock *sk,
 		skb_reset_network_header(skb);
 
 		/* initialize protocol header pointer */
-		skb->h.raw = skb->nh.raw + fragheaderlen;
+		skb->transport_header = skb->network_header + fragheaderlen;
 
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
@@ -918,7 +918,8 @@ alloc_new_skb:
 			 */
 			data = skb_put(skb, fraglen);
 			skb_set_network_header(skb, exthdrlen);
-			skb->h.raw = skb->nh.raw + fragheaderlen;
+			skb->transport_header = (skb->network_header +
+						 fragheaderlen);
 			data += fragheaderlen;
 
 			if (fraggap) {
@@ -1112,8 +1113,8 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 			 */
 			skb_put(skb, fragheaderlen + fraggap);
 			skb_reset_network_header(skb);
-			skb->h.raw = skb->nh.raw + fragheaderlen;
-
+			skb->transport_header = (skb->network_header +
+						 fragheaderlen);
 			if (fraggap) {
 				skb->csum = skb_copy_and_csum_bits(skb_prev,
 								   maxfraglen,
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 1f13cc507a4..ba348b1e5f8 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -87,7 +87,7 @@ static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
 	iph = ip_hdr(skb);
 	ipch = (void *)skb->data;
 	iph->protocol = ipch->nexthdr;
-	skb->h.raw = skb->nh.raw + sizeof(*ipch);
+	skb->transport_header = skb->network_header + sizeof(*ipch);
 	__skb_pull(skb, sizeof(*ipch));
 	err = ipcomp_decompress(x, skb);
 
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index b32b5011480..37ab3917017 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -485,7 +485,7 @@ static int ipip_rcv(struct sk_buff *skb)
 
 		secpath_reset(skb);
 
-		skb->mac.raw = skb->nh.raw;
+		skb->mac_header = skb->network_header;
 		skb_reset_network_header(skb);
 		skb->protocol = htons(ETH_P_IP);
 		skb->pkt_type = PACKET_HOST;
@@ -617,7 +617,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		old_iph = ip_hdr(skb);
 	}
 
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 	skb_push(skb, sizeof(struct iphdr));
 	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 357894259f8..50d0b301380 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -597,7 +597,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 	msg->im_msgtype = assert;
 	igmp->code 	=	0;
 	ip_hdr(skb)->tot_len = htons(skb->len);			/* Fix the length */
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 	}
 
 	if (mroute_socket == NULL) {
@@ -1102,7 +1102,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 	struct iphdr *old_iph = ip_hdr(skb);
 
 	skb_push(skb, sizeof(struct iphdr));
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 	skb_reset_network_header(skb);
 	iph = ip_hdr(skb);
 
@@ -1461,7 +1461,7 @@ int pim_rcv_v1(struct sk_buff * skb)
 	if (reg_dev == NULL)
 		goto drop;
 
-	skb->mac.raw = skb->nh.raw;
+	skb->mac_header = skb->network_header;
 	skb_pull(skb, (u8*)encap - skb->data);
 	skb_reset_network_header(skb);
 	skb->dev = reg_dev;
@@ -1517,7 +1517,7 @@ static int pim_rcv(struct sk_buff * skb)
 	if (reg_dev == NULL)
 		goto drop;
 
-	skb->mac.raw = skb->nh.raw;
+	skb->mac_header = skb->network_header;
 	skb_pull(skb, (u8*)encap - skb->data);
 	skb_reset_network_header(skb);
 	skb->dev = reg_dev;
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index c6276d08b31..fded9b2f227 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -323,7 +323,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	struct iphdr  *old_iph = ip_hdr(skb);
 	u8     tos = old_iph->tos;
 	__be16 df = old_iph->frag_off;
-	unsigned char *old_h = skb_transport_header(skb);
+	unsigned char *old_transport_header = skb->transport_header;
 	struct iphdr  *iph;			/* Our new IP header */
 	int    max_headroom;			/* The extra header space needed */
 	int    mtu;
@@ -381,7 +381,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		old_iph = ip_hdr(skb);
 	}
 
-	skb->h.raw = old_h;
+	skb->transport_header = old_transport_header;
 
 	/* fix old IP header checksum */
 	ip_send_check(old_iph);
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index c697971fe31..2fa36618c51 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -399,7 +399,7 @@ ipt_log_packet(unsigned int pf,
 		/* MAC logging for input chain only. */
 		printk("MAC=");
 		if (skb->dev && skb->dev->hard_header_len
-		    && skb->mac.raw != skb->nh.raw) {
+		    && skb->mac_header != skb->network_header) {
 			int i;
 			const unsigned char *p = skb_mac_header(skb);
 			for (i = 0; i < skb->dev->hard_header_len; i++,p++)
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index fae2a34d23d..ace711e2b05 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -251,7 +251,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
 		*(pm->prefix) = '\0';
 
 	if (in && in->hard_header_len > 0
-	    && skb->mac.raw != skb->nh.raw
+	    && skb->mac_header != skb->network_header
 	    && in->hard_header_len <= ULOG_MAC_LEN) {
 		memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len);
 		pm->mac_len = in->hard_header_len;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index bf101dc1a97..24d7c9f3191 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -297,7 +297,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 
 	skb->ip_summed = CHECKSUM_NONE;
 
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 	err = memcpy_fromiovecend((void *)iph, from, 0, length);
 	if (err)
 		goto error_fault;
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 74859dfb3a2..3650e027ce7 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -33,7 +33,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 	int hdrlen, optlen;
 
 	iph = ip_hdr(skb);
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 
 	hdrlen = 0;
 	optlen = iph->ihl * 4 - sizeof(*iph);
@@ -43,7 +43,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 	skb_push(skb, x->props.header_len + hdrlen);
 	skb_reset_network_header(skb);
 	top_iph = ip_hdr(skb);
-	skb->h.raw += sizeof(*iph) - hdrlen;
+	skb->transport_header += sizeof(*iph) - hdrlen;
 
 	memmove(top_iph, iph, sizeof(*iph));
 	if (unlikely(optlen)) {
diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c
index dc8834ea375..601047161ea 100644
--- a/net/ipv4/xfrm4_mode_transport.c
+++ b/net/ipv4/xfrm4_mode_transport.c
@@ -26,9 +26,7 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb)
 	struct iphdr *iph = ip_hdr(skb);
 	int ihl = iph->ihl * 4;
 
-	skb->h.raw = skb->nh.raw;
-	skb->h.raw += ihl;
-
+	skb->transport_header = skb->network_header + ihl;
 	skb_push(skb, x->props.header_len);
 	skb_reset_network_header(skb);
 	memmove(skb_network_header(skb), iph, ihl);
@@ -47,10 +45,10 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int ihl = skb->data - skb_transport_header(skb);
 
-	if (skb->h.raw != skb->nh.raw) {
+	if (skb->transport_header != skb->network_header) {
 		memmove(skb_transport_header(skb),
 			skb_network_header(skb), ihl);
-		skb->nh.raw = skb->h.raw;
+		skb->network_header = skb->transport_header;
 	}
 	ip_hdr(skb)->tot_len = htons(skb->len + ihl);
 	skb_reset_transport_header(skb);
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 521e52f055c..a2f2e6a5ec5 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -47,7 +47,7 @@ static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	int flags;
 
 	iph = ip_hdr(skb);
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 
 	skb_push(skb, x->props.header_len);
 	skb_reset_network_header(skb);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index d2af4fe3725..b696c840120 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -316,8 +316,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	 *
 	 * To erase AH:
 	 * Keeping copy of cleared headers. After AH processing,
-	 * Moving the pointer of skb->nh.raw by using skb_pull as long as AH
-	 * header length. Then copy back the copy as long as hdr_len
+	 * Moving the pointer of skb->network_header by using skb_pull as long
+	 * as AH header length. Then copy back the copy as long as hdr_len
 	 * If destination header following AH exists, copy it into after [Ext2].
 	 *
 	 * |<>|[IPv6][Ext1][Ext2][Dest][Payload]
@@ -384,9 +384,9 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 		}
 	}
 
-	skb->nh.raw += ah_hlen;
+	skb->network_header += ah_hlen;
 	memcpy(skb_network_header(skb), tmp_hdr, hdr_len);
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 	__skb_pull(skb, ah_hlen + hdr_len);
 
 	kfree(tmp_hdr);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index f34cc2bd489..a6a275db88c 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -306,7 +306,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 	if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
 		dst_release(dst);
 		skb = *skbp;
-		skb->h.raw += (skb_transport_header(skb)[1] + 1) << 3;
+		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
 		opt = IP6CB(skb);
 #ifdef CONFIG_IPV6_MIP6
 		opt->nhoff = dstbuf;
@@ -444,7 +444,7 @@ looped_back:
 		}
 
 		opt->lastopt = opt->srcrt = skb_network_header_len(skb);
-		skb->h.raw += (hdr->hdrlen + 1) << 3;
+		skb->transport_header += (hdr->hdrlen + 1) << 3;
 		opt->dst0 = opt->dst1;
 		opt->dst1 = 0;
 		opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
@@ -752,7 +752,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
 	opt->hop = sizeof(struct ipv6hdr);
 	if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) {
 		skb = *skbp;
-		skb->h.raw += (skb_transport_header(skb)[1] + 1) << 3;
+		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
 		opt = IP6CB(skb);
 		opt->nhoff = sizeof(struct ipv6hdr);
 		return 1;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index cf0c4406b59..be0ee8a34f9 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -101,7 +101,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	if (hdr->version != 6)
 		goto err;
 
-	skb->h.raw = skb->nh.raw + sizeof(*hdr);
+	skb->transport_header = skb->network_header + sizeof(*hdr);
 	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
 
 	pkt_len = ntohs(hdr->payload_len);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 57a32608075..b2c092c6b9d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -733,7 +733,8 @@ slow_path:
 		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 		skb_reset_network_header(frag);
 		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
-		frag->h.raw = frag->nh.raw + hlen + sizeof(struct frag_hdr);
+		frag->transport_header = (frag->network_header + hlen +
+					  sizeof(struct frag_hdr));
 
 		/*
 		 *	Charge the memory for the fragment to any owner
@@ -761,7 +762,7 @@ slow_path:
 		/*
 		 *	Copy a block of the IP datagram.
 		 */
-		if (skb_copy_bits(skb, ptr, frag->h.raw, len))
+		if (skb_copy_bits(skb, ptr, skb_transport_header(skb), len))
 			BUG();
 		left -= len;
 
@@ -976,7 +977,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,
 		skb_reset_network_header(skb);
 
 		/* initialize protocol header pointer */
-		skb->h.raw = skb->nh.raw + fragheaderlen;
+		skb->transport_header = skb->network_header + fragheaderlen;
 
 		skb->ip_summed = CHECKSUM_PARTIAL;
 		skb->csum = 0;
@@ -1198,8 +1199,8 @@ alloc_new_skb:
 			data = skb_put(skb, fraglen);
 			skb_set_network_header(skb, exthdrlen);
 			data += fragheaderlen;
-			skb->h.raw = skb->nh.raw + fragheaderlen;
-
+			skb->transport_header = (skb->network_header +
+						 fragheaderlen);
 			if (fraggap) {
 				skb->csum = skb_copy_and_csum_bits(
 					skb_prev, maxfraglen,
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 05b59a77bc6..a0902fbdb4e 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -701,7 +701,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
 			goto discard;
 		}
 		secpath_reset(skb);
-		skb->mac.raw = skb->nh.raw;
+		skb->mac_header = skb->network_header;
 		skb_reset_network_header(skb);
 		skb->protocol = htons(protocol);
 		skb->pkt_type = PACKET_HOST;
@@ -898,7 +898,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	dst_release(skb->dst);
 	skb->dst = dst_clone(dst);
 
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 
 	proto = fl->proto;
 	if (encap_limit >= 0) {
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 5555c98dea0..7691a1b5caa 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -81,7 +81,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
 	/* Remove ipcomp header and decompress original payload */
 	iph = ipv6_hdr(skb);
 	ipch = (void *)skb->data;
-	skb->h.raw = skb->nh.raw + sizeof(*ipch);
+	skb->transport_header = skb->network_header + sizeof(*ipch);
 	__skb_pull(skb, sizeof(*ipch));
 
 	/* decompression */
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 54d176187f3..b465e24e90b 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -396,7 +396,7 @@ ip6t_log_packet(unsigned int pf,
 		/* MAC logging for input chain only. */
 		printk("MAC=");
 		if (skb->dev && (len = skb->dev->hard_header_len) &&
-		    skb->mac.raw != skb->nh.raw) {
+		    skb->mac_header != skb->network_header) {
 			const unsigned char *p = skb_mac_header(skb);
 			int i;
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 490e7e151f2..b7889ceef55 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -630,8 +630,8 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 	skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
 	memmove(head->head + sizeof(struct frag_hdr), head->head,
 		(head->data - head->head) - sizeof(struct frag_hdr));
-	head->mac.raw += sizeof(struct frag_hdr);
-	head->nh.raw += sizeof(struct frag_hdr);
+	head->mac_header += sizeof(struct frag_hdr);
+	head->network_header += sizeof(struct frag_hdr);
 
 	skb_shinfo(head)->frag_list = head->next;
 	skb_reset_transport_header(head);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index f925ca7c1a5..8705f6a502d 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -583,7 +583,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 
 	skb->ip_summed = CHECKSUM_NONE;
 
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 	err = memcpy_fromiovecend((void *)iph, from, 0, length);
 	if (err)
 		goto error_fault;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 6dfacfa7a59..de795c04e34 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -646,11 +646,11 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 	/* We have to remove fragment header from datagram and to relocate
 	 * header in order to calculate ICV correctly. */
 	nhoff = fq->nhoffset;
-	skb_network_header(head)[nhoff] = head->h.raw[0];
+	skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
 	memmove(head->head + sizeof(struct frag_hdr), head->head,
 		(head->data - head->head) - sizeof(struct frag_hdr));
-	head->mac.raw += sizeof(struct frag_hdr);
-	head->nh.raw += sizeof(struct frag_hdr);
+	head->mac_header += sizeof(struct frag_hdr);
+	head->network_header += sizeof(struct frag_hdr);
 
 	skb_shinfo(head)->frag_list = head->next;
 	skb_reset_transport_header(head);
@@ -732,7 +732,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
 
 	if (!(fhdr->frag_off & htons(0xFFF9))) {
 		/* It is not a fragmented frame */
-		skb->h.raw += sizeof(struct frag_hdr);
+		skb->transport_header += sizeof(struct frag_hdr);
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS);
 
 		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 1e8827b90aa..27fe10ffacb 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -382,7 +382,7 @@ static int ipip6_rcv(struct sk_buff *skb)
 	read_lock(&ipip6_lock);
 	if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
 		secpath_reset(skb);
-		skb->mac.raw = skb->nh.raw;
+		skb->mac_header = skb->network_header;
 		skb_reset_network_header(skb);
 		IPCB(skb)->flags = 0;
 		skb->protocol = htons(ETH_P_IPV6);
@@ -553,7 +553,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		iph6 = ipv6_hdr(skb);
 	}
 
-	skb->h.raw = skb->nh.raw;
+	skb->transport_header = skb->network_header;
 	skb_push(skb, sizeof(struct iphdr));
 	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 8a01b0da2dd..2e61d6ddece 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -48,8 +48,8 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb_reset_network_header(skb);
 	top_iph = ipv6_hdr(skb);
-	skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
-	skb->nh.raw += offsetof(struct ipv6hdr, nexthdr);
+	skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+	skb->network_header += offsetof(struct ipv6hdr, nexthdr);
 
 	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
 	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index eb1864b5aae..c026bfea820 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -54,10 +54,10 @@ static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int ihl = skb->data - skb_transport_header(skb);
 
-	if (skb->h.raw != skb->nh.raw) {
+	if (skb->transport_header != skb->network_header) {
 		memmove(skb_transport_header(skb),
 			skb_network_header(skb), ihl);
-		skb->nh.raw = skb->h.raw;
+		skb->network_header = skb->transport_header;
 	}
 	ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -
 					   sizeof(struct ipv6hdr));
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 21d65df7479..a6c0cdf46ad 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -55,8 +55,8 @@ static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb_reset_network_header(skb);
 	top_iph = ipv6_hdr(skb);
-	skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
-	skb->nh.raw += offsetof(struct ipv6hdr, nexthdr);
+	skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
+	skb->network_header   += offsetof(struct ipv6hdr, nexthdr);
 
 	top_iph->version = 6;
 	if (xdst->route->ops->family == AF_INET6) {
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index b3f65d1e80b..099ed8fec14 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -112,7 +112,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb)
 	if (unlikely(!pskb_may_pull(skb, llc_len)))
 		return 0;
 
-	skb->h.raw += llc_len;
+	skb->transport_header += llc_len;
 	skb_pull(skb, llc_len);
 	if (skb->protocol == htons(ETH_P_802_2)) {
 		__be16 pdulen = eth_hdr(skb)->h_proto;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index a059cc7be67..51c059b09a3 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -114,22 +114,22 @@ On receive:
 -----------
 
 Incoming, dev->hard_header!=NULL
-   mac.raw -> ll header
-   data    -> data
+   mac_header -> ll header
+   data       -> data
 
 Outgoing, dev->hard_header!=NULL
-   mac.raw -> ll header
-   data    -> ll header
+   mac_header -> ll header
+   data       -> ll header
 
 Incoming, dev->hard_header==NULL
-   mac.raw -> UNKNOWN position. It is very likely, that it points to ll header.
-	      PPP makes it, that is wrong, because introduce assymetry
-	      between rx and tx paths.
-   data    -> data
+   mac_header -> UNKNOWN position. It is very likely, that it points to ll
+		 header.  PPP makes it, that is wrong, because introduce
+                 assymetry between rx and tx paths.
+   data       -> data
 
 Outgoing, dev->hard_header==NULL
-   mac.raw -> data. ll header is still not built!
-   data    -> data
+   mac_header -> data. ll header is still not built!
+   data       -> data
 
 Resume
   If dev->hard_header==NULL we are unlikely to restore sensible ll header.
@@ -139,12 +139,12 @@ On transmit:
 ------------
 
 dev->hard_header != NULL
-   mac.raw -> ll header
-   data    -> ll header
+   mac_header -> ll header
+   data       -> ll header
 
 dev->hard_header == NULL (ll header is added by device, we cannot control it)
-   mac.raw -> data
-   data -> data
+   mac_header -> data
+   data       -> data
 
    We should set nh.raw on output to correct posistion,
    packet classifier depends on it.
diff --git a/net/sctp/input.c b/net/sctp/input.c
index f38e91b3871..87feee166da 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -522,14 +522,14 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
 	}
 
 	/* Fix up skb to look at the embedded net header. */
-	saveip = skb->nh.raw;
-	savesctp  = skb->h.raw;
+	saveip = skb->network_header;
+	savesctp = skb->transport_header;
 	skb_reset_network_header(skb);
 	skb_set_transport_header(skb, ihlen);
 	sk = sctp_err_lookup(AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
 	/* Put back, the original pointers. */
-	skb->nh.raw = saveip;
-	skb->h.raw = savesctp;
+	skb->network_header = saveip;
+	skb->transport_header = savesctp;
 	if (!sk) {
 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
 		return;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index cd0af923878..afcb0093c29 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -132,14 +132,14 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	idev = in6_dev_get(skb->dev);
 
 	/* Fix up skb to look at the embedded net header. */
-	saveip = skb->nh.raw;
-	savesctp  = skb->h.raw;
+	saveip	 = skb->network_header;
+	savesctp = skb->transport_header;
 	skb_reset_network_header(skb);
 	skb_set_transport_header(skb, offset);
 	sk = sctp_err_lookup(AF_INET6, skb, sctp_hdr(skb), &asoc, &transport);
 	/* Put back, the original pointers. */
-	skb->nh.raw = saveip;
-	skb->h.raw = savesctp;
+	skb->network_header   = saveip;
+	skb->transport_header = savesctp;
 	if (!sk) {
 		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
 		goto out;
-- 
cgit v1.2.3


From 2e07fa9cd3bac1e28cfe3131ed86b053afb02fc9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 10 Apr 2007 21:22:35 -0700
Subject: [SK_BUFF]: Use offsets for skb->{mac,network,transport}_header on
 64bit architectures

With this we save 8 bytes per network packet, leaving a 4 bytes hole to be used
in further shrinking work, likely with the offsetization of other pointers,
such as ->{data,tail,end}, at the cost of adds, that were minimized by the
usual practice of setting skb->{mac,nh,n}.raw to a local variable that is then
accessed multiple times in each function, it also is not more expensive than
before with regards to most of the handling of such headers, like setting one
of these headers to another (transport to network, etc), or subtracting, adding
to/from it, comparing them, etc.

Now we have this layout for sk_buff on a x86_64 machine:

[acme@mica net-2.6.22]$ pahole vmlinux sk_buff
struct sk_buff {
	struct sk_buff *       next;             /*   0   8 */
	struct sk_buff *       prev;             /*   8   8 */
	struct rb_node         rb;               /*  16  24 */
	struct sock *          sk;               /*  40   8 */
	ktime_t                tstamp;           /*  48   8 */
	struct net_device *    dev;              /*  56   8 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	struct net_device *    input_dev;        /*  64   8 */
	sk_buff_data_t         transport_header; /*  72   4 */
	sk_buff_data_t         network_header;   /*  76   4 */
	sk_buff_data_t         mac_header;       /*  80   4 */

	/* XXX 4 bytes hole, try to pack */

	struct dst_entry *     dst;              /*  88   8 */
	struct sec_path *      sp;               /*  96   8 */
	char                   cb[48];           /* 104  48 */
	/* cacheline 2 boundary (128 bytes) was 24 bytes ago*/
	unsigned int           len;              /* 152   4 */
	unsigned int           data_len;         /* 156   4 */
	unsigned int           mac_len;          /* 160   4 */
	union {
		__wsum         csum;             /*       4 */
		__u32          csum_offset;      /*       4 */
	};                                       /* 164   4 */
	__u32                  priority;         /* 168   4 */
	__u8                   local_df:1;       /* 172   1 */
	__u8                   cloned:1;         /* 172   1 */
	__u8                   ip_summed:2;      /* 172   1 */
	__u8                   nohdr:1;          /* 172   1 */
	__u8                   nfctinfo:3;       /* 172   1 */
	__u8                   pkt_type:3;       /* 173   1 */
	__u8                   fclone:2;         /* 173   1 */
	__u8                   ipvs_property:1;  /* 173   1 */

	/* XXX 2 bits hole, try to pack */

	__be16                 protocol;         /* 174   2 */
	void    (*destructor)(struct sk_buff *); /* 176   8 */
	struct nf_conntrack *  nfct;             /* 184   8 */
	/* --- cacheline 3 boundary (192 bytes) --- */
	struct sk_buff *       nfct_reasm;       /* 192   8 */
	struct nf_bridge_info *nf_bridge;        /* 200   8 */
	__u16                  tc_index;         /* 208   2 */
	__u16                  tc_verd;          /* 210   2 */
	dma_cookie_t           dma_cookie;       /* 212   4 */
	__u32                  secmark;          /* 216   4 */
	__u32                  mark;             /* 220   4 */
	unsigned int           truesize;         /* 224   4 */
	atomic_t               users;            /* 228   4 */
	unsigned char *        head;             /* 232   8 */
	unsigned char *        data;             /* 240   8 */
	unsigned char *        tail;             /* 248   8 */
	/* --- cacheline 4 boundary (256 bytes) --- */
	unsigned char *        end;              /* 256   8 */
}; /* size: 264, cachelines: 5 */
   /* sum members: 260, holes: 1, sum holes: 4 */
   /* bit holes: 1, sum bit holes: 2 bits */
   /* last cacheline: 8 bytes */

On 32 bits nothing changes, and pointers continue to be used with the compiler
turning all this abstraction layer into dust. But there are some sk_buff
validation tricks that are now possible, humm... :-)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h     | 104 +++++++++++++++++++++++++++++++++++++--------
 net/core/skbuff.c          |  18 ++++++--
 net/ipv4/ipvs/ip_vs_xmit.c |   2 +-
 net/sctp/input.c           |   4 +-
 net/sctp/ipv6.c            |   2 +-
 5 files changed, 104 insertions(+), 26 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c45ad126327..2e740550062 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -179,6 +179,16 @@ enum {
 	SKB_GSO_TCPV6 = 1 << 4,
 };
 
+#if BITS_PER_LONG > 32
+#define NET_SKBUFF_DATA_USES_OFFSET 1
+#endif
+
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+typedef unsigned int sk_buff_data_t;
+#else
+typedef unsigned char *sk_buff_data_t;
+#endif
+
 /** 
  *	struct sk_buff - socket buffer
  *	@next: Next buffer in list
@@ -236,9 +246,9 @@ struct sk_buff {
 	int			iif;
 	/* 4 byte hole on 64 bit*/
 
-	unsigned char		*transport_header;
-	unsigned char		*network_header;
-	unsigned char		*mac_header;
+	sk_buff_data_t		transport_header;
+	sk_buff_data_t		network_header;
+	sk_buff_data_t		mac_header;
 	struct  dst_entry	*dst;
 	struct	sec_path	*sp;
 
@@ -942,50 +952,92 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 	skb->tail += len;
 }
 
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
 static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
 {
-	return skb->transport_header;
+	return skb->head + skb->transport_header;
 }
 
 static inline void skb_reset_transport_header(struct sk_buff *skb)
 {
-	skb->transport_header = skb->data;
+	skb->transport_header = skb->data - skb->head;
 }
 
 static inline void skb_set_transport_header(struct sk_buff *skb,
 					    const int offset)
 {
-	skb->transport_header = skb->data + offset;
-}
-
-static inline int skb_transport_offset(const struct sk_buff *skb)
-{
-	return skb->transport_header - skb->data;
+	skb_reset_transport_header(skb);
+	skb->transport_header += offset;
 }
 
 static inline unsigned char *skb_network_header(const struct sk_buff *skb)
 {
-	return skb->network_header;
+	return skb->head + skb->network_header;
 }
 
 static inline void skb_reset_network_header(struct sk_buff *skb)
 {
-	skb->network_header = skb->data;
+	skb->network_header = skb->data - skb->head;
 }
 
 static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
 {
-	skb->network_header = skb->data + offset;
+	skb_reset_network_header(skb);
+	skb->network_header += offset;
 }
 
-static inline int skb_network_offset(const struct sk_buff *skb)
+static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
 {
-	return skb->network_header - skb->data;
+	return skb->head + skb->mac_header;
 }
 
-static inline u32 skb_network_header_len(const struct sk_buff *skb)
+static inline int skb_mac_header_was_set(const struct sk_buff *skb)
 {
-	return skb->transport_header - skb->network_header;
+	return skb->mac_header != ~0U;
+}
+
+static inline void skb_reset_mac_header(struct sk_buff *skb)
+{
+	skb->mac_header = skb->data - skb->head;
+}
+
+static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
+{
+	skb_reset_mac_header(skb);
+	skb->mac_header += offset;
+}
+
+#else /* NET_SKBUFF_DATA_USES_OFFSET */
+
+static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
+{
+	return skb->transport_header;
+}
+
+static inline void skb_reset_transport_header(struct sk_buff *skb)
+{
+	skb->transport_header = skb->data;
+}
+
+static inline void skb_set_transport_header(struct sk_buff *skb,
+					    const int offset)
+{
+	skb->transport_header = skb->data + offset;
+}
+
+static inline unsigned char *skb_network_header(const struct sk_buff *skb)
+{
+	return skb->network_header;
+}
+
+static inline void skb_reset_network_header(struct sk_buff *skb)
+{
+	skb->network_header = skb->data;
+}
+
+static inline void skb_set_network_header(struct sk_buff *skb, const int offset)
+{
+	skb->network_header = skb->data + offset;
 }
 
 static inline unsigned char *skb_mac_header(const struct sk_buff *skb)
@@ -1007,6 +1059,22 @@ static inline void skb_set_mac_header(struct sk_buff *skb, const int offset)
 {
 	skb->mac_header = skb->data + offset;
 }
+#endif /* NET_SKBUFF_DATA_USES_OFFSET */
+
+static inline int skb_transport_offset(const struct sk_buff *skb)
+{
+	return skb_transport_header(skb) - skb->data;
+}
+
+static inline u32 skb_network_header_len(const struct sk_buff *skb)
+{
+	return skb->transport_header - skb->network_header;
+}
+
+static inline int skb_network_offset(const struct sk_buff *skb)
+{
+	return skb_network_header(skb) - skb->data;
+}
 
 /*
  * CPUs often take a performance hit when accessing unaligned memory
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1e71764be4a..a48b0868126 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -448,11 +448,12 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 
 static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 {
+#ifndef NET_SKBUFF_DATA_USES_OFFSET
 	/*
 	 *	Shift between the two data areas in bytes
 	 */
 	unsigned long offset = new->data - old->data;
-
+#endif
 	new->sk		= NULL;
 	new->dev	= old->dev;
 	new->priority	= old->priority;
@@ -461,9 +462,15 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #ifdef CONFIG_INET
 	new->sp		= secpath_get(old->sp);
 #endif
-	new->transport_header = old->transport_header + offset;
-	new->network_header   = old->network_header + offset;
-	new->mac_header	      = old->mac_header + offset;
+	new->transport_header = old->transport_header;
+	new->network_header   = old->network_header;
+	new->mac_header	      = old->mac_header;
+#ifndef NET_SKBUFF_DATA_USES_OFFSET
+	/* {transport,network,mac}_header are relative to skb->head */
+	new->transport_header += offset;
+	new->network_header   += offset;
+	new->mac_header	      += offset;
+#endif
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	new->local_df	= old->local_df;
 	new->fclone	= SKB_FCLONE_UNAVAILABLE;
@@ -639,9 +646,12 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	skb->end      = data + size;
 	skb->data    += off;
 	skb->tail    += off;
+#ifndef NET_SKBUFF_DATA_USES_OFFSET
+	/* {transport,network,mac}_header are relative to skb->head */
 	skb->transport_header += off;
 	skb->network_header   += off;
 	skb->mac_header	      += off;
+#endif
 	skb->cloned   = 0;
 	skb->nohdr    = 0;
 	atomic_set(&skb_shinfo(skb)->dataref, 1);
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index fded9b2f227..900ce29db38 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -323,7 +323,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	struct iphdr  *old_iph = ip_hdr(skb);
 	u8     tos = old_iph->tos;
 	__be16 df = old_iph->frag_off;
-	unsigned char *old_transport_header = skb->transport_header;
+	sk_buff_data_t old_transport_header = skb->transport_header;
 	struct iphdr  *iph;			/* Our new IP header */
 	int    max_headroom;			/* The extra header space needed */
 	int    mtu;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 87feee166da..1ff47b18724 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -513,7 +513,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
 	struct sctp_association *asoc = NULL;
 	struct sctp_transport *transport;
 	struct inet_sock *inet;
-	char *saveip, *savesctp;
+	sk_buff_data_t saveip, savesctp;
 	int err;
 
 	if (skb->len < ihlen + 8) {
@@ -527,7 +527,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
 	skb_reset_network_header(skb);
 	skb_set_transport_header(skb, ihlen);
 	sk = sctp_err_lookup(AF_INET, skb, sctp_hdr(skb), &asoc, &transport);
-	/* Put back, the original pointers. */
+	/* Put back, the original values. */
 	skb->network_header = saveip;
 	skb->transport_header = savesctp;
 	if (!sk) {
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index afcb0093c29..5b0cdda4b44 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -126,7 +126,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	struct sctp_association *asoc;
 	struct sctp_transport *transport;
 	struct ipv6_pinfo *np;
-	char *saveip, *savesctp;
+	sk_buff_data_t saveip, savesctp;
 	int err;
 
 	idev = in6_dev_get(skb->dev);
-- 
cgit v1.2.3


From 5c81cd75fa63eaf2df0b8904508e53e953f316cf Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <samuel@sortiz.org>
Date: Fri, 16 Mar 2007 20:35:25 -0700
Subject: [IrDA]: removing stir4200 useless include

stir4200 doesn't need to include irlap.h

Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/irda/stir4200.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c
index a22175f4ea8..aec86a21434 100644
--- a/drivers/net/irda/stir4200.c
+++ b/drivers/net/irda/stir4200.c
@@ -52,7 +52,6 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <net/irda/irda.h>
-#include <net/irda/irlap.h>
 #include <net/irda/irda_device.h>
 #include <net/irda/wrapper.h>
 #include <net/irda/crc.h>
-- 
cgit v1.2.3


From c7630a4b932af254d61947a3a7e3831de92c7fb5 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <samuel@sortiz.org>
Date: Fri, 16 Mar 2007 20:38:23 -0700
Subject: [IrDA]: irda lockdep annotation

Rmmoding irda triggers a lockdep false positive.

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irqueue.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c
index 92662330dbc..d058b467f9e 100644
--- a/net/irda/irqueue.c
+++ b/net/irda/irqueue.c
@@ -384,6 +384,9 @@ EXPORT_SYMBOL(hashbin_new);
  *    for deallocating this structure if it's complex. If not the user can
  *    just supply kfree, which should take care of the job.
  */
+#ifdef CONFIG_LOCKDEP
+static int hashbin_lock_depth = 0;
+#endif
 int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
 {
 	irda_queue_t* queue;
@@ -395,7 +398,8 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
 
 	/* Synchronize */
 	if ( hashbin->hb_type & HB_LOCK ) {
-		spin_lock_irqsave(&hashbin->hb_spinlock, flags);
+		spin_lock_irqsave_nested(&hashbin->hb_spinlock, flags,
+					 hashbin_lock_depth++);
 	}
 
 	/*
@@ -419,6 +423,9 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
 	/* Release lock */
 	if ( hashbin->hb_type & HB_LOCK) {
 		spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
+#ifdef CONFIG_LOCKDEP
+		hashbin_lock_depth--;
+#endif
 	}
 
 	/*
-- 
cgit v1.2.3


From afdf27c95629634ea40703197b6788e454d31609 Mon Sep 17 00:00:00 2001
From: Peter Kovar <peter.kovar@gmail.com>
Date: Fri, 16 Mar 2007 20:39:25 -0700
Subject: [IrDA]: SMC SuperIO Chip LPC47N227 not identified properly

SMC SuperIO Chip LPC47N227 used for IrDA is not detected because its device
identification byte can be 0x7A instead of 0x5A.

Patch from Peter Kovar <peter.kovar@gmail.com>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/irda/smsc-ircc2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c
index 103a2d18ed2..e8453868d74 100644
--- a/drivers/net/irda/smsc-ircc2.c
+++ b/drivers/net/irda/smsc-ircc2.c
@@ -315,6 +315,7 @@ static struct smsc_chip __initdata lpc_chips_flat[] =
 {
 	/* Base address 0x2E or 0x4E */
 	{ "47N227",	KEY55_1|FIR|SERx4,	0x5a, 0x00 },
+	{ "47N227",	KEY55_1|FIR|SERx4,	0x7a, 0x00 },
 	{ "47N267",	KEY55_1|FIR|SERx4,	0x5e, 0x00 },
 	{ NULL }
 };
-- 
cgit v1.2.3


From be8bd86321fa7f06359d866ef61fb4d2f3e9dce9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 19 Apr 2007 20:34:51 -0700
Subject: [VLAN] vlan_dev: Use skb_reset_network_header().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 42a35bed088..7ff6b794848 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -382,7 +382,7 @@ int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		}
 
 		skb->protocol = htons(ETH_P_8021Q);
-		skb->nh.raw = skb->data;
+		skb_reset_network_header(skb);
 	}
 
 	/* Before delegating work to the lower layer, enter our MAC-address */
-- 
cgit v1.2.3


From 27a884dc3cb63b93c2b3b643f5b31eed5f8a4d26 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 19 Apr 2007 20:29:13 -0700
Subject: [SK_BUFF]: Convert skb->tail to sk_buff_data_t

So that it is also an offset from skb->head, reduces its size from 8 to 4 bytes
on 64bit architectures, allowing us to combine the 4 bytes hole left by the
layer headers conversion, reducing struct sk_buff size to 256 bytes, i.e. 4
64byte cachelines, and since the sk_buff slab cache is SLAB_HWCACHE_ALIGN...
:-)

Many calculations that previously required that skb->{transport,network,
mac}_header be first converted to a pointer now can be done directly, being
meaningful as offsets or pointers.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/ia64/sn/kernel/xpnet.c                   | 10 ++---
 drivers/atm/he.c                              |  4 +-
 drivers/atm/idt77252.c                        |  3 +-
 drivers/atm/nicstar.c                         | 10 +++--
 drivers/infiniband/hw/amso1100/c2.c           |  5 ++-
 drivers/isdn/i4l/isdn_net.c                   |  2 +-
 drivers/media/dvb/dvb-core/dvb_net.c          | 10 +++--
 drivers/net/cris/eth_v10.c                    |  2 +-
 drivers/net/cxgb3/sge.c                       |  6 +--
 drivers/net/e1000/e1000_main.c                |  4 +-
 drivers/net/ibm_emac/ibm_emac_core.c          |  2 +-
 drivers/net/macb.c                            |  3 +-
 drivers/net/pcmcia/nmclan_cs.c                |  2 +-
 drivers/net/s2io.c                            |  4 +-
 drivers/net/tulip/uli526x.c                   | 14 +++++--
 drivers/net/wan/hdlc_fr.c                     |  2 +-
 drivers/net/wan/lmc/lmc_main.c                |  4 +-
 drivers/net/wireless/hostap/hostap_80211_rx.c |  2 +-
 drivers/s390/net/ctcmain.c                    | 11 ++++--
 drivers/s390/net/netiucv.c                    | 10 +++--
 drivers/usb/atm/usbatm.c                      | 10 ++---
 drivers/usb/net/asix.c                        |  6 +--
 drivers/usb/net/gl620a.c                      |  2 +-
 drivers/usb/net/net1080.c                     |  2 +-
 drivers/usb/net/rndis_host.c                  |  2 +-
 include/linux/netfilter/nfnetlink.h           |  4 +-
 include/linux/netlink.h                       |  2 +-
 include/linux/rtnetlink.h                     |  6 +--
 include/linux/skbuff.h                        | 57 +++++++++++++++++++++------
 include/net/inet_ecn.h                        |  6 +--
 include/net/netlink.h                         |  8 ++--
 include/net/pkt_cls.h                         |  2 +-
 kernel/audit.c                                |  8 ++--
 net/atm/lec.c                                 |  2 +-
 net/bluetooth/rfcomm/core.c                   |  2 +-
 net/core/dev.c                                |  4 +-
 net/core/filter.c                             |  2 +-
 net/core/gen_stats.c                          |  4 +-
 net/core/pktgen.c                             |  4 +-
 net/core/skbuff.c                             | 35 +++++++++-------
 net/core/wireless.c                           |  4 +-
 net/decnet/dn_nsp_out.c                       |  6 ++-
 net/decnet/dn_route.c                         |  4 +-
 net/decnet/dn_table.c                         |  8 ++--
 net/decnet/netfilter/dn_rtmsg.c               |  2 +-
 net/econet/af_econet.c                        |  2 +-
 net/ieee80211/ieee80211_rx.c                  |  2 +-
 net/ipv4/esp4.c                               |  8 ++--
 net/ipv4/icmp.c                               |  3 +-
 net/ipv4/igmp.c                               |  4 +-
 net/ipv4/inet_diag.c                          | 12 +++---
 net/ipv4/ip_sockglue.c                        |  2 +-
 net/ipv4/ipmr.c                               |  9 +++--
 net/ipv4/ipvs/ip_vs_ftp.c                     |  4 +-
 net/ipv4/netfilter/arpt_mangle.c              |  8 ++--
 net/ipv4/netfilter/ip_queue.c                 |  4 +-
 net/ipv4/netfilter/nf_nat_helper.c            |  3 +-
 net/ipv4/tcp.c                                |  2 +-
 net/ipv4/tcp_output.c                         |  2 +-
 net/ipv6/datagram.c                           |  2 +-
 net/ipv6/esp6.c                               |  8 ++--
 net/ipv6/exthdrs.c                            |  2 +-
 net/ipv6/icmp.c                               |  3 +-
 net/ipv6/ip6_output.c                         |  2 +-
 net/ipv6/mcast.c                              |  6 +--
 net/ipv6/mip6.c                               |  4 +-
 net/ipv6/ndisc.c                              | 19 ++++-----
 net/ipv6/netfilter/ip6_queue.c                |  4 +-
 net/ipv6/raw.c                                |  2 +-
 net/irda/ircomm/ircomm_param.c                |  4 +-
 net/irda/irlan/irlan_common.c                 |  2 +-
 net/irda/qos.c                                | 14 +++----
 net/netfilter/nf_conntrack_netlink.c          | 16 +++-----
 net/netfilter/nfnetlink_log.c                 |  3 +-
 net/netfilter/nfnetlink_queue.c               |  4 +-
 net/netlink/af_netlink.c                      |  2 +-
 net/packet/af_packet.c                        |  2 +-
 net/sched/act_api.c                           | 52 ++++++++++++------------
 net/sched/act_gact.c                          |  2 +-
 net/sched/act_ipt.c                           |  2 +-
 net/sched/act_mirred.c                        |  2 +-
 net/sched/act_pedit.c                         |  2 +-
 net/sched/act_police.c                        |  8 ++--
 net/sched/act_simple.c                        |  2 +-
 net/sched/cls_api.c                           | 14 +++----
 net/sched/cls_basic.c                         |  4 +-
 net/sched/cls_fw.c                            |  4 +-
 net/sched/cls_route.c                         |  4 +-
 net/sched/cls_rsvp.h                          |  4 +-
 net/sched/cls_tcindex.c                       |  6 +--
 net/sched/cls_u32.c                           |  6 +--
 net/sched/ematch.c                            | 17 ++++----
 net/sched/sch_api.c                           |  8 ++--
 net/sched/sch_atm.c                           |  4 +-
 net/sched/sch_cbq.c                           | 20 +++++-----
 net/sched/sch_hfsc.c                          |  6 +--
 net/sched/sch_htb.c                           | 10 ++---
 net/sched/sch_ingress.c                       |  4 +-
 net/sched/sch_netem.c                         |  4 +-
 net/sched/sch_prio.c                          |  2 +-
 net/sched/sch_sfq.c                           |  2 +-
 net/sched/sch_tbf.c                           |  4 +-
 net/sctp/input.c                              |  4 +-
 net/sctp/inqueue.c                            |  8 ++--
 net/sctp/sm_make_chunk.c                      |  4 +-
 net/sctp/sm_statefuns.c                       |  4 +-
 net/tipc/config.c                             |  2 +-
 net/tipc/socket.c                             |  2 +-
 net/xfrm/xfrm_user.c                          | 40 +++++++++----------
 security/selinux/netlink.c                    |  2 +-
 110 files changed, 396 insertions(+), 329 deletions(-)

diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c
index 68d59d912c9..eb416c95967 100644
--- a/arch/ia64/sn/kernel/xpnet.c
+++ b/arch/ia64/sn/kernel/xpnet.c
@@ -264,7 +264,7 @@ xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg)
 
 	dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
 		"skb->end=0x%p skb->len=%d\n", (void *) skb->head,
-		(void *) skb->data, (void *) skb->tail, (void *) skb->end,
+		(void *)skb->data, skb_tail_pointer(skb), (void *)skb->end,
 		skb->len);
 
 	skb->protocol = eth_type_trans(skb, xpnet_device);
@@ -272,7 +272,7 @@ xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg)
 
 	dev_dbg(xpnet, "passing skb to network layer; \n\tskb->head=0x%p "
 		"skb->data=0x%p skb->tail=0x%p skb->end=0x%p skb->len=%d\n",
-		(void *) skb->head, (void *) skb->data, (void *) skb->tail,
+		(void *)skb->head, (void *)skb->data, skb_tail_pointer(skb),
 		(void *) skb->end, skb->len);
 
 
@@ -475,7 +475,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
 		"skb->end=0x%p skb->len=%d\n", (void *) skb->head,
-		(void *) skb->data, (void *) skb->tail, (void *) skb->end,
+		(void *)skb->data, skb_tail_pointer(skb), (void *)skb->end,
 		skb->len);
 
 
@@ -497,7 +497,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* get the beginning of the first cacheline and end of last */
 	start_addr = ((u64) skb->data & ~(L1_CACHE_BYTES - 1));
-	end_addr = L1_CACHE_ALIGN((u64) skb->tail);
+	end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb));
 
 	/* calculate how many bytes to embed in the XPC message */
 	embedded_bytes = 0;
@@ -573,7 +573,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		msg->magic = XPNET_MAGIC;
 		msg->size = end_addr - start_addr;
 		msg->leadin_ignore = (u64) skb->data - start_addr;
-		msg->tailout_ignore = end_addr - (u64) skb->tail;
+		msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
 		msg->buf_pa = __pa(start_addr);
 
 		dev_dbg(xpnet, "sending XPC message to %d:%d\nmsg->buf_pa="
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 8510026b690..d33aba6864c 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -1901,13 +1901,13 @@ he_service_rbrq(struct he_dev *he_dev, int group)
 			case ATM_AAL0:
 				/* 2.10.1.5 raw cell receive */
 				skb->len = ATM_AAL0_SDU;
-				skb->tail = skb->data + skb->len;
+				skb_set_tail_pointer(skb, skb->len);
 				break;
 			case ATM_AAL5:
 				/* 2.10.1.2 aal5 receive */
 
 				skb->len = AAL5_LEN(skb->data, he_vcc->pdu_len);
-				skb->tail = skb->data + skb->len;
+				skb_set_tail_pointer(skb, skb->len);
 #ifdef USE_CHECKSUM_HW
 				if (vcc->vpi == 0 && vcc->vci >= ATM_NOT_RSV_VCI) {
 					skb->ip_summed = CHECKSUM_COMPLETE;
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index b4b80140c39..1e49799cd6c 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -1816,7 +1816,8 @@ push_rx_skb(struct idt77252_dev *card, struct sk_buff *skb, int queue)
 	u32 handle;
 	u32 addr;
 
-	skb->data = skb->tail = skb->head;
+	skb->data = skb->head;
+	skb_reset_tail_pointer(skb);
 	skb->len = 0;
 
 	skb_reserve(skb, 16);
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index aab9b3733d5..26f4b703349 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -2208,7 +2208,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
          if (i == 1 && ns_rsqe_eopdu(rsqe))
             *((u32 *) sb->data) |= 0x00000002;
          skb_put(sb, NS_AAL0_HEADER);
-         memcpy(sb->tail, cell, ATM_CELL_PAYLOAD);
+         memcpy(skb_tail_pointer(sb), cell, ATM_CELL_PAYLOAD);
          skb_put(sb, ATM_CELL_PAYLOAD);
          ATM_SKB(sb)->vcc = vcc;
 	 __net_timestamp(sb);
@@ -2252,7 +2252,8 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
       vc->rx_iov = iovb;
       NS_SKB(iovb)->iovcnt = 0;
       iovb->len = 0;
-      iovb->tail = iovb->data = iovb->head;
+      iovb->data = iovb->head;
+      skb_reset_tail_pointer(iovb);
       NS_SKB(iovb)->vcc = vcc;
       /* IMPORTANT: a pointer to the sk_buff containing the small or large
                     buffer is stored as iovec base, NOT a pointer to the 
@@ -2265,7 +2266,8 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
       recycle_iovec_rx_bufs(card, (struct iovec *) iovb->data, NS_MAX_IOVECS);
       NS_SKB(iovb)->iovcnt = 0;
       iovb->len = 0;
-      iovb->tail = iovb->data = iovb->head;
+      iovb->data = iovb->head;
+      skb_reset_tail_pointer(iovb);
       NS_SKB(iovb)->vcc = vcc;
    }
    iov = &((struct iovec *) iovb->data)[NS_SKB(iovb)->iovcnt++];
@@ -2489,7 +2491,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
             {
                lb = (struct sk_buff *) iov->iov_base;
                tocopy = min_t(int, remaining, iov->iov_len);
-               memcpy(hb->tail, lb->data, tocopy);
+               memcpy(skb_tail_pointer(hb), lb->data, tocopy);
                skb_put(hb, tocopy);
                iov++;
                remaining -= tocopy;
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 7698feafa6a..58bc272bd40 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -439,7 +439,8 @@ static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
 	}
 
 	/* Setup the skb for reuse since we're dropping this pkt */
-	elem->skb->tail = elem->skb->data = elem->skb->head;
+	elem->skb->data = elem->skb->head;
+	skb_reset_tail_pointer(elem->skb);
 
 	/* Zero out the rxp hdr in the sk_buff */
 	memset(elem->skb->data, 0, sizeof(*rxp_hdr));
@@ -521,7 +522,7 @@ static void c2_rx_interrupt(struct net_device *netdev)
 		 * "sizeof(struct c2_rxp_hdr)".
 		 */
 		skb->data += sizeof(*rxp_hdr);
-		skb->tail = skb->data + buflen;
+		skb_set_tail_pointer(skb, buflen);
 		skb->len = buflen;
 		skb->protocol = eth_type_trans(skb, netdev);
 
diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index cd3b1fa4a41..aa83277aba7 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -881,7 +881,7 @@ isdn_net_log_skb(struct sk_buff * skb, isdn_net_local * lp)
 
 	addinfo[0] = '\0';
 	/* This check stolen from 2.1.72 dev_queue_xmit_nit() */
-	if (p < skb->data || p >= skb->tail) {
+	if (p < skb->data || skb->network_header >= skb->tail) {
 		/* fall back to old isdn_net_log_packet method() */
 		char * buf = skb->data;
 
diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
index c6b004182d9..9de177a5b9f 100644
--- a/drivers/media/dvb/dvb-core/dvb_net.c
+++ b/drivers/media/dvb/dvb-core/dvb_net.c
@@ -600,6 +600,7 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len )
 			/* Check CRC32, we've got it in our skb already. */
 			unsigned short ulen = htons(priv->ule_sndu_len);
 			unsigned short utype = htons(priv->ule_sndu_type);
+			const u8 *tail;
 			struct kvec iov[3] = {
 				{ &ulen, sizeof ulen },
 				{ &utype, sizeof utype },
@@ -613,10 +614,11 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len )
 			}
 
 			ule_crc = iov_crc32(ule_crc, iov, 3);
-			expected_crc = *((u8 *)priv->ule_skb->tail - 4) << 24 |
-				       *((u8 *)priv->ule_skb->tail - 3) << 16 |
-				       *((u8 *)priv->ule_skb->tail - 2) << 8 |
-				       *((u8 *)priv->ule_skb->tail - 1);
+			tail = skb_tail_pointer(priv->ule_skb);
+			expected_crc = *(tail - 4) << 24 |
+				       *(tail - 3) << 16 |
+				       *(tail - 2) << 8 |
+				       *(tail - 1);
 			if (ule_crc != expected_crc) {
 				printk(KERN_WARNING "%lu: CRC32 check FAILED: %08x / %08x, SNDU len %d type %#x, ts_remain %d, next 2: %x.\n",
 				       priv->ts_count, ule_crc, expected_crc, priv->ule_sndu_len, priv->ule_sndu_type, ts_remain, ts_remain > 2 ? *(unsigned short *)from_where : 0);
diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c
index 98643801a3b..7feb9c56114 100644
--- a/drivers/net/cris/eth_v10.c
+++ b/drivers/net/cris/eth_v10.c
@@ -1348,7 +1348,7 @@ e100_rx(struct net_device *dev)
 
 #ifdef ETHDEBUG
 		printk("head = 0x%x, data = 0x%x, tail = 0x%x, end = 0x%x\n",
-		  skb->head, skb->data, skb->tail, skb->end);
+		  skb->head, skb->data, skb_tail_pointer(skb), skb->end);
 		printk("copying packet to 0x%x.\n", skb_data_ptr);
 #endif
 
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 610e4769efa..c5faf1380e1 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -1325,13 +1325,13 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
 	flits = skb_transport_offset(skb) / 8;
 	sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
 	sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
-			     skb->tail - skb_transport_header(skb),
+			     skb->tail - skb->transport_header,
 			     adap->pdev);
 	if (need_skb_unmap()) {
 		setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
 		skb->destructor = deferred_unmap_destructor;
 		((struct unmap_info *)skb->cb)->len = (skb->tail -
-						       skb_transport_header(skb));
+						       skb->transport_header);
 	}
 
 	write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
@@ -1353,7 +1353,7 @@ static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
 		return 1;	/* packet fits as immediate data */
 
 	flits = skb_transport_offset(skb) / 8;	/* headers */
-	if (skb->tail != skb_transport_header(skb))
+	if (skb->tail != skb->transport_header)
 		cnt++;
 	return flits_to_desc(flits + sgl_len(cnt));
 }
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index e86deb2ef82..e7c93f44f81 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -3304,7 +3304,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 				 * NOTE: this is a TSO only workaround
 				 * if end byte alignment not correct move us
 				 * into the next dword */
-				if ((unsigned long)(skb->tail - 1) & 4)
+				if ((unsigned long)(skb_tail_pointer(skb) - 1) & 4)
 					break;
 				/* fall through */
 			case e1000_82571:
@@ -4388,7 +4388,7 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter,
 				PCI_DMA_FROMDEVICE);
 			vaddr = kmap_atomic(ps_page->ps_page[0],
 			                    KM_SKB_DATA_SOFTIRQ);
-			memcpy(skb->tail, vaddr, l1);
+			memcpy(skb_tail_pointer(skb), vaddr, l1);
 			kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
 			pci_dma_sync_single_for_device(pdev,
 				ps_page_dma->ps_page_dma[0],
diff --git a/drivers/net/ibm_emac/ibm_emac_core.c b/drivers/net/ibm_emac/ibm_emac_core.c
index b1ad62d89eb..3d82d46f499 100644
--- a/drivers/net/ibm_emac/ibm_emac_core.c
+++ b/drivers/net/ibm_emac/ibm_emac_core.c
@@ -1338,7 +1338,7 @@ static inline int emac_rx_sg_append(struct ocp_enet_private *dev, int slot)
 			dev_kfree_skb(dev->rx_sg_skb);
 			dev->rx_sg_skb = NULL;
 		} else {
-			cacheable_memcpy(dev->rx_sg_skb->tail,
+			cacheable_memcpy(skb_tail_pointer(dev->rx_sg_skb),
 					 dev->rx_skb[slot]->data, len);
 			skb_put(dev->rx_sg_skb, len);
 			emac_recycle_rx_skb(dev, slot, len);
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index 0c3649be0d0..98bf51afcee 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -575,7 +575,8 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	int i;
 	dev_dbg(&bp->pdev->dev,
 		"start_xmit: len %u head %p data %p tail %p end %p\n",
-		skb->len, skb->head, skb->data, skb->tail, skb->end);
+		skb->len, skb->head, skb->data,
+		skb_tail_pointer(skb), skb->end);
 	dev_dbg(&bp->pdev->dev,
 		"data:");
 	for (i = 0; i < 16; i++)
diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c
index ec0af65cd5d..73da611fd53 100644
--- a/drivers/net/pcmcia/nmclan_cs.c
+++ b/drivers/net/pcmcia/nmclan_cs.c
@@ -1185,7 +1185,7 @@ static int mace_rx(struct net_device *dev, unsigned char RxCnt)
 	skb_reserve(skb, 2);
 	insw(ioaddr + AM2150_RCV, skb_put(skb, pkt_len), pkt_len>>1);
 	if (pkt_len & 1)
-	    *(skb->tail-1) = inb(ioaddr + AM2150_RCV);
+	    *(skb_tail_pointer(skb) - 1) = inb(ioaddr + AM2150_RCV);
 	skb->protocol = eth_type_trans(skb, dev);
 	
 	netif_rx(skb); /* Send the packet to the upper (protocol) layers. */
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index 46ebf141ee5..600d3ff347f 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -2195,7 +2195,7 @@ static int fill_rxd_3buf(struct s2io_nic *nic, struct RxD_t *rxdp, struct \
 	frag_list->next = NULL;
 	tmp = (void *)ALIGN((long)frag_list->data, ALIGN_SIZE + 1);
 	frag_list->data = tmp;
-	frag_list->tail = tmp;
+	skb_reset_tail_pointer(frag_list);
 
 	/* Buffer-2 receives L4 data payload */
 	((struct RxD3*)rxdp)->Buffer2_ptr = pci_map_single(nic->pdev,
@@ -2349,7 +2349,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, int ring_no)
 			tmp += ALIGN_SIZE;
 			tmp &= ~ALIGN_SIZE;
 			skb->data = (void *) (unsigned long)tmp;
-			skb->tail = (void *) (unsigned long)tmp;
+			skb_reset_tail_pointer(skb);
 
 			if (!(((struct RxD3*)rxdp)->Buffer0_ptr))
 				((struct RxD3*)rxdp)->Buffer0_ptr =
diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c
index 9a5850fa644..e46f4cb02c1 100644
--- a/drivers/net/tulip/uli526x.c
+++ b/drivers/net/tulip/uli526x.c
@@ -829,7 +829,9 @@ static void uli526x_rx_packet(struct net_device *dev, struct uli526x_board_info
 					!= NULL) ) {
 					/* size less than COPY_SIZE, allocate a rxlen SKB */
 					skb_reserve(skb, 2); /* 16byte align */
-					memcpy(skb_put(skb, rxlen), rxptr->rx_skb_ptr->tail, rxlen);
+					memcpy(skb_put(skb, rxlen),
+					       skb_tail_pointer(rxptr->rx_skb_ptr),
+					       rxlen);
 					uli526x_reuse_skb(db, rxptr->rx_skb_ptr);
 				} else
 					skb_put(skb, rxlen);
@@ -1175,7 +1177,10 @@ static void uli526x_reuse_skb(struct uli526x_board_info *db, struct sk_buff * sk
 
 	if (!(rxptr->rdes0 & cpu_to_le32(0x80000000))) {
 		rxptr->rx_skb_ptr = skb;
-		rxptr->rdes2 = cpu_to_le32( pci_map_single(db->pdev, skb->tail, RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE) );
+		rxptr->rdes2 = cpu_to_le32(pci_map_single(db->pdev,
+							  skb_tail_pointer(skb),
+							  RX_ALLOC_SIZE,
+							  PCI_DMA_FROMDEVICE));
 		wmb();
 		rxptr->rdes0 = cpu_to_le32(0x80000000);
 		db->rx_avail_cnt++;
@@ -1339,7 +1344,10 @@ static void allocate_rx_buffer(struct uli526x_board_info *db)
 		if ( ( skb = dev_alloc_skb(RX_ALLOC_SIZE) ) == NULL )
 			break;
 		rxptr->rx_skb_ptr = skb; /* FIXME (?) */
-		rxptr->rdes2 = cpu_to_le32( pci_map_single(db->pdev, skb->tail, RX_ALLOC_SIZE, PCI_DMA_FROMDEVICE) );
+		rxptr->rdes2 = cpu_to_le32(pci_map_single(db->pdev,
+							  skb_tail_pointer(skb),
+							  RX_ALLOC_SIZE,
+							  PCI_DMA_FROMDEVICE));
 		wmb();
 		rxptr->rdes0 = cpu_to_le32(0x80000000);
 		rxptr = rxptr->next_rx_desc;
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index b747228c719..aeb2789adf2 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -533,7 +533,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep)
 		skb->protocol = __constant_htons(NLPID_CCITT_ANSI_LMI);
 		fr_hard_header(&skb, LMI_CCITT_ANSI_DLCI);
 	}
-	data = skb->tail;
+	data = skb_tail_pointer(skb);
 	data[i++] = LMI_CALLREF;
 	data[i++] = dce ? LMI_STATUS : LMI_STATUS_ENQUIRY;
 	if (lmi == LMI_ANSI)
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index d4851465c83..b731f3aae0d 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -1636,7 +1636,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
             if (nsb) {
                 sc->lmc_rxq[i] = nsb;
                 nsb->dev = dev;
-                sc->lmc_rxring[i].buffer1 = virt_to_bus (nsb->tail);
+                sc->lmc_rxring[i].buffer1 = virt_to_bus(skb_tail_pointer(nsb));
             }
             sc->failed_recv_alloc = 1;
             goto skip_packet;
@@ -1679,7 +1679,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
             if (nsb) {
                 sc->lmc_rxq[i] = nsb;
                 nsb->dev = dev;
-                sc->lmc_rxring[i].buffer1 = virt_to_bus (nsb->tail);
+                sc->lmc_rxring[i].buffer1 = virt_to_bus(skb_tail_pointer(nsb));
                 /* Transferred to 21140 below */
             }
             else {
diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c
index 35a3a50724f..5e3e9e26270 100644
--- a/drivers/net/wireless/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_rx.c
@@ -922,7 +922,7 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 		if (frag != 0)
 			flen -= hdrlen;
 
-		if (frag_skb->tail + flen > frag_skb->end) {
+		if (skb_tail_pointer(frag_skb) + flen > frag_skb->end) {
 			printk(KERN_WARNING "%s: host decrypted and "
 			       "reassembled frame did not fit skb\n",
 			       dev->name);
diff --git a/drivers/s390/net/ctcmain.c b/drivers/s390/net/ctcmain.c
index 787c0131704..54e3f806cd5 100644
--- a/drivers/s390/net/ctcmain.c
+++ b/drivers/s390/net/ctcmain.c
@@ -706,7 +706,8 @@ ch_action_txdone(fsm_instance * fi, int event, void *arg)
 			spin_unlock(&ch->collect_lock);
 			return;
 		}
-		ch->trans_skb->tail = ch->trans_skb->data = ch->trans_skb_data;
+		ch->trans_skb->data = ch->trans_skb_data;
+		skb_reset_tail_pointer(ch->trans_skb);
 		ch->trans_skb->len = 0;
 		if (ch->prof.maxmulti < (ch->collect_len + 2))
 			ch->prof.maxmulti = ch->collect_len + 2;
@@ -831,7 +832,8 @@ ch_action_rx(fsm_instance * fi, int event, void *arg)
 		ctc_unpack_skb(ch, skb);
 	}
  again:
-	skb->data = skb->tail = ch->trans_skb_data;
+	skb->data = ch->trans_skb_data;
+	skb_reset_tail_pointer(skb);
 	skb->len = 0;
 	if (ctc_checkalloc_buffer(ch, 1))
 		return;
@@ -2226,7 +2228,8 @@ transmit_skb(struct channel *ch, struct sk_buff *skb)
 		 * IDAL support in CTC is broken, so we have to
 		 * care about skb's above 2G ourselves.
 		 */
-		hi = ((unsigned long) skb->tail + LL_HEADER_LENGTH) >> 31;
+		hi = ((unsigned long)skb_tail_pointer(skb) +
+		      LL_HEADER_LENGTH) >> 31;
 		if (hi) {
 			nskb = alloc_skb(skb->len, GFP_ATOMIC | GFP_DMA);
 			if (!nskb) {
@@ -2262,7 +2265,7 @@ transmit_skb(struct channel *ch, struct sk_buff *skb)
 				return -EBUSY;
 			}
 
-			ch->trans_skb->tail = ch->trans_skb->data;
+			skb_reset_tail_pointer(ch->trans_skb);
 			ch->trans_skb->len = 0;
 			ch->ccw[1].count = skb->len;
 			memcpy(skb_put(ch->trans_skb, skb->len), skb->data,
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 82edf201440..cd42bd54988 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -689,7 +689,8 @@ static void conn_action_rx(fsm_instance *fi, int event, void *arg)
 			       msg->length, conn->max_buffsize);
 		return;
 	}
-	conn->rx_buff->data = conn->rx_buff->tail = conn->rx_buff->head;
+	conn->rx_buff->data = conn->rx_buff->head;
+	skb_reset_tail_pointer(conn->rx_buff);
 	conn->rx_buff->len = 0;
 	rc = iucv_message_receive(conn->path, msg, 0, conn->rx_buff->data,
 				  msg->length, NULL);
@@ -735,7 +736,8 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg)
 			}
 		}
 	}
-	conn->tx_buff->data = conn->tx_buff->tail = conn->tx_buff->head;
+	conn->tx_buff->data = conn->tx_buff->head;
+	skb_reset_tail_pointer(conn->tx_buff);
 	conn->tx_buff->len = 0;
 	spin_lock_irqsave(&conn->collect_lock, saveflags);
 	while ((skb = skb_dequeue(&conn->collect_queue))) {
@@ -1164,8 +1166,8 @@ static int netiucv_transmit_skb(struct iucv_connection *conn,
 		 * Copy the skb to a new allocated skb in lowmem only if the
 		 * data is located above 2G in memory or tailroom is < 2.
 		 */
-		unsigned long hi =
-			((unsigned long)(skb->tail + NETIUCV_HDRLEN)) >> 31;
+		unsigned long hi = ((unsigned long)(skb_tail_pointer(skb) +
+				    NETIUCV_HDRLEN)) >> 31;
 		int copied = 0;
 		if (hi || (skb_tailroom(skb) < 2)) {
 			nskb = alloc_skb(skb->len + NETIUCV_HDRLEN +
diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c
index ec63b0ee074..4d8f282b23d 100644
--- a/drivers/usb/atm/usbatm.c
+++ b/drivers/usb/atm/usbatm.c
@@ -335,15 +335,15 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char
 
 	sarb = instance->cached_vcc->sarb;
 
-	if (sarb->tail + ATM_CELL_PAYLOAD > sarb->end) {
+	if (skb_tail_pointer(sarb) + ATM_CELL_PAYLOAD > sarb->end) {
 		atm_rldbg(instance, "%s: buffer overrun (sarb->len %u, vcc: 0x%p)!\n",
 				__func__, sarb->len, vcc);
 		/* discard cells already received */
 		skb_trim(sarb, 0);
-		UDSL_ASSERT(sarb->tail + ATM_CELL_PAYLOAD <= sarb->end);
+		UDSL_ASSERT(skb_tail_pointer(sarb) + ATM_CELL_PAYLOAD <= sarb->end);
 	}
 
-	memcpy(sarb->tail, source + ATM_CELL_HEADER, ATM_CELL_PAYLOAD);
+	memcpy(skb_tail_pointer(sarb), source + ATM_CELL_HEADER, ATM_CELL_PAYLOAD);
 	__skb_put(sarb, ATM_CELL_PAYLOAD);
 
 	if (pti & 1) {
@@ -370,7 +370,7 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char
 			goto out;
 		}
 
-		if (crc32_be(~0, sarb->tail - pdu_length, pdu_length) != 0xc704dd7b) {
+		if (crc32_be(~0, skb_tail_pointer(sarb) - pdu_length, pdu_length) != 0xc704dd7b) {
 			atm_rldbg(instance, "%s: packet failed crc check (vcc: 0x%p)!\n",
 				  __func__, vcc);
 			atomic_inc(&vcc->stats->rx_err);
@@ -396,7 +396,7 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char
 			goto out;	/* atm_charge increments rx_drop */
 		}
 
-		memcpy(skb->data, sarb->tail - pdu_length, length);
+		memcpy(skb->data, skb_tail_pointer(sarb) - pdu_length, length);
 		__skb_put(skb, length);
 
 		vdbg("%s: sending skb 0x%p, skb->len %u, skb->truesize %u",
diff --git a/drivers/usb/net/asix.c b/drivers/usb/net/asix.c
index 5808ea08245..f56e2dab371 100644
--- a/drivers/usb/net/asix.c
+++ b/drivers/usb/net/asix.c
@@ -298,7 +298,7 @@ static int asix_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 		if (ax_skb) {
 			ax_skb->len = size;
 			ax_skb->data = packet;
-			ax_skb->tail = packet + size;
+			skb_set_tail_pointer(ax_skb, size);
 			usbnet_skb_return(dev, ax_skb);
 		} else {
 			return 0;
@@ -338,7 +338,7 @@ static struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 	    && ((headroom + tailroom) >= (4 + padlen))) {
 		if ((headroom < 4) || (tailroom < padlen)) {
 			skb->data = memmove(skb->head + 4, skb->data, skb->len);
-			skb->tail = skb->data + skb->len;
+			skb_set_tail_pointer(skb, skb->len);
 		}
 	} else {
 		struct sk_buff *skb2;
@@ -356,7 +356,7 @@ static struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 
 	if ((skb->len % 512) == 0) {
 		cpu_to_le32s(&padbytes);
-		memcpy( skb->tail, &padbytes, sizeof(padbytes));
+		memcpy(skb_tail_pointer(skb), &padbytes, sizeof(padbytes));
 		skb_put(skb, sizeof(padbytes));
 	}
 	return skb;
diff --git a/drivers/usb/net/gl620a.c b/drivers/usb/net/gl620a.c
index d257a8e026d..031cf5ca4db 100644
--- a/drivers/usb/net/gl620a.c
+++ b/drivers/usb/net/gl620a.c
@@ -157,7 +157,7 @@ genelink_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 		if ((headroom < (4 + 4*1)) || (tailroom < padlen)) {
 			skb->data = memmove(skb->head + (4 + 4*1),
 					     skb->data, skb->len);
-			skb->tail = skb->data + skb->len;
+			skb_set_tail_pointer(skb, skb->len);
 		}
 	} else {
 		struct sk_buff	*skb2;
diff --git a/drivers/usb/net/net1080.c b/drivers/usb/net/net1080.c
index ccebfdef475..19bf8dae70c 100644
--- a/drivers/usb/net/net1080.c
+++ b/drivers/usb/net/net1080.c
@@ -520,7 +520,7 @@ net1080_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 			skb->data = memmove(skb->head
 						+ sizeof (struct nc_header),
 					    skb->data, skb->len);
-			skb->tail = skb->data + len;
+			skb_set_tail_pointer(skb, len);
 			goto encapsulate;
 		}
 	}
diff --git a/drivers/usb/net/rndis_host.c b/drivers/usb/net/rndis_host.c
index 39a21c74fdf..1d36772ba6e 100644
--- a/drivers/usb/net/rndis_host.c
+++ b/drivers/usb/net/rndis_host.c
@@ -588,7 +588,7 @@ rndis_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags)
 		if (likely((sizeof *hdr) <= room)) {
 			skb->data = memmove(skb->head + sizeof *hdr,
 					    skb->data, len);
-			skb->tail = skb->data + len;
+			skb_set_tail_pointer(skb, len);
 			goto fill;
 		}
 	}
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 6179648a014..e1ea5dfbbbd 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -62,11 +62,11 @@ struct nfattr
 #define NFA_DATA(nfa)   ((void *)(((char *)(nfa)) + NFA_LENGTH(0)))
 #define NFA_PAYLOAD(nfa) ((int)((nfa)->nfa_len) - NFA_LENGTH(0))
 #define NFA_NEST(skb, type) \
-({	struct nfattr *__start = (struct nfattr *) (skb)->tail; \
+({	struct nfattr *__start = (struct nfattr *)skb_tail_pointer(skb); \
 	NFA_PUT(skb, (NFNL_NFA_NEST | type), 0, NULL); \
 	__start;  })
 #define NFA_NEST_END(skb, start) \
-({      (start)->nfa_len = ((skb)->tail - (unsigned char *) (start)); \
+({      (start)->nfa_len = skb_tail_pointer(skb) - (unsigned char *)(start); \
         (skb)->len; })
 #define NFA_NEST_CANCEL(skb, start) \
 ({      if (start) \
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index a9d3ad5bc80..68a632b372e 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -229,7 +229,7 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
 		  (cb)->nlh->nlmsg_seq, type, len, flags)
 
 #define NLMSG_END(skb, nlh) \
-({	(nlh)->nlmsg_len = (skb)->tail - (unsigned char *) (nlh); \
+({	(nlh)->nlmsg_len = skb_tail_pointer(skb) - (unsigned char *)(nlh); \
 	(skb)->len; })
 
 #define NLMSG_CANCEL(skb, nlh) \
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 4a629ea70cc..3a4cb242ecd 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -605,7 +605,7 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi
 
 #define RTA_PUT_NOHDR(skb, attrlen, data) \
 ({	RTA_APPEND(skb, RTA_ALIGN(attrlen), data); \
-	memset(skb->tail - (RTA_ALIGN(attrlen) - attrlen), 0, \
+	memset(skb_tail_pointer(skb) - (RTA_ALIGN(attrlen) - attrlen), 0, \
 	       RTA_ALIGN(attrlen) - attrlen); })
 
 #define RTA_PUT_U8(skb, attrtype, value) \
@@ -637,12 +637,12 @@ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const voi
 	RTA_PUT(skb, attrtype, 0, NULL);
 
 #define RTA_NEST(skb, type) \
-({	struct rtattr *__start = (struct rtattr *) (skb)->tail; \
+({	struct rtattr *__start = (struct rtattr *)skb_tail_pointer(skb); \
 	RTA_PUT(skb, type, 0, NULL); \
 	__start;  })
 
 #define RTA_NEST_END(skb, start) \
-({	(start)->rta_len = ((skb)->tail - (unsigned char *) (start)); \
+({	(start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \
 	(skb)->len; })
 
 #define RTA_NEST_CANCEL(skb, start) \
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2e740550062..e1c2392ecb5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -246,9 +246,6 @@ struct sk_buff {
 	int			iif;
 	/* 4 byte hole on 64 bit*/
 
-	sk_buff_data_t		transport_header;
-	sk_buff_data_t		network_header;
-	sk_buff_data_t		mac_header;
 	struct  dst_entry	*dst;
 	struct	sec_path	*sp;
 
@@ -303,13 +300,16 @@ struct sk_buff {
 
 	__u32			mark;
 
+	sk_buff_data_t		transport_header;
+	sk_buff_data_t		network_header;
+	sk_buff_data_t		mac_header;
 	/* These elements must be at the end, see alloc_skb() for details.  */
-	unsigned int		truesize;
-	atomic_t		users;
+	sk_buff_data_t		tail;
 	unsigned char		*head,
 				*data,
-				*tail,
 				*end;
+	unsigned int		truesize;
+	atomic_t		users;
 };
 
 #ifdef __KERNEL__
@@ -812,12 +812,45 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
 #define SKB_FRAG_ASSERT(skb) 	BUG_ON(skb_shinfo(skb)->frag_list)
 #define SKB_LINEAR_ASSERT(skb)  BUG_ON(skb_is_nonlinear(skb))
 
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
+{
+	return skb->head + skb->tail;
+}
+
+static inline void skb_reset_tail_pointer(struct sk_buff *skb)
+{
+	skb->tail = skb->data - skb->head;
+}
+
+static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
+{
+	skb_reset_tail_pointer(skb);
+	skb->tail += offset;
+}
+#else /* NET_SKBUFF_DATA_USES_OFFSET */
+static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
+{
+	return skb->tail;
+}
+
+static inline void skb_reset_tail_pointer(struct sk_buff *skb)
+{
+	skb->tail = skb->data;
+}
+
+static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
+{
+	skb->tail = skb->data + offset;
+}
+#endif /* NET_SKBUFF_DATA_USES_OFFSET */
+
 /*
  *	Add data to an sk_buff
  */
 static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
 {
-	unsigned char *tmp = skb->tail;
+	unsigned char *tmp = skb_tail_pointer(skb);
 	SKB_LINEAR_ASSERT(skb);
 	skb->tail += len;
 	skb->len  += len;
@@ -835,11 +868,11 @@ static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
  */
 static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
 {
-	unsigned char *tmp = skb->tail;
+	unsigned char *tmp = skb_tail_pointer(skb);
 	SKB_LINEAR_ASSERT(skb);
 	skb->tail += len;
 	skb->len  += len;
-	if (unlikely(skb->tail>skb->end))
+	if (unlikely(skb_tail_pointer(skb) > skb->end))
 		skb_over_panic(skb, len, current_text_addr());
 	return tmp;
 }
@@ -935,7 +968,7 @@ static inline int skb_headroom(const struct sk_buff *skb)
  */
 static inline int skb_tailroom(const struct sk_buff *skb)
 {
-	return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail;
+	return skb_is_nonlinear(skb) ? 0 : skb->end - skb_tail_pointer(skb);
 }
 
 /**
@@ -1127,8 +1160,8 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
 		WARN_ON(1);
 		return;
 	}
-	skb->len  = len;
-	skb->tail = skb->data + len;
+	skb->len = len;
+	skb_set_tail_pointer(skb, len);
 }
 
 /**
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index 06a2c69a89e..de8399a7977 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -114,14 +114,12 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb)
 {
 	switch (skb->protocol) {
 	case __constant_htons(ETH_P_IP):
-		if (skb_network_header(skb) + sizeof(struct iphdr) <=
-		    skb->tail)
+		if (skb->network_header + sizeof(struct iphdr) <= skb->tail)
 			return IP_ECN_set_ce(ip_hdr(skb));
 		break;
 
 	case __constant_htons(ETH_P_IPV6):
-		if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
-		    skb->tail)
+		if (skb->network_header + sizeof(struct ipv6hdr) <= skb->tail)
 			return IP6_ECN_set_ce(ipv6_hdr(skb));
 		break;
 	}
diff --git a/include/net/netlink.h b/include/net/netlink.h
index bcaf67b7a19..2c7ab107f20 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -525,7 +525,7 @@ static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags)
  */
 static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	nlh->nlmsg_len = skb->tail - (unsigned char *) nlh;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - (unsigned char *)nlh;
 
 	return skb->len;
 }
@@ -538,7 +538,7 @@ static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh)
  */
 static inline void *nlmsg_get_pos(struct sk_buff *skb)
 {
-	return skb->tail;
+	return skb_tail_pointer(skb);
 }
 
 /**
@@ -940,7 +940,7 @@ static inline unsigned long nla_get_msecs(struct nlattr *nla)
  */
 static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
 {
-	struct nlattr *start = (struct nlattr *) skb->tail;
+	struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb);
 
 	if (nla_put(skb, attrtype, 0, NULL) < 0)
 		return NULL;
@@ -960,7 +960,7 @@ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype)
  */
 static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start)
 {
-	start->nla_len = skb->tail - (unsigned char *) start;
+	start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start;
 	return skb->len;
 }
 
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index dcb3a91f136..4129df70807 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -337,7 +337,7 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
 static inline int tcf_valid_offset(const struct sk_buff *skb,
 				   const unsigned char *ptr, const int len)
 {
-	return unlikely((ptr + len) < skb->tail && ptr > skb->head);
+	return unlikely((ptr + len) < skb_tail_pointer(skb) && ptr > skb->head);
 }
 
 #ifdef CONFIG_NET_CLS_IND
diff --git a/kernel/audit.c b/kernel/audit.c
index 76c9a11b72d..ea8521417d1 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1073,7 +1073,7 @@ static void audit_log_vformat(struct audit_buffer *ab, const char *fmt,
 			goto out;
 	}
 	va_copy(args2, args);
-	len = vsnprintf(skb->tail, avail, fmt, args);
+	len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args);
 	if (len >= avail) {
 		/* The printk buffer is 1024 bytes long, so if we get
 		 * here and AUDIT_BUFSIZ is at least 1024, then we can
@@ -1082,7 +1082,7 @@ static void audit_log_vformat(struct audit_buffer *ab, const char *fmt,
 			max_t(unsigned, AUDIT_BUFSIZ, 1+len-avail));
 		if (!avail)
 			goto out;
-		len = vsnprintf(skb->tail, avail, fmt, args2);
+		len = vsnprintf(skb_tail_pointer(skb), avail, fmt, args2);
 	}
 	if (len > 0)
 		skb_put(skb, len);
@@ -1143,7 +1143,7 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf,
 			return;
 	}
 
-	ptr = skb->tail;
+	ptr = skb_tail_pointer(skb);
 	for (i=0; i<len; i++) {
 		*ptr++ = hex[(buf[i] & 0xF0)>>4]; /* Upper nibble */
 		*ptr++ = hex[buf[i] & 0x0F];	  /* Lower nibble */
@@ -1175,7 +1175,7 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
 		if (!avail)
 			return;
 	}
-	ptr = skb->tail;
+	ptr = skb_tail_pointer(skb);
 	*ptr++ = '"';
 	memcpy(ptr, string, slen);
 	ptr += slen;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index d339645dc79..a8c6b285e06 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -283,7 +283,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n",
-		(long)skb->head, (long)skb->data, (long)skb->tail,
+		(long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb),
 		(long)skb->end);
 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
 	if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0)
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 94f45736056..10cc13cfae6 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1567,7 +1567,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
 
 	/* Trim FCS */
 	skb->len--; skb->tail--;
-	fcs = *(u8 *) skb->tail;
+	fcs = *(u8 *)skb_tail_pointer(skb);
 
 	if (__check_fcs(skb->data, type, fcs)) {
 		BT_ERR("bad checksum in packet");
diff --git a/net/core/dev.c b/net/core/dev.c
index 6562e5736e2..86dc9f693f6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1069,7 +1069,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 			skb_reset_mac_header(skb2);
 
 			if (skb_network_header(skb2) < skb2->data ||
-			    skb_network_header(skb2) > skb2->tail) {
+			    skb2->network_header > skb2->tail) {
 				if (net_ratelimit())
 					printk(KERN_CRIT "protocol %04x is "
 					       "buggy, dev %s\n",
@@ -1175,7 +1175,7 @@ int skb_checksum_help(struct sk_buff *skb)
 	BUG_ON(offset > (int)skb->len);
 	csum = skb_checksum(skb, offset, skb->len-offset, 0);
 
-	offset = skb->tail - skb_transport_header(skb);
+	offset = skb->tail - skb->transport_header;
 	BUG_ON(offset <= 0);
 	BUG_ON(skb->csum_offset + 2 > offset);
 
diff --git a/net/core/filter.c b/net/core/filter.c
index d2358a5e633..bd903aaf7aa 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -46,7 +46,7 @@ static void *__load_pointer(struct sk_buff *skb, int k)
 	else if (k >= SKF_LL_OFF)
 		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
 
-	if (ptr >= skb->head && ptr < skb->tail)
+	if (ptr >= skb->head && ptr < skb_tail_pointer(skb))
 		return ptr;
 	return NULL;
 }
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 259473d0559..bcc25591d8a 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -61,7 +61,7 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
 	spin_lock_bh(lock);
 	d->lock = lock;
 	if (type)
-		d->tail = (struct rtattr *) skb->tail;
+		d->tail = (struct rtattr *)skb_tail_pointer(skb);
 	d->skb = skb;
 	d->compat_tc_stats = tc_stats_type;
 	d->compat_xstats = xstats_type;
@@ -212,7 +212,7 @@ int
 gnet_stats_finish_copy(struct gnet_dump *d)
 {
 	if (d->tail)
-		d->tail->rta_len = d->skb->tail - (u8 *) d->tail;
+		d->tail->rta_len = skb_tail_pointer(d->skb) - (u8 *)d->tail;
 
 	if (d->compat_tc_stats)
 		if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats,
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 9da8357addc..f9469ea530c 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2357,7 +2357,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 		*vlan_encapsulated_proto = htons(ETH_P_IP);
 	}
 
-	skb_set_network_header(skb, skb->tail - skb->data);
+	skb->network_header = skb->tail;
 	skb->transport_header = skb->network_header + sizeof(struct iphdr);
 	skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
 
@@ -2696,7 +2696,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 		*vlan_encapsulated_proto = htons(ETH_P_IPV6);
 	}
 
-	skb_set_network_header(skb, skb->tail - skb->data);
+	skb->network_header = skb->tail;
 	skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
 	skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a48b0868126..ddcbc4d10da 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -87,8 +87,9 @@ static struct kmem_cache *skbuff_fclone_cache __read_mostly;
 void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 {
 	printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
-			  "data:%p tail:%p end:%p dev:%s\n",
-	       here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
+			  "data:%p tail:%#lx end:%p dev:%s\n",
+	       here, skb->len, sz, skb->head, skb->data,
+	       (unsigned long)skb->tail, skb->end,
 	       skb->dev ? skb->dev->name : "<NULL>");
 	BUG();
 }
@@ -105,8 +106,9 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 {
 	printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
-			  "data:%p tail:%p end:%p dev:%s\n",
-	       here, skb->len, sz, skb->head, skb->data, skb->tail, skb->end,
+			  "data:%p tail:%#lx end:%p dev:%s\n",
+	       here, skb->len, sz, skb->head, skb->data,
+	       (unsigned long)skb->tail, skb->end,
 	       skb->dev ? skb->dev->name : "<NULL>");
 	BUG();
 }
@@ -167,7 +169,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	atomic_set(&skb->users, 1);
 	skb->head = data;
 	skb->data = data;
-	skb->tail = data;
+	skb_reset_tail_pointer(skb);
 	skb->end  = data + size;
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
@@ -629,7 +631,12 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	/* Copy only real data... and, alas, header. This should be
 	 * optimized for the cases when header is void. */
-	memcpy(data + nhead, skb->head, skb->tail - skb->head);
+	memcpy(data + nhead, skb->head,
+		skb->tail
+#ifndef NET_SKBUFF_DATA_USES_OFFSET
+		- skb->head
+#endif
+		);
 	memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
@@ -645,9 +652,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	skb->head     = data;
 	skb->end      = data + size;
 	skb->data    += off;
-	skb->tail    += off;
 #ifndef NET_SKBUFF_DATA_USES_OFFSET
-	/* {transport,network,mac}_header are relative to skb->head */
+	/* {transport,network,mac}_header and tail are relative to skb->head */
+	skb->tail	      += off;
 	skb->transport_header += off;
 	skb->network_header   += off;
 	skb->mac_header	      += off;
@@ -762,7 +769,7 @@ int skb_pad(struct sk_buff *skb, int pad)
 		return 0;
 	}
 
-	ntail = skb->data_len + pad - (skb->end - skb->tail);
+	ntail = skb->data_len + pad - (skb->end - skb_tail_pointer(skb));
 	if (likely(skb_cloned(skb) || ntail > 0)) {
 		err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
 		if (unlikely(err))
@@ -863,7 +870,7 @@ done:
 	} else {
 		skb->len       = len;
 		skb->data_len  = 0;
-		skb->tail      = skb->data + len;
+		skb_set_tail_pointer(skb, len);
 	}
 
 	return 0;
@@ -900,7 +907,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
 	 * plus 128 bytes for future expansions. If we have enough
 	 * room at tail, reallocate without expansion only if skb is cloned.
 	 */
-	int i, k, eat = (skb->tail + delta) - skb->end;
+	int i, k, eat = (skb_tail_pointer(skb) + delta) - skb->end;
 
 	if (eat > 0 || skb_cloned(skb)) {
 		if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
@@ -908,7 +915,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
 			return NULL;
 	}
 
-	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
+	if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
 		BUG();
 
 	/* Optimization: no fragments, no reasons to preestimate
@@ -1004,7 +1011,7 @@ pull_pages:
 	skb->tail     += delta;
 	skb->data_len -= delta;
 
-	return skb->tail;
+	return skb_tail_pointer(skb);
 }
 
 /* Copy some data bits from skb to kernel buffer. */
@@ -1539,7 +1546,7 @@ static inline void skb_split_inside_header(struct sk_buff *skb,
 	skb1->len		   += skb1->data_len;
 	skb->data_len		   = 0;
 	skb->len		   = len;
-	skb->tail		   = skb->data + len;
+	skb_set_tail_pointer(skb, len);
 }
 
 static inline void skb_split_no_header(struct sk_buff *skb,
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 7c6a5db544f..4a777b68e3b 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -1938,7 +1938,7 @@ static inline int rtnetlink_fill_iwinfo(struct sk_buff *	skb,
 {
 	struct ifinfomsg *r;
 	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
+	unsigned char	 *b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r));
 	r = NLMSG_DATA(nlh);
@@ -1952,7 +1952,7 @@ static inline int rtnetlink_fill_iwinfo(struct sk_buff *	skb,
 	/* Add the wireless events in the netlink packet */
 	RTA_PUT(skb, IFLA_WIRELESS, event_len, event);
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 84b8c5b45fe..7404653880b 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -681,8 +681,10 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
 	if (scp->peer.sdn_objnum)
 		type = 0;
 
-	skb_put(skb, dn_sockaddr2username(&scp->peer, skb->tail, type));
-	skb_put(skb, dn_sockaddr2username(&scp->addr, skb->tail, 2));
+	skb_put(skb, dn_sockaddr2username(&scp->peer,
+					  skb_tail_pointer(skb), type));
+	skb_put(skb, dn_sockaddr2username(&scp->addr,
+					  skb_tail_pointer(skb), 2));
 
 	menuver = DN_MENUVER_ACC | DN_MENUVER_USR;
 	if (scp->peer.sdn_flags & SDF_PROXY)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index bb73bf16630..9678b096b84 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1468,7 +1468,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	struct dn_route *rt = (struct dn_route *)skb->dst;
 	struct rtmsg *r;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	long expires;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags);
@@ -1509,7 +1509,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	if (rt->fl.iif)
 		RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 780a141f834..544c4554074 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -295,7 +295,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 {
 	struct rtmsg *rtm;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
 	rtm = NLMSG_DATA(nlh);
@@ -337,13 +337,13 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			nhp->rtnh_ifindex = nh->nh_oif;
 			if (nh->nh_gw)
 				RTA_PUT(skb, RTA_GATEWAY, 2, &nh->nh_gw);
-			nhp->rtnh_len = skb->tail - (unsigned char *)nhp;
+			nhp->rtnh_len = skb_tail_pointer(skb) - (unsigned char *)nhp;
 		} endfor_nexthops(fi);
 		mp_head->rta_type = RTA_MULTIPATH;
-		mp_head->rta_len = skb->tail - (u8*)mp_head;
+		mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
 	}
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 0e62def05a5..ceefd9dd0c9 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -33,7 +33,7 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
 {
 	struct sk_buff *skb = NULL;
 	size_t size;
-	unsigned char *old_tail;
+	sk_buff_data_t old_tail;
 	struct nlmsghdr *nlh;
 	unsigned char *ptr;
 	struct nf_dn_rtmsg *rtm;
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 78993dadb53..b5524f32ac2 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -366,7 +366,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
 			fh->cb = cb;
 			fh->port = port;
 			if (sock->type != SOCK_DGRAM) {
-				skb->tail = skb->data;
+				skb_reset_tail_pointer(skb);
 				skb->len = 0;
 			} else if (res < 0)
 				goto out_free;
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 59a765c49cf..2b854941e06 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -595,7 +595,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 		if (frag != 0)
 			flen -= hdrlen;
 
-		if (frag_skb->tail + flen > frag_skb->end) {
+		if (skb_tail_pointer(frag_skb) + flen > frag_skb->end) {
 			printk(KERN_WARNING "%s: host decrypted and "
 			       "reassembled frame did not fit skb\n",
 			       dev->name);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index de019f9fbfe..5e5613930ff 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -21,6 +21,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	struct blkcipher_desc desc;
 	struct esp_data *esp;
 	struct sk_buff *trailer;
+	u8 *tail;
 	int blksize;
 	int clen;
 	int alen;
@@ -49,12 +50,13 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 		goto error;
 
 	/* Fill padding... */
+	tail = skb_tail_pointer(trailer);
 	do {
 		int i;
 		for (i=0; i<clen-skb->len - 2; i++)
-			*(u8*)(trailer->tail + i) = i+1;
+			tail[i] = i + 1;
 	} while (0);
-	*(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
+	tail[clen - skb->len - 2] = (clen - skb->len) - 2;
 	pskb_put(skb, trailer, clen - skb->len);
 
 	__skb_push(skb, skb->data - skb_network_header(skb));
@@ -62,7 +64,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	esph = (struct ip_esp_hdr *)(skb_network_header(skb) +
 				     top_iph->ihl * 4);
 	top_iph->tot_len = htons(skb->len + alen);
-	*(u8*)(trailer->tail - 1) = top_iph->protocol;
+	*(skb_tail_pointer(skb) - 1) = top_iph->protocol;
 
 	/* this is non-NULL only with UDP Encapsulation */
 	if (x->encap) {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 8372f8b8f0c..d38cbba92a4 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -450,7 +450,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 	 */
 	iph = ip_hdr(skb_in);
 
-	if ((u8 *)iph < skb_in->head || (u8 *)(iph + 1) > skb_in->tail)
+	if ((u8 *)iph < skb_in->head ||
+	    (skb_in->network_header + sizeof(*iph)) > skb_in->tail)
 		goto out;
 
 	/*
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 1fc637fb675..2506021c293 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -348,8 +348,8 @@ static int igmpv3_sendpack(struct sk_buff *skb)
 {
 	struct iphdr *pip = ip_hdr(skb);
 	struct igmphdr *pig = igmp_hdr(skb);
-	const int iplen = skb->tail - skb_network_header(skb);
-	const int igmplen = skb->tail - skb_transport_header(skb);
+	const int iplen = skb->tail - skb->network_header;
+	const int igmplen = skb->tail - skb->transport_header;
 
 	pip->tot_len = htons(iplen);
 	ip_send_check(pip);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 5df71cd08da..37362cd1d07 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -60,7 +60,7 @@ static int inet_csk_diag_fill(struct sock *sk,
 	struct nlmsghdr  *nlh;
 	void *info = NULL;
 	struct inet_diag_meminfo  *minfo = NULL;
-	unsigned char	 *b = skb->tail;
+	unsigned char	 *b = skb_tail_pointer(skb);
 	const struct inet_diag_handler *handler;
 
 	handler = inet_diag_table[unlh->nlmsg_type];
@@ -147,7 +147,7 @@ static int inet_csk_diag_fill(struct sock *sk,
 	    icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
 		icsk->icsk_ca_ops->get_info(sk, ext, skb);
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 rtattr_failure:
@@ -163,7 +163,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 {
 	long tmo;
 	struct inet_diag_msg *r;
-	const unsigned char *previous_tail = skb->tail;
+	const unsigned char *previous_tail = skb_tail_pointer(skb);
 	struct nlmsghdr *nlh = NLMSG_PUT(skb, pid, seq,
 					 unlh->nlmsg_type, sizeof(*r));
 
@@ -205,7 +205,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 			       &tw6->tw_v6_daddr);
 	}
 #endif
-	nlh->nlmsg_len = skb->tail - previous_tail;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail;
 	return skb->len;
 nlmsg_failure:
 	skb_trim(skb, previous_tail - skb->data);
@@ -535,7 +535,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct inet_sock *inet = inet_sk(sk);
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct inet_diag_msg *r;
 	struct nlmsghdr *nlh;
 	long tmo;
@@ -574,7 +574,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 			       &inet6_rsk(req)->rmt_addr);
 	}
 #endif
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 
 	return skb->len;
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index fcb35cd5ccf..c199d231173 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -316,7 +316,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
 	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
 	serr->port = port;
 
-	__skb_pull(skb, skb->tail - skb->data);
+	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
 	skb_reset_transport_header(skb);
 
 	if (sock_queue_err_skb(sk, skb))
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 50d0b301380..ea0a491dce9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -513,7 +513,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 
 			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
-				nlh->nlmsg_len = skb->tail - (u8*)nlh;
+				nlh->nlmsg_len = (skb_tail_pointer(skb) -
+						  (u8 *)nlh);
 			} else {
 				nlh->nlmsg_type = NLMSG_ERROR;
 				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
@@ -580,7 +581,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 	 *	Copy the IP header
 	 */
 
-	skb_set_network_header(skb, skb->tail - skb->data);
+	skb->network_header = skb->tail;
 	skb_put(skb, ihl);
 	memcpy(skb->data,pkt->data,ihl);
 	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
@@ -1544,7 +1545,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
 	int ct;
 	struct rtnexthop *nhp;
 	struct net_device *dev = vif_table[c->mfc_parent].dev;
-	u8 *b = skb->tail;
+	u8 *b = skb_tail_pointer(skb);
 	struct rtattr *mp_head;
 
 	if (dev)
@@ -1564,7 +1565,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
 		}
 	}
 	mp_head->rta_type = RTA_MULTIPATH;
-	mp_head->rta_len = skb->tail - (u8*)mp_head;
+	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
 	rtm->rtm_type = RTN_MULTICAST;
 	return 1;
 
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index 25bd6896730..344ddbbdc75 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -162,7 +162,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		iph = ip_hdr(*pskb);
 		th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
 		data = (char *)th + (th->doff << 2);
-		data_limit = (*pskb)->tail;
+		data_limit = skb_tail_pointer(*pskb);
 
 		if (ip_vs_ftp_get_addrport(data, data_limit,
 					   SERVER_STRING,
@@ -269,7 +269,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	   the length of the header in 32-bit multiples, it is accurate
 	   to calculate data address by th+HLEN*4 */
 	data = data_start = (char *)th + (th->doff << 2);
-	data_limit = (*pskb)->tail;
+	data_limit = skb_tail_pointer(*pskb);
 
 	while (data <= data_limit - 6) {
 		if (strnicmp(data, "PASV\r\n", 6) == 0) {
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index b4450f1ccc1..6298d404e7c 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -37,28 +37,28 @@ target(struct sk_buff **pskb,
 	/* We assume that pln and hln were checked in the match */
 	if (mangle->flags & ARPT_MANGLE_SDEV) {
 		if (ARPT_DEV_ADDR_LEN_MAX < hln ||
-		   (arpptr + hln > (**pskb).tail))
+		   (arpptr + hln > skb_tail_pointer(*pskb)))
 			return NF_DROP;
 		memcpy(arpptr, mangle->src_devaddr, hln);
 	}
 	arpptr += hln;
 	if (mangle->flags & ARPT_MANGLE_SIP) {
 		if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
-		   (arpptr + pln > (**pskb).tail))
+		   (arpptr + pln > skb_tail_pointer(*pskb)))
 			return NF_DROP;
 		memcpy(arpptr, &mangle->u_s.src_ip, pln);
 	}
 	arpptr += pln;
 	if (mangle->flags & ARPT_MANGLE_TDEV) {
 		if (ARPT_DEV_ADDR_LEN_MAX < hln ||
-		   (arpptr + hln > (**pskb).tail))
+		   (arpptr + hln > skb_tail_pointer(*pskb)))
 			return NF_DROP;
 		memcpy(arpptr, mangle->tgt_devaddr, hln);
 	}
 	arpptr += hln;
 	if (mangle->flags & ARPT_MANGLE_TIP) {
 		if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
-		   (arpptr + pln > (**pskb).tail))
+		   (arpptr + pln > skb_tail_pointer(*pskb)))
 			return NF_DROP;
 		memcpy(arpptr, &mangle->u_t.tgt_ip, pln);
 	}
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 5842f1aa973..15e0d200223 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -191,7 +191,7 @@ ipq_flush(int verdict)
 static struct sk_buff *
 ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 {
-	unsigned char *old_tail;
+	sk_buff_data_t old_tail;
 	size_t size = 0;
 	size_t data_len = 0;
 	struct sk_buff *skb;
@@ -235,7 +235,7 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 	if (!skb)
 		goto nlmsg_failure;
 
-	old_tail= skb->tail;
+	old_tail = skb->tail;
 	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
 	pmsg = NLMSG_DATA(nlh);
 	memset(pmsg, 0, sizeof(*pmsg));
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index c2c92ff1278..8a40fbe842b 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -92,7 +92,8 @@ static void mangle_contents(struct sk_buff *skb,
 	/* move post-replacement */
 	memmove(data + match_offset + rep_len,
 		data + match_offset + match_len,
-		skb->tail - (data + match_offset + match_len));
+		skb->tail - (skb->network_header + dataoff +
+			     match_offset + match_len));
 
 	/* insert data from buffer */
 	memcpy(data + match_offset, rep_buffer, rep_len);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2b214cc3724..18a09a78ca0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2231,7 +2231,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
 		th->cwr = 0;
 	} while (skb->next);
 
-	delta = htonl(oldlen + (skb->tail - skb_transport_header(skb)) +
+	delta = htonl(oldlen + (skb->tail - skb->transport_header) +
 		      skb->data_len);
 	th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
 				(__force u32)delta));
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 29c53fbb220..c22cdcd8432 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -733,7 +733,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
 	}
 	skb_shinfo(skb)->nr_frags = k;
 
-	skb->tail = skb->data;
+	skb_reset_tail_pointer(skb);
 	skb->data_len -= len;
 	skb->len = skb->data_len;
 }
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index f16f4f0c581..4a355fea409 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -268,7 +268,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
 	serr->port = fl->fl_ip_dport;
 
-	__skb_pull(skb, skb->tail - skb->data);
+	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
 	skb_reset_transport_header(skb);
 
 	if (sock_queue_err_skb(sk, skb))
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 7fdf84dee73..b8e8914cc00 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -51,6 +51,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	int clen;
 	int alen;
 	int nfrags;
+	u8 *tail;
 	struct esp_data *esp = x->data;
 	int hdr_len = (skb_transport_offset(skb) +
 		       sizeof(*esph) + esp->conf.ivlen);
@@ -78,18 +79,19 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	}
 
 	/* Fill padding... */
+	tail = skb_tail_pointer(trailer);
 	do {
 		int i;
 		for (i=0; i<clen-skb->len - 2; i++)
-			*(u8*)(trailer->tail + i) = i+1;
+			tail[i] = i + 1;
 	} while (0);
-	*(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
+	tail[clen-skb->len - 2] = (clen - skb->len) - 2;
 	pskb_put(skb, trailer, clen - skb->len);
 
 	top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len);
 	esph = (struct ipv6_esp_hdr *)skb_transport_header(skb);
 	top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph));
-	*(u8 *)(trailer->tail - 1) = *skb_network_header(skb);
+	*(skb_tail_pointer(skb) - 1) = *skb_network_header(skb);
 	*skb_network_header(skb) = IPPROTO_ESP;
 
 	esph->spi = x->id.spi;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index a6a275db88c..275d2e812a4 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -51,7 +51,7 @@
 int ipv6_find_tlv(struct sk_buff *skb, int offset, int type)
 {
 	const unsigned char *nh = skb_network_header(skb);
-	int packet_len = skb->tail - nh;
+	int packet_len = skb->tail - skb->network_header;
 	struct ipv6_opt_hdr *hdr;
 	int len;
 
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index d3edc3cf1ce..e94992ab92e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -317,7 +317,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
 	int hlimit, tclass;
 	int err = 0;
 
-	if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
+	if ((u8 *)hdr < skb->head ||
+	    (skb->network_header + sizeof(*hdr)) > skb->tail)
 		return;
 
 	/*
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b2c092c6b9d..e2b8db6b9ae 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -514,7 +514,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 	u16 offset = sizeof(struct ipv6hdr);
 	struct ipv6_opt_hdr *exthdr =
 				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
-	unsigned int packet_len = skb->tail - skb_network_header(skb);
+	unsigned int packet_len = skb->tail - skb->network_header;
 	int found_rhdr = 0;
 	*nexthdr = &ipv6_hdr(skb)->nexthdr;
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 4c45bcce75e..6c2758951d6 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1423,7 +1423,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
 
 	memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
 
-	skb_set_transport_header(skb, skb->tail - skb->data);
+	skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data);
 	skb_put(skb, sizeof(*pmr));
 	pmr = (struct mld2_report *)skb_transport_header(skb);
 	pmr->type = ICMPV6_MLD2_REPORT;
@@ -1468,8 +1468,8 @@ static void mld_sendpack(struct sk_buff *skb)
 	int err;
 
 	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
-	payload_len = skb->tail - skb_network_header(skb) - sizeof(*pip6);
-	mldlen = skb->tail - skb_transport_header(skb);
+	payload_len = (skb->tail - skb->network_header) - sizeof(*pip6);
+	mldlen = skb->tail - skb->transport_header;
 	pip6->payload_len = htons(payload_len);
 
 	pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index f0288e92fb5..6ed763ee678 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -260,7 +260,7 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb,
 	struct ipv6_opt_hdr *exthdr =
 				   (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
 	const unsigned char *nh = skb_network_header(skb);
-	unsigned int packet_len = skb->tail - nh;
+	unsigned int packet_len = skb->tail - skb->network_header;
 	int found_rhdr = 0;
 
 	*nexthdr = &ipv6_hdr(skb)->nexthdr;
@@ -392,7 +392,7 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb,
 	struct ipv6_opt_hdr *exthdr =
 				   (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
 	const unsigned char *nh = skb_network_header(skb);
-	unsigned int packet_len = skb->tail - nh;
+	unsigned int packet_len = skb->tail - skb->network_header;
 	int found_rhdr = 0;
 
 	*nexthdr = &ipv6_hdr(skb)->nexthdr;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f8e619772fb..b1cf7081647 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -492,7 +492,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 	ip6_nd_hdr(sk, skb, dev, src_addr, daddr, IPPROTO_ICMPV6, len);
 
-	skb_set_transport_header(skb, skb->tail - skb->data);
+	skb->transport_header = skb->tail;
 	skb_put(skb, len);
 	msg = (struct nd_msg *)skb_transport_header(skb);
 
@@ -584,7 +584,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
 
-	skb_set_transport_header(skb, skb->tail - skb->data);
+	skb->transport_header = skb->tail;
 	skb_put(skb, len);
 	msg = (struct nd_msg *)skb_transport_header(skb);
 	msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION;
@@ -685,7 +685,7 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
 
-	skb_set_transport_header(skb, skb->tail - skb->data);
+	skb->transport_header = skb->tail;
 	skb_put(skb, len);
 	hdr = icmp6_hdr(skb);
 	hdr->icmp6_type = NDISC_ROUTER_SOLICITATION;
@@ -767,7 +767,8 @@ static void ndisc_recv_ns(struct sk_buff *skb)
 	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
 	struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
 	u8 *lladdr = NULL;
-	u32 ndoptlen = skb->tail - msg->opt;
+	u32 ndoptlen = skb->tail - (skb->transport_header +
+				    offsetof(struct nd_msg, opt));
 	struct ndisc_options ndopts;
 	struct net_device *dev = skb->dev;
 	struct inet6_ifaddr *ifp;
@@ -945,7 +946,8 @@ static void ndisc_recv_na(struct sk_buff *skb)
 	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
 	struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
 	u8 *lladdr = NULL;
-	u32 ndoptlen = skb->tail - msg->opt;
+	u32 ndoptlen = skb->tail - (skb->transport_header +
+				    offsetof(struct nd_msg, opt));
 	struct ndisc_options ndopts;
 	struct net_device *dev = skb->dev;
 	struct inet6_ifaddr *ifp;
@@ -1111,8 +1113,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 
 	__u8 * opt = (__u8 *)(ra_msg + 1);
 
-	optlen = (skb->tail - skb_transport_header(skb)) -
-		  sizeof(struct ra_msg);
+	optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
 
 	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
 		ND_PRINTK2(KERN_WARNING
@@ -1361,7 +1362,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
 		return;
 	}
 
-	optlen = skb->tail - skb_transport_header(skb);
+	optlen = skb->tail - skb->transport_header;
 	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
 
 	if (optlen < 0) {
@@ -1522,7 +1523,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 	ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
 		   IPPROTO_ICMPV6, len);
 
-	skb_set_transport_header(buff, buff->tail - buff->data);
+	skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
 	skb_put(buff, len);
 	icmph = icmp6_hdr(buff);
 
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 66a2c413525..5cfce218c5e 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -189,7 +189,7 @@ ipq_flush(int verdict)
 static struct sk_buff *
 ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 {
-	unsigned char *old_tail;
+	sk_buff_data_t old_tail;
 	size_t size = 0;
 	size_t data_len = 0;
 	struct sk_buff *skb;
@@ -233,7 +233,7 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 	if (!skb)
 		goto nlmsg_failure;
 
-	old_tail= skb->tail;
+	old_tail = skb->tail;
 	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
 	pmsg = NLMSG_DATA(nlh);
 	memset(pmsg, 0, sizeof(*pmsg));
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8705f6a502d..2b3be68b70a 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1077,7 +1077,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
 			spin_lock_bh(&sk->sk_receive_queue.lock);
 			skb = skb_peek(&sk->sk_receive_queue);
 			if (skb != NULL)
-				amount = skb->tail - skb_transport_header(skb);
+				amount = skb->tail - skb->transport_header;
 			spin_unlock_bh(&sk->sk_receive_queue.lock);
 			return put_user(amount, (int __user *)arg);
 		}
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
index 01d7c9c7b3b..e5e4792a031 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/net/irda/ircomm/ircomm_param.c
@@ -133,8 +133,8 @@ int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush)
 	 * Inserting is a little bit tricky since we don't know how much
 	 * room we will need. But this should hopefully work OK
 	 */
-	count = irda_param_insert(self, pi, skb->tail, skb_tailroom(skb),
-				  &ircomm_param_info);
+	count = irda_param_insert(self, pi, skb_tail_pointer(skb),
+				  skb_tailroom(skb), &ircomm_param_info);
 	if (count < 0) {
 		IRDA_WARNING("%s(), no room for parameter!\n", __FUNCTION__);
 		spin_unlock_irqrestore(&self->spinlock, flags);
diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c
index fcf9d659962..ed69773b0f8 100644
--- a/net/irda/irlan/irlan_common.c
+++ b/net/irda/irlan/irlan_common.c
@@ -1039,7 +1039,7 @@ static int __irlan_insert_param(struct sk_buff *skb, char *param, int type,
 	}
 
 	/* Insert at end of sk-buffer */
-	frame = skb->tail;
+	frame = skb_tail_pointer(skb);
 
 	/* Make space for data */
 	if (skb_tailroom(skb) < (param_len+value_len+3)) {
diff --git a/net/irda/qos.c b/net/irda/qos.c
index 349012c926b..aeb18cf1dca 100644
--- a/net/irda/qos.c
+++ b/net/irda/qos.c
@@ -469,49 +469,49 @@ int irlap_insert_qos_negotiation_params(struct irlap_cb *self,
 	int ret;
 
 	/* Insert data rate */
-	ret = irda_param_insert(self, PI_BAUD_RATE, skb->tail,
+	ret = irda_param_insert(self, PI_BAUD_RATE, skb_tail_pointer(skb),
 				skb_tailroom(skb), &irlap_param_info);
 	if (ret < 0)
 		return ret;
 	skb_put(skb, ret);
 
 	/* Insert max turnaround time */
-	ret = irda_param_insert(self, PI_MAX_TURN_TIME, skb->tail,
+	ret = irda_param_insert(self, PI_MAX_TURN_TIME, skb_tail_pointer(skb),
 				skb_tailroom(skb), &irlap_param_info);
 	if (ret < 0)
 		return ret;
 	skb_put(skb, ret);
 
 	/* Insert data size */
-	ret = irda_param_insert(self, PI_DATA_SIZE, skb->tail,
+	ret = irda_param_insert(self, PI_DATA_SIZE, skb_tail_pointer(skb),
 				skb_tailroom(skb), &irlap_param_info);
 	if (ret < 0)
 		return ret;
 	skb_put(skb, ret);
 
 	/* Insert window size */
-	ret = irda_param_insert(self, PI_WINDOW_SIZE, skb->tail,
+	ret = irda_param_insert(self, PI_WINDOW_SIZE, skb_tail_pointer(skb),
 				skb_tailroom(skb), &irlap_param_info);
 	if (ret < 0)
 		return ret;
 	skb_put(skb, ret);
 
 	/* Insert additional BOFs */
-	ret = irda_param_insert(self, PI_ADD_BOFS, skb->tail,
+	ret = irda_param_insert(self, PI_ADD_BOFS, skb_tail_pointer(skb),
 				skb_tailroom(skb), &irlap_param_info);
 	if (ret < 0)
 		return ret;
 	skb_put(skb, ret);
 
 	/* Insert minimum turnaround time */
-	ret = irda_param_insert(self, PI_MIN_TURN_TIME, skb->tail,
+	ret = irda_param_insert(self, PI_MIN_TURN_TIME, skb_tail_pointer(skb),
 				skb_tailroom(skb), &irlap_param_info);
 	if (ret < 0)
 		return ret;
 	skb_put(skb, ret);
 
 	/* Insert link disconnect/threshold time */
-	ret = irda_param_insert(self, PI_LINK_DISC, skb->tail,
+	ret = irda_param_insert(self, PI_LINK_DISC, skb_tail_pointer(skb),
 				skb_tailroom(skb), &irlap_param_info);
 	if (ret < 0)
 		return ret;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 48f05314ebf..442300c633d 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -268,9 +268,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 	struct nfattr *nest_parms;
-	unsigned char *b;
-
-	b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	event |= NFNL_SUBSYS_CTNETLINK << 8;
 	nlh    = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
@@ -303,7 +301,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	    ctnetlink_dump_use(skb, ct) < 0)
 		goto nfattr_failure;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
@@ -322,7 +320,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
 	struct nf_conn *ct = (struct nf_conn *)ptr;
 	struct sk_buff *skb;
 	unsigned int type;
-	unsigned char *b;
+	sk_buff_data_t b;
 	unsigned int flags = 0, group;
 
 	/* ignore our fake conntrack entry */
@@ -1152,9 +1150,7 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
-	unsigned char *b;
-
-	b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
 	nlh    = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
@@ -1168,7 +1164,7 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
 		goto nfattr_failure;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
@@ -1186,7 +1182,7 @@ static int ctnetlink_expect_event(struct notifier_block *this,
 	struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr;
 	struct sk_buff *skb;
 	unsigned int type;
-	unsigned char *b;
+	sk_buff_data_t b;
 	int flags = 0;
 
 	if (events & IPEXP_NEW) {
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 5eeebd2efa7..9709f94787f 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -409,15 +409,14 @@ __build_packet_message(struct nfulnl_instance *inst,
 			const struct nf_loginfo *li,
 			const char *prefix, unsigned int plen)
 {
-	unsigned char *old_tail;
 	struct nfulnl_msg_packet_hdr pmsg;
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 	__be32 tmp_uint;
+	sk_buff_data_t old_tail = inst->skb->tail;
 
 	UDEBUG("entered\n");
 
-	old_tail = inst->skb->tail;
 	nlh = NLMSG_PUT(inst->skb, 0, 0,
 			NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
 			sizeof(struct nfgenmsg));
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index cfbee39f61d..b6585caa431 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -338,7 +338,7 @@ static struct sk_buff *
 nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			   struct nfqnl_queue_entry *entry, int *errp)
 {
-	unsigned char *old_tail;
+	sk_buff_data_t old_tail;
 	size_t size;
 	size_t data_len = 0;
 	struct sk_buff *skb;
@@ -404,7 +404,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	if (!skb)
 		goto nlmsg_failure;
 
-	old_tail= skb->tail;
+	old_tail = skb->tail;
 	nlh = NLMSG_PUT(skb, 0, 0,
 			NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
 			sizeof(struct nfgenmsg));
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 50dc5edb775..fdb6eb13cbc 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -785,7 +785,7 @@ static inline struct sk_buff *netlink_trim(struct sk_buff *skb,
 
 	skb_orphan(skb);
 
-	delta = skb->end - skb->tail;
+	delta = skb->end - skb_tail_pointer(skb);
 	if (delta * 2 < skb->truesize)
 		return skb;
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 51c059b09a3..36388b2f32f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -775,7 +775,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		err = -EINVAL;
 		res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
 		if (sock->type != SOCK_DGRAM) {
-			skb->tail = skb->data;
+			skb_reset_tail_pointer(skb);
 			skb->len = 0;
 		} else if (res < 0)
 			goto out_free;
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index cb21617a567..28326fb1fc4 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -93,7 +93,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
 				continue;
 			a->priv = p;
 			a->order = n_i;
-			r = (struct rtattr*) skb->tail;
+			r = (struct rtattr *)skb_tail_pointer(skb);
 			RTA_PUT(skb, a->order, 0, NULL);
 			err = tcf_action_dump_1(skb, a, 0, 0);
 			if (err < 0) {
@@ -101,7 +101,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
 				skb_trim(skb, (u8*)r - skb->data);
 				goto done;
 			}
-			r->rta_len = skb->tail - (u8*)r;
+			r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
 			n_i++;
 			if (n_i >= TCA_ACT_MAX_PRIO)
 				goto done;
@@ -125,7 +125,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 	struct rtattr *r ;
 	int i= 0, n_i = 0;
 
-	r = (struct rtattr*) skb->tail;
+	r = (struct rtattr *)skb_tail_pointer(skb);
 	RTA_PUT(skb, a->order, 0, NULL);
 	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
 	for (i = 0; i < (hinfo->hmask + 1); i++) {
@@ -140,7 +140,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 		}
 	}
 	RTA_PUT(skb, TCA_FCNT, 4, &n_i);
-	r->rta_len = skb->tail - (u8*)r;
+	r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
 
 	return n_i;
 rtattr_failure:
@@ -423,7 +423,7 @@ int
 tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
 	int err = -EINVAL;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *r;
 
 	if (a->ops == NULL || a->ops->dump == NULL)
@@ -432,10 +432,10 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind);
 	if (tcf_action_copy_stats(skb, a, 0))
 		goto rtattr_failure;
-	r = (struct rtattr*) skb->tail;
+	r = (struct rtattr *)skb_tail_pointer(skb);
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
-		r->rta_len = skb->tail - (u8*)r;
+		r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
 		return err;
 	}
 
@@ -449,17 +449,17 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
 {
 	struct tc_action *a;
 	int err = -EINVAL;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *r ;
 
 	while ((a = act) != NULL) {
-		r = (struct rtattr*) skb->tail;
+		r = (struct rtattr *)skb_tail_pointer(skb);
 		act = a->next;
 		RTA_PUT(skb, a->order, 0, NULL);
 		err = tcf_action_dump_1(skb, a, bind, ref);
 		if (err < 0)
 			goto errout;
-		r->rta_len = skb->tail - (u8*)r;
+		r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
 	}
 
 	return 0;
@@ -635,7 +635,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
 {
 	struct tcamsg *t;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *x;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
@@ -645,15 +645,15 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	x = (struct rtattr*) skb->tail;
+	x = (struct rtattr *)skb_tail_pointer(skb);
 	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
 
 	if (tcf_action_dump(skb, a, bind, ref) < 0)
 		goto rtattr_failure;
 
-	x->rta_len = skb->tail - (u8*)x;
+	x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 rtattr_failure:
@@ -767,7 +767,7 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
 		return -ENOBUFS;
 	}
 
-	b = (unsigned char *)skb->tail;
+	b = skb_tail_pointer(skb);
 
 	if (rtattr_parse_nested(tb, TCA_ACT_MAX, rta) < 0)
 		goto err_out;
@@ -783,16 +783,16 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	x = (struct rtattr *) skb->tail;
+	x = (struct rtattr *)skb_tail_pointer(skb);
 	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
 
 	err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
 	if (err < 0)
 		goto rtattr_failure;
 
-	x->rta_len = skb->tail - (u8 *) x;
+	x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	nlh->nlmsg_flags |= NLM_F_ROOT;
 	module_put(a->ops->owner);
 	kfree(a);
@@ -884,7 +884,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
 	if (!skb)
 		return -ENOBUFS;
 
-	b = (unsigned char *)skb->tail;
+	b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
 	t = NLMSG_DATA(nlh);
@@ -892,15 +892,15 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	x = (struct rtattr*) skb->tail;
+	x = (struct rtattr *)skb_tail_pointer(skb);
 	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
 
 	if (tcf_action_dump(skb, a, 0, 0) < 0)
 		goto rtattr_failure;
 
-	x->rta_len = skb->tail - (u8*)x;
+	x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	NETLINK_CB(skb).dst_group = RTNLGRP_TC;
 
 	err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
@@ -1015,7 +1015,7 @@ static int
 tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *x;
 	struct tc_action_ops *a_o;
 	struct tc_action a;
@@ -1048,7 +1048,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
-	x = (struct rtattr *) skb->tail;
+	x = (struct rtattr *)skb_tail_pointer(skb);
 	RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
 
 	ret = a_o->walk(skb, cb, RTM_GETACTION, &a);
@@ -1056,12 +1056,12 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 		goto rtattr_failure;
 
 	if (ret > 0) {
-		x->rta_len = skb->tail - (u8 *) x;
+		x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
 		ret = skb->len;
 	} else
 		skb_trim(skb, (u8*)x - skb->data);
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	if (NETLINK_CB(cb->skb).pid && ret)
 		nlh->nlmsg_flags |= NLM_F_MULTI;
 	module_put(a_o->owner);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 87d0faf3286..aad748b3b38 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -155,7 +155,7 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result
 
 static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_gact opt;
 	struct tcf_gact *gact = a->priv;
 	struct tcf_t t;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 47f0b132423..2ccfd5b20fa 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -245,7 +245,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
 
 static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_ipt *ipt = a->priv;
 	struct ipt_entry_target *t;
 	struct tcf_t tm;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 3e93683e9ab..15f6ecdaf61 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -206,7 +206,7 @@ bad_mirred:
 
 static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_mirred *m = a->priv;
 	struct tc_mirred opt;
 	struct tcf_t t;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 20813eee8af..d654cea1a46 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -195,7 +195,7 @@ done:
 static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
 			  int bind, int ref)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_pedit *p = a->priv;
 	struct tc_pedit *opt;
 	struct tcf_t t;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 10a5a5c36f7..068b2376366 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -80,7 +80,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
 				continue;
 			a->priv = p;
 			a->order = index;
-			r = (struct rtattr*) skb->tail;
+			r = (struct rtattr *)skb_tail_pointer(skb);
 			RTA_PUT(skb, a->order, 0, NULL);
 			if (type == RTM_DELACTION)
 				err = tcf_action_dump_1(skb, a, 0, 1);
@@ -91,7 +91,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
 				skb_trim(skb, (u8*)r - skb->data);
 				goto done;
 			}
-			r->rta_len = skb->tail - (u8*)r;
+			r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
 			n_i++;
 		}
 	}
@@ -326,7 +326,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
 static int
 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 {
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_police *police = a->priv;
 	struct tc_police opt;
 
@@ -572,7 +572,7 @@ EXPORT_SYMBOL(tcf_police);
 
 int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_police opt;
 
 	opt.index = police->tcf_index;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index c7971182af0..ecbcfa59b76 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -155,7 +155,7 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
 static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
 				int bind, int ref)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_defact *d = a->priv;
 	struct tc_defact opt;
 	struct tcf_t t;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5c6ffdb77d2..84231baf77d 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -323,7 +323,7 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
 	tcm = NLMSG_DATA(nlh);
@@ -340,7 +340,7 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
 		if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0)
 			goto rtattr_failure;
 	}
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
@@ -563,30 +563,30 @@ tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
 		 * to work with both old and new modes of entering
 		 * tc data even if iproute2  was newer - jhs
 		 */
-		struct rtattr * p_rta = (struct rtattr*) skb->tail;
+		struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb);
 
 		if (exts->action->type != TCA_OLD_COMPAT) {
 			RTA_PUT(skb, map->action, 0, NULL);
 			if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
 				goto rtattr_failure;
-			p_rta->rta_len = skb->tail - (u8*)p_rta;
+			p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
 		} else if (map->police) {
 			RTA_PUT(skb, map->police, 0, NULL);
 			if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
 				goto rtattr_failure;
-			p_rta->rta_len = skb->tail - (u8*)p_rta;
+			p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
 		}
 	}
 #elif defined CONFIG_NET_CLS_POLICE
 	if (map->police && exts->police) {
-		struct rtattr * p_rta = (struct rtattr*) skb->tail;
+		struct rtattr *p_rta = (struct rtattr *)skb_tail_pointer(skb);
 
 		RTA_PUT(skb, map->police, 0, NULL);
 
 		if (tcf_police_dump(skb, exts->police) < 0)
 			goto rtattr_failure;
 
-		p_rta->rta_len = skb->tail - (u8*)p_rta;
+		p_rta->rta_len = skb_tail_pointer(skb) - (u8 *)p_rta;
 	}
 #endif
 	return 0;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 4a91f082a81..800ec2ac326 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -245,7 +245,7 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
 		      struct sk_buff *skb, struct tcmsg *t)
 {
 	struct basic_filter *f = (struct basic_filter *) fh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	if (f == NULL)
@@ -263,7 +263,7 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
 	    tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
 		goto rtattr_failure;
 
-	rta->rta_len = (skb->tail - b);
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 rtattr_failure:
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 5dbb9d451f7..f5f355852a8 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -348,7 +348,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 {
 	struct fw_head *head = (struct fw_head *)tp->root;
 	struct fw_filter *f = (struct fw_filter*)fh;
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	if (f == NULL)
@@ -374,7 +374,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 	if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
 		goto rtattr_failure;
 
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 
 	if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0)
 		goto rtattr_failure;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index abc47cc48ad..1f94df36239 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -562,7 +562,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 		       struct sk_buff *skb, struct tcmsg *t)
 {
 	struct route4_filter *f = (struct route4_filter*)fh;
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 	u32 id;
 
@@ -591,7 +591,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 	if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
 		goto rtattr_failure;
 
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 
 	if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0)
 		goto rtattr_failure;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 6f373b020eb..87ed6f3c507 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -593,7 +593,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 {
 	struct rsvp_filter *f = (struct rsvp_filter*)fh;
 	struct rsvp_session *s;
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 	struct tc_rsvp_pinfo pinfo;
 
@@ -623,7 +623,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 	if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
 		goto rtattr_failure;
 
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 
 	if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
 		goto rtattr_failure;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 7563fdcef4b..0537d6066b4 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -448,7 +448,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 {
 	struct tcindex_data *p = PRIV(tp);
 	struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
@@ -463,7 +463,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 		RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift);
 		RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through),
 		    &p->fall_through);
-		rta->rta_len = skb->tail-b;
+		rta->rta_len = skb_tail_pointer(skb) - b;
 	} else {
 		if (p->perfect) {
 			t->tcm_handle = r-p->perfect;
@@ -486,7 +486,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 
 		if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
 			goto rtattr_failure;
-		rta->rta_len = skb->tail-b;
+		rta->rta_len = skb_tail_pointer(skb) - b;
 
 		if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0)
 			goto rtattr_failure;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 695b34051b9..fa11bb75004 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -213,7 +213,7 @@ check_terminal:
 			off2 = 0;
 		}
 
-		if (ptr < skb->tail)
+		if (ptr < skb_tail_pointer(skb))
 			goto next_ht;
 	}
 
@@ -718,7 +718,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
 	struct tc_u_knode *n = (struct tc_u_knode*)fh;
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	if (n == NULL)
@@ -765,7 +765,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 #endif
 	}
 
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	if (TC_U32_KEY(n->handle))
 		if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0)
 			goto rtattr_failure;
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 959c306c571..63146d339d8 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -418,17 +418,19 @@ void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
 int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
 {
 	int i;
-	struct rtattr * top_start = (struct rtattr*) skb->tail;
-	struct rtattr * list_start;
+	u8 *tail;
+	struct rtattr *top_start = (struct rtattr *)skb_tail_pointer(skb);
+	struct rtattr *list_start;
 
 	RTA_PUT(skb, tlv, 0, NULL);
 	RTA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
 
-	list_start = (struct rtattr *) skb->tail;
+	list_start = (struct rtattr *)skb_tail_pointer(skb);
 	RTA_PUT(skb, TCA_EMATCH_TREE_LIST, 0, NULL);
 
+	tail = skb_tail_pointer(skb);
 	for (i = 0; i < tree->hdr.nmatches; i++) {
-		struct rtattr *match_start = (struct rtattr*) skb->tail;
+		struct rtattr *match_start = (struct rtattr *)tail;
 		struct tcf_ematch *em = tcf_em_get_match(tree, i);
 		struct tcf_ematch_hdr em_hdr = {
 			.kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,
@@ -447,11 +449,12 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
 		} else if (em->datalen > 0)
 			RTA_PUT_NOHDR(skb, em->datalen, (void *) em->data);
 
-		match_start->rta_len = skb->tail - (u8*) match_start;
+		tail = skb_tail_pointer(skb);
+		match_start->rta_len = tail - (u8 *)match_start;
 	}
 
-	list_start->rta_len = skb->tail - (u8 *) list_start;
-	top_start->rta_len = skb->tail - (u8 *) top_start;
+	list_start->rta_len = tail - (u8 *)list_start;
+	top_start->rta_len = tail - (u8 *)top_start;
 
 	return 0;
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ecbdc6b42a9..7482a950717 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -813,7 +813,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct gnet_dump d;
 
 	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
@@ -847,7 +847,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 	if (gnet_stats_finish_copy(&d) < 0)
 		goto rtattr_failure;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
@@ -1051,7 +1051,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct gnet_dump d;
 	struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
 
@@ -1076,7 +1076,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 	if (gnet_stats_finish_copy(&d) < 0)
 		goto rtattr_failure;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index baca8743c12..1d7bb163213 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -631,7 +631,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 {
 	struct atm_qdisc_data *p = PRIV(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *) cl;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	DPRINTK("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
@@ -661,7 +661,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 
 		RTA_PUT(skb,TCA_ATM_EXCESS,sizeof(zero),&zero);
 	}
-	rta->rta_len = skb->tail-b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 rtattr_failure:
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index d83414d828d..be98a01253e 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1465,7 +1465,7 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
 
 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
 	return skb->len;
@@ -1477,7 +1477,7 @@ rtattr_failure:
 
 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_lssopt opt;
 
 	opt.flags = 0;
@@ -1502,7 +1502,7 @@ rtattr_failure:
 
 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_wrropt opt;
 
 	opt.flags = 0;
@@ -1520,7 +1520,7 @@ rtattr_failure:
 
 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_ovl opt;
 
 	opt.strategy = cl->ovl_strategy;
@@ -1537,7 +1537,7 @@ rtattr_failure:
 
 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_fopt opt;
 
 	if (cl->split || cl->defmap) {
@@ -1556,7 +1556,7 @@ rtattr_failure:
 #ifdef CONFIG_NET_CLS_POLICE
 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
 {
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_police opt;
 
 	if (cl->police) {
@@ -1590,14 +1590,14 @@ static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	rta = (struct rtattr*)b;
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	if (cbq_dump_attr(skb, &q->link) < 0)
 		goto rtattr_failure;
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 rtattr_failure:
@@ -1619,7 +1619,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
 	       struct sk_buff *skb, struct tcmsg *tcm)
 {
 	struct cbq_class *cl = (struct cbq_class*)arg;
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	if (cl->tparent)
@@ -1633,7 +1633,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	if (cbq_dump_attr(skb, cl) < 0)
 		goto rtattr_failure;
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 rtattr_failure:
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 5197b6caaf2..80e6f811e3b 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1363,7 +1363,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
 		struct tcmsg *tcm)
 {
 	struct hfsc_class *cl = (struct hfsc_class *)arg;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta = (struct rtattr *)b;
 
 	tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT;
@@ -1374,7 +1374,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	if (hfsc_dump_curves(skb, cl) < 0)
 		goto rtattr_failure;
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
  rtattr_failure:
@@ -1576,7 +1576,7 @@ static int
 hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_hfsc_qopt qopt;
 
 	qopt.defcls = q->defcls;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index f76c20c0a10..c687388a8cb 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1110,7 +1110,7 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt)
 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 	struct tc_htb_glob gopt;
 	spin_lock_bh(&sch->dev->queue_lock);
@@ -1123,12 +1123,12 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	rta = (struct rtattr *)b;
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
 	RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	spin_unlock_bh(&sch->dev->queue_lock);
 	return skb->len;
 rtattr_failure:
 	spin_unlock_bh(&sch->dev->queue_lock);
-	skb_trim(skb, skb->tail - skb->data);
+	skb_trim(skb, skb_tail_pointer(skb) - skb->data);
 	return -1;
 }
 
@@ -1136,7 +1136,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 			  struct sk_buff *skb, struct tcmsg *tcm)
 {
 	struct htb_class *cl = (struct htb_class *)arg;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 	struct tc_htb_opt opt;
 
@@ -1159,7 +1159,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	opt.prio = cl->un.leaf.prio;
 	opt.level = cl->level;
 	RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	spin_unlock_bh(&sch->dev->queue_lock);
 	return skb->len;
 rtattr_failure:
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index cfe070ee6ee..d19f4070c23 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -362,12 +362,12 @@ static void ingress_destroy(struct Qdisc *sch)
 
 static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 
 	rta = (struct rtattr *) b;
 	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 rtattr_failure:
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 915f82a2cc3..2a9b1e429ff 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -583,7 +583,7 @@ static void netem_destroy(struct Qdisc *sch)
 static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	const struct netem_sched_data *q = qdisc_priv(sch);
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta = (struct rtattr *) b;
 	struct tc_netem_qopt qopt;
 	struct tc_netem_corr cor;
@@ -611,7 +611,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	corrupt.correlation = q->corrupt_cor.rho;
 	RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
 
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 
 	return skb->len;
 
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index de889f23f22..5b371109ec1 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -271,7 +271,7 @@ static int prio_init(struct Qdisc *sch, struct rtattr *opt)
 static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_prio_qopt opt;
 
 	opt.bands = q->bands;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index e3695407afc..a511ba83e26 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -461,7 +461,7 @@ static void sfq_destroy(struct Qdisc *sch)
 static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_sfq_qopt opt;
 
 	opt.quantum = q->quantum;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index f14692f3a14..231895562c6 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -387,7 +387,7 @@ static void tbf_destroy(struct Qdisc *sch)
 static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
-	unsigned char	 *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	struct rtattr *rta;
 	struct tc_tbf_qopt opt;
 
@@ -403,7 +403,7 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
 	opt.mtu = q->mtu;
 	opt.buffer = q->buffer;
 	RTA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
-	rta->rta_len = skb->tail - b;
+	rta->rta_len = skb_tail_pointer(skb) - b;
 
 	return skb->len;
 
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 1ff47b18724..18b97eedc1f 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -612,7 +612,7 @@ int sctp_rcv_ootb(struct sk_buff *skb)
 			break;
 
 		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
-		if (ch_end > skb->tail)
+		if (ch_end > skb_tail_pointer(skb))
 			break;
 
 		/* RFC 8.4, 2) If the OOTB packet contains an ABORT chunk, the
@@ -644,7 +644,7 @@ int sctp_rcv_ootb(struct sk_buff *skb)
 		}
 
 		ch = (sctp_chunkhdr_t *) ch_end;
-	} while (ch_end < skb->tail);
+	} while (ch_end < skb_tail_pointer(skb));
 
 	return 0;
 
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index c30629e1778..88aa2240754 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -159,16 +159,16 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
 	 * the skb->tail.
 	 */
 	if (unlikely(skb_is_nonlinear(chunk->skb))) {
-		if (chunk->chunk_end > chunk->skb->tail)
-			chunk->chunk_end = chunk->skb->tail;
+		if (chunk->chunk_end > skb_tail_pointer(chunk->skb))
+			chunk->chunk_end = skb_tail_pointer(chunk->skb);
 	}
 	skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
 	chunk->subh.v = NULL; /* Subheader is no longer valid.  */
 
-	if (chunk->chunk_end < chunk->skb->tail) {
+	if (chunk->chunk_end < skb_tail_pointer(chunk->skb)) {
 		/* This is not a singleton */
 		chunk->singleton = 0;
-	} else if (chunk->chunk_end > chunk->skb->tail) {
+	} else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) {
 		/* RFC 2960, Section 6.10  Bundling
 		 *
 		 * Partial chunks MUST NOT be placed in an SCTP packet.
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 60c5b59d4c6..759ea3d1997 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1143,7 +1143,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
 
 	/* Adjust the chunk length field.  */
 	chunk->chunk_hdr->length = htons(chunklen + padlen + len);
-	chunk->chunk_end = chunk->skb->tail;
+	chunk->chunk_end = skb_tail_pointer(chunk->skb);
 
 	return target;
 }
@@ -1168,7 +1168,7 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int off, int len,
 	/* Adjust the chunk length field.  */
 	chunk->chunk_hdr->length =
 		htons(ntohs(chunk->chunk_hdr->length) + len);
-	chunk->chunk_end = chunk->skb->tail;
+	chunk->chunk_end = skb_tail_pointer(chunk->skb);
 
 out:
 	return err;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index bf502c499c8..438e5dc5c71 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3115,7 +3115,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
 			break;
 
 		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
-		if (ch_end > skb->tail)
+		if (ch_end > skb_tail_pointer(skb))
 			break;
 
 		if (SCTP_CID_SHUTDOWN_ACK == ch->type)
@@ -3130,7 +3130,7 @@ sctp_disposition_t sctp_sf_ootb(const struct sctp_endpoint *ep,
 			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
 
 		ch = (sctp_chunkhdr_t *) ch_end;
-	} while (ch_end < skb->tail);
+	} while (ch_end < skb_tail_pointer(skb));
 
 	if (ootb_shut_ack)
 		sctp_sf_shut_8_4_5(ep, asoc, type, arg, commands);
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 14789a82de5..c71337a22d3 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -89,7 +89,7 @@ struct sk_buff *tipc_cfg_reply_alloc(int payload_size)
 int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type,
 			void *tlv_data, int tlv_data_size)
 {
-	struct tlv_desc *tlv = (struct tlv_desc *)buf->tail;
+	struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(buf);
 	int new_tlv_space = TLV_SPACE(tlv_data_size);
 
 	if (skb_tailroom(buf) < new_tlv_space) {
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b71739fbe2c..45832fb75ea 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1020,7 +1020,7 @@ restart:
 
 	if (!err) {
 		buf_crs = (unsigned char *)(TIPC_SKB_CB(buf)->handle);
-		sz = buf->tail - buf_crs;
+		sz = skb_tail_pointer(buf) - buf_crs;
 
 		needed = (buf_len - sz_copied);
 		sz_to_copy = (sz <= needed) ? sz : needed;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 816e3690b60..814bb3125ad 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -576,7 +576,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
 	struct sk_buff *skb = sp->out_skb;
 	struct xfrm_usersa_info *p;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	if (sp->this_idx < sp->start_idx)
 		goto out;
@@ -621,7 +621,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
 	if (x->lastused)
 		RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused);
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 out:
 	sp->this_idx++;
 	return 0;
@@ -1157,7 +1157,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
 	struct sk_buff *in_skb = sp->in_skb;
 	struct sk_buff *skb = sp->out_skb;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	if (sp->this_idx < sp->start_idx)
 		goto out;
@@ -1176,7 +1176,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
 	if (copy_to_user_policy_type(xp->type, skb) < 0)
 		goto nlmsg_failure;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 out:
 	sp->this_idx++;
 	return 0;
@@ -1330,7 +1330,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
 	struct xfrm_aevent_id *id;
 	struct nlmsghdr *nlh;
 	struct xfrm_lifetime_cur ltime;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id));
 	id = NLMSG_DATA(nlh);
@@ -1362,7 +1362,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
 		RTA_PUT(skb,XFRMA_ETIMER_THRESH,sizeof(u32),&etimer);
 	}
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 rtattr_failure:
@@ -1744,7 +1744,7 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
 	struct xfrm_migrate *mp;
 	struct xfrm_userpolicy_id *pol_id;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	int i;
 
 	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id));
@@ -1764,7 +1764,7 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
 			goto nlmsg_failure;
 	}
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 nlmsg_failure:
 	skb_trim(skb, b - skb->data);
@@ -1942,7 +1942,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
 {
 	struct xfrm_user_expire *ue;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_EXPIRE,
 			sizeof(*ue));
@@ -1952,7 +1952,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
 	copy_to_user_state(x, &ue->state);
 	ue->hard = (c->data.hard != 0) ? 1 : 0;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
@@ -1999,7 +1999,7 @@ static int xfrm_notify_sa_flush(struct km_event *c)
 	struct xfrm_usersa_flush *p;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
-	unsigned char *b;
+	sk_buff_data_t b;
 	int len = NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush));
 
 	skb = alloc_skb(len, GFP_ATOMIC);
@@ -2045,7 +2045,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c)
 	struct xfrm_usersa_id *id;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
-	unsigned char *b;
+	sk_buff_data_t b;
 	int len = xfrm_sa_len(x);
 	int headlen;
 
@@ -2129,7 +2129,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
 {
 	struct xfrm_user_acquire *ua;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 	__u32 seq = xfrm_get_acqseq();
 
 	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE,
@@ -2153,7 +2153,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
 	if (copy_to_user_policy_type(xp->type, skb) < 0)
 		goto nlmsg_failure;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
@@ -2249,7 +2249,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
 	struct xfrm_user_polexpire *upe;
 	struct nlmsghdr *nlh;
 	int hard = c->data.hard;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_PUT(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe));
 	upe = NLMSG_DATA(nlh);
@@ -2264,7 +2264,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
 		goto nlmsg_failure;
 	upe->hard = !!hard;
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
@@ -2300,7 +2300,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *
 	struct xfrm_userpolicy_id *id;
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
-	unsigned char *b;
+	sk_buff_data_t b;
 	int len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
 	int headlen;
 
@@ -2357,7 +2357,7 @@ static int xfrm_notify_policy_flush(struct km_event *c)
 {
 	struct nlmsghdr *nlh;
 	struct sk_buff *skb;
-	unsigned char *b;
+	sk_buff_data_t b;
 	int len = 0;
 #ifdef CONFIG_XFRM_SUB_POLICY
 	len += RTA_SPACE(sizeof(struct xfrm_userpolicy_type));
@@ -2410,7 +2410,7 @@ static int build_report(struct sk_buff *skb, u8 proto,
 {
 	struct xfrm_user_report *ur;
 	struct nlmsghdr *nlh;
-	unsigned char *b = skb->tail;
+	unsigned char *b = skb_tail_pointer(skb);
 
 	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur));
 	ur = NLMSG_DATA(nlh);
@@ -2422,7 +2422,7 @@ static int build_report(struct sk_buff *skb, u8 proto,
 	if (addr)
 		RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr);
 
-	nlh->nlmsg_len = skb->tail - b;
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
 nlmsg_failure:
diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c
index e203883406d..33f2e064a68 100644
--- a/security/selinux/netlink.c
+++ b/security/selinux/netlink.c
@@ -66,7 +66,7 @@ static void selnl_add_payload(struct nlmsghdr *nlh, int len, int msgtype, void *
 static void selnl_notify(int msgtype, void *data)
 {
 	int len;
-	unsigned char *tmp;
+	sk_buff_data_t tmp;
 	struct sk_buff *skb;
 	struct nlmsghdr *nlh;
 	
-- 
cgit v1.2.3


From 4305b541357ddbd205aa145dc378926b7cb12283 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 19 Apr 2007 20:43:29 -0700
Subject: [SK_BUFF]: Convert skb->end to sk_buff_data_t

Now to convert the last one, skb->data, that will allow many simplifications
and removal of some of the offset helpers.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/ia64/sn/kernel/xpnet.c                   |  6 ++--
 drivers/atm/ambassador.c                      |  2 +-
 drivers/atm/idt77252.c                        | 24 ++++++++-----
 drivers/infiniband/hw/cxgb3/iwch_cm.c         |  2 +-
 drivers/net/cris/eth_v10.c                    |  3 +-
 drivers/net/forcedeth.c                       | 30 ++++++++++------
 drivers/net/macb.c                            |  2 +-
 drivers/net/wan/lmc/lmc_main.c                |  2 +-
 drivers/net/wireless/hostap/hostap_80211_rx.c |  2 +-
 drivers/usb/atm/usbatm.c                      |  4 +--
 include/linux/skbuff.h                        | 23 +++++++++---
 net/atm/lec.c                                 |  2 +-
 net/core/skbuff.c                             | 51 +++++++++++++++++----------
 net/ieee80211/ieee80211_rx.c                  |  2 +-
 net/netlink/af_netlink.c                      |  2 +-
 15 files changed, 101 insertions(+), 56 deletions(-)

diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c
index eb416c95967..98d79142f32 100644
--- a/arch/ia64/sn/kernel/xpnet.c
+++ b/arch/ia64/sn/kernel/xpnet.c
@@ -264,7 +264,7 @@ xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg)
 
 	dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
 		"skb->end=0x%p skb->len=%d\n", (void *) skb->head,
-		(void *)skb->data, skb_tail_pointer(skb), (void *)skb->end,
+		(void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
 		skb->len);
 
 	skb->protocol = eth_type_trans(skb, xpnet_device);
@@ -273,7 +273,7 @@ xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg)
 	dev_dbg(xpnet, "passing skb to network layer; \n\tskb->head=0x%p "
 		"skb->data=0x%p skb->tail=0x%p skb->end=0x%p skb->len=%d\n",
 		(void *)skb->head, (void *)skb->data, skb_tail_pointer(skb),
-		(void *) skb->end, skb->len);
+		skb_end_pointer(skb), skb->len);
 
 
 	xpnet_device->last_rx = jiffies;
@@ -475,7 +475,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
 		"skb->end=0x%p skb->len=%d\n", (void *) skb->head,
-		(void *)skb->data, skb_tail_pointer(skb), (void *)skb->end,
+		(void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
 		skb->len);
 
 
diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c
index 3c372e08f77..59651abfa4f 100644
--- a/drivers/atm/ambassador.c
+++ b/drivers/atm/ambassador.c
@@ -821,7 +821,7 @@ static inline void fill_rx_pool (amb_dev * dev, unsigned char pool,
     }
     // cast needed as there is no %? for pointer differences
     PRINTD (DBG_SKB, "allocated skb at %p, head %p, area %li",
-	    skb, skb->head, (long) (skb->end - skb->head));
+	    skb, skb->head, (long) (skb_end_pointer(skb) - skb->head));
     rx.handle = virt_to_bus (skb);
     rx.host_address = cpu_to_be32 (virt_to_bus (skb->data));
     if (rx_give (dev, &rx, pool))
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 1e49799cd6c..20f2a3a8265 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -1065,7 +1065,8 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
 	vcc = vc->rx_vcc;
 
 	pci_dma_sync_single_for_cpu(card->pcidev, IDT77252_PRV_PADDR(skb),
-				    skb->end - skb->data, PCI_DMA_FROMDEVICE);
+				    skb_end_pointer(skb) - skb->data,
+				    PCI_DMA_FROMDEVICE);
 
 	if ((vcc->qos.aal == ATM_AAL0) ||
 	    (vcc->qos.aal == ATM_AAL34)) {
@@ -1194,7 +1195,8 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe)
 		}
 
 		pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-				 skb->end - skb->data, PCI_DMA_FROMDEVICE);
+				 skb_end_pointer(skb) - skb->data,
+				 PCI_DMA_FROMDEVICE);
 		sb_pool_remove(card, skb);
 
 		skb_trim(skb, len);
@@ -1267,7 +1269,7 @@ idt77252_rx_raw(struct idt77252_dev *card)
 	tail = readl(SAR_REG_RAWCT);
 
 	pci_dma_sync_single_for_cpu(card->pcidev, IDT77252_PRV_PADDR(queue),
-				    queue->end - queue->head - 16,
+				    skb_end_pointer(queue) - queue->head - 16,
 				    PCI_DMA_FROMDEVICE);
 
 	while (head != tail) {
@@ -1363,7 +1365,8 @@ drop:
 				queue = card->raw_cell_head;
 				pci_dma_sync_single_for_cpu(card->pcidev,
 							    IDT77252_PRV_PADDR(queue),
-							    queue->end - queue->data,
+							    (skb_end_pointer(queue) -
+							     queue->data),
 							    PCI_DMA_FROMDEVICE);
 			} else {
 				card->raw_cell_head = NULL;
@@ -1875,7 +1878,7 @@ add_rx_skb(struct idt77252_dev *card, int queue,
 		}
 
 		paddr = pci_map_single(card->pcidev, skb->data,
-				       skb->end - skb->data,
+				       skb_end_pointer(skb) - skb->data,
 				       PCI_DMA_FROMDEVICE);
 		IDT77252_PRV_PADDR(skb) = paddr;
 
@@ -1889,7 +1892,7 @@ add_rx_skb(struct idt77252_dev *card, int queue,
 
 outunmap:
 	pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-			 skb->end - skb->data, PCI_DMA_FROMDEVICE);
+			 skb_end_pointer(skb) - skb->data, PCI_DMA_FROMDEVICE);
 
 	handle = IDT77252_PRV_POOL(skb);
 	card->sbpool[POOL_QUEUE(handle)].skb[POOL_INDEX(handle)] = NULL;
@@ -1906,12 +1909,14 @@ recycle_rx_skb(struct idt77252_dev *card, struct sk_buff *skb)
 	int err;
 
 	pci_dma_sync_single_for_device(card->pcidev, IDT77252_PRV_PADDR(skb),
-				       skb->end - skb->data, PCI_DMA_FROMDEVICE);
+				       skb_end_pointer(skb) - skb->data,
+				       PCI_DMA_FROMDEVICE);
 
 	err = push_rx_skb(card, skb, POOL_QUEUE(handle));
 	if (err) {
 		pci_unmap_single(card->pcidev, IDT77252_PRV_PADDR(skb),
-				 skb->end - skb->data, PCI_DMA_FROMDEVICE);
+				 skb_end_pointer(skb) - skb->data,
+				 PCI_DMA_FROMDEVICE);
 		sb_pool_remove(card, skb);
 		dev_kfree_skb(skb);
 	}
@@ -3123,7 +3128,8 @@ deinit_card(struct idt77252_dev *card)
 			if (skb) {
 				pci_unmap_single(card->pcidev,
 						 IDT77252_PRV_PADDR(skb),
-						 skb->end - skb->data,
+						 (skb_end_pointer(skb) -
+						  skb->data),
 						 PCI_DMA_FROMDEVICE);
 				card->sbpool[i].skb[j] = NULL;
 				dev_kfree_skb(skb);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 66ad4d40ba1..e842c65a3f4 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -477,7 +477,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
 	BUG_ON(skb_cloned(skb));
 
 	mpalen = sizeof(*mpa) + ep->plen;
-	if (skb->data + mpalen + sizeof(*req) > skb->end) {
+	if (skb->data + mpalen + sizeof(*req) > skb_end_pointer(skb)) {
 		kfree_skb(skb);
 		skb=alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
 		if (!skb) {
diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c
index 7feb9c56114..5bdf5ca85a6 100644
--- a/drivers/net/cris/eth_v10.c
+++ b/drivers/net/cris/eth_v10.c
@@ -1348,7 +1348,8 @@ e100_rx(struct net_device *dev)
 
 #ifdef ETHDEBUG
 		printk("head = 0x%x, data = 0x%x, tail = 0x%x, end = 0x%x\n",
-		  skb->head, skb->data, skb_tail_pointer(skb), skb->end);
+		       skb->head, skb->data, skb_tail_pointer(skb),
+		       skb_end_pointer(skb));
 		printk("copying packet to 0x%x.\n", skb_data_ptr);
 #endif
 
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index d5d458c3421..d3f4bcaa969 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -1386,9 +1386,13 @@ static int nv_alloc_rx(struct net_device *dev)
 		struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
 		if (skb) {
 			np->put_rx_ctx->skb = skb;
-			np->put_rx_ctx->dma = pci_map_single(np->pci_dev, skb->data,
-							     skb->end-skb->data, PCI_DMA_FROMDEVICE);
-			np->put_rx_ctx->dma_len = skb->end-skb->data;
+			np->put_rx_ctx->dma = pci_map_single(np->pci_dev,
+							     skb->data,
+							(skb_end_pointer(skb) -
+							 skb->data),
+							     PCI_DMA_FROMDEVICE);
+			np->put_rx_ctx->dma_len = (skb_end_pointer(skb) -
+						   skb->data);
 			np->put_rx.orig->buf = cpu_to_le32(np->put_rx_ctx->dma);
 			wmb();
 			np->put_rx.orig->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL);
@@ -1416,9 +1420,13 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
 		struct sk_buff *skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
 		if (skb) {
 			np->put_rx_ctx->skb = skb;
-			np->put_rx_ctx->dma = pci_map_single(np->pci_dev, skb->data,
-							     skb->end-skb->data, PCI_DMA_FROMDEVICE);
-			np->put_rx_ctx->dma_len = skb->end-skb->data;
+			np->put_rx_ctx->dma = pci_map_single(np->pci_dev,
+							     skb->data,
+							     (skb_end_pointer(skb) -
+							      skb->data),
+							     PCI_DMA_FROMDEVICE);
+			np->put_rx_ctx->dma_len = (skb_end_pointer(skb) -
+						   skb->data);
 			np->put_rx.ex->bufhigh = cpu_to_le64(np->put_rx_ctx->dma) >> 32;
 			np->put_rx.ex->buflow = cpu_to_le64(np->put_rx_ctx->dma) & 0x0FFFFFFFF;
 			wmb();
@@ -1602,8 +1610,9 @@ static void nv_drain_rx(struct net_device *dev)
 		wmb();
 		if (np->rx_skb[i].skb) {
 			pci_unmap_single(np->pci_dev, np->rx_skb[i].dma,
-						np->rx_skb[i].skb->end-np->rx_skb[i].skb->data,
-						PCI_DMA_FROMDEVICE);
+					 (skb_end_pointer(np->rx_skb[i].skb) -
+					  np->rx_skb[i].skb->data),
+					 PCI_DMA_FROMDEVICE);
 			dev_kfree_skb(np->rx_skb[i].skb);
 			np->rx_skb[i].skb = NULL;
 		}
@@ -4378,7 +4387,8 @@ static int nv_loopback_test(struct net_device *dev)
 	for (i = 0; i < pkt_len; i++)
 		pkt_data[i] = (u8)(i & 0xff);
 	test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data,
-				       tx_skb->end-tx_skb->data, PCI_DMA_FROMDEVICE);
+				       (skb_end_pointer(tx_skb) -
+					tx_skb->data), PCI_DMA_FROMDEVICE);
 
 	if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
 		np->tx_ring.orig[0].buf = cpu_to_le32(test_dma_addr);
@@ -4435,7 +4445,7 @@ static int nv_loopback_test(struct net_device *dev)
 	}
 
 	pci_unmap_page(np->pci_dev, test_dma_addr,
-		       tx_skb->end-tx_skb->data,
+		       (skb_end_pointer(tx_skb) - tx_skb->data),
 		       PCI_DMA_TODEVICE);
 	dev_kfree_skb_any(tx_skb);
  out:
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index 98bf51afcee..9e233f8216a 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -576,7 +576,7 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	dev_dbg(&bp->pdev->dev,
 		"start_xmit: len %u head %p data %p tail %p end %p\n",
 		skb->len, skb->head, skb->data,
-		skb_tail_pointer(skb), skb->end);
+		skb_tail_pointer(skb), skb_end_pointer(skb));
 	dev_dbg(&bp->pdev->dev,
 		"data:");
 	for (i = 0; i < 16; i++)
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index b731f3aae0d..5bb18c0955b 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -1932,7 +1932,7 @@ static void lmc_softreset (lmc_softc_t * const sc) /*fold00*/
         sc->lmc_rxring[i].status = 0x80000000;
 
         /* used to be PKT_BUF_SZ now uses skb since we lose some to head room */
-        sc->lmc_rxring[i].length = skb->end - skb->data;
+        sc->lmc_rxring[i].length = skb_end_pointer(skb) - skb->data;
 
         /* use to be tail which is dumb since you're thinking why write
          * to the end of the packj,et but since there's nothing there tail == data
diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c
index 5e3e9e26270..35a3a50724f 100644
--- a/drivers/net/wireless/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_rx.c
@@ -922,7 +922,7 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 		if (frag != 0)
 			flen -= hdrlen;
 
-		if (skb_tail_pointer(frag_skb) + flen > frag_skb->end) {
+		if (frag_skb->tail + flen > frag_skb->end) {
 			printk(KERN_WARNING "%s: host decrypted and "
 			       "reassembled frame did not fit skb\n",
 			       dev->name);
diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c
index 4d8f282b23d..a076f735a7b 100644
--- a/drivers/usb/atm/usbatm.c
+++ b/drivers/usb/atm/usbatm.c
@@ -335,12 +335,12 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char
 
 	sarb = instance->cached_vcc->sarb;
 
-	if (skb_tail_pointer(sarb) + ATM_CELL_PAYLOAD > sarb->end) {
+	if (sarb->tail + ATM_CELL_PAYLOAD > sarb->end) {
 		atm_rldbg(instance, "%s: buffer overrun (sarb->len %u, vcc: 0x%p)!\n",
 				__func__, sarb->len, vcc);
 		/* discard cells already received */
 		skb_trim(sarb, 0);
-		UDSL_ASSERT(skb_tail_pointer(sarb) + ATM_CELL_PAYLOAD <= sarb->end);
+		UDSL_ASSERT(sarb->tail + ATM_CELL_PAYLOAD <= sarb->end);
 	}
 
 	memcpy(skb_tail_pointer(sarb), source + ATM_CELL_HEADER, ATM_CELL_PAYLOAD);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e1c2392ecb5..656dc0e901c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -305,9 +305,9 @@ struct sk_buff {
 	sk_buff_data_t		mac_header;
 	/* These elements must be at the end, see alloc_skb() for details.  */
 	sk_buff_data_t		tail;
+	sk_buff_data_t		end;
 	unsigned char		*head,
-				*data,
-				*end;
+				*data;
 	unsigned int		truesize;
 	atomic_t		users;
 };
@@ -392,8 +392,20 @@ extern unsigned int   skb_find_text(struct sk_buff *skb, unsigned int from,
 				    unsigned int to, struct ts_config *config,
 				    struct ts_state *state);
 
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
+{
+	return skb->head + skb->end;
+}
+#else
+static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
+{
+	return skb->end;
+}
+#endif
+
 /* Internal */
-#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
+#define skb_shinfo(SKB)	((struct skb_shared_info *)(skb_end_pointer(SKB)))
 
 /**
  *	skb_queue_empty - check if a queue is empty
@@ -843,6 +855,7 @@ static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
 {
 	skb->tail = skb->data + offset;
 }
+
 #endif /* NET_SKBUFF_DATA_USES_OFFSET */
 
 /*
@@ -872,7 +885,7 @@ static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
 	SKB_LINEAR_ASSERT(skb);
 	skb->tail += len;
 	skb->len  += len;
-	if (unlikely(skb_tail_pointer(skb) > skb->end))
+	if (unlikely(skb->tail > skb->end))
 		skb_over_panic(skb, len, current_text_addr());
 	return tmp;
 }
@@ -968,7 +981,7 @@ static inline int skb_headroom(const struct sk_buff *skb)
  */
 static inline int skb_tailroom(const struct sk_buff *skb)
 {
-	return skb_is_nonlinear(skb) ? 0 : skb->end - skb_tail_pointer(skb);
+	return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail;
 }
 
 /**
diff --git a/net/atm/lec.c b/net/atm/lec.c
index a8c6b285e06..4b3e72f31b3 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -284,7 +284,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	DPRINTK("skbuff head:%lx data:%lx tail:%lx end:%lx\n",
 		(long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb),
-		(long)skb->end);
+		(long)skb_end_pointer(skb));
 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
 	if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0)
 		lec_handle_bridge(skb, dev);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ddcbc4d10da..a203bedefe0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -87,9 +87,9 @@ static struct kmem_cache *skbuff_fclone_cache __read_mostly;
 void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 {
 	printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
-			  "data:%p tail:%#lx end:%p dev:%s\n",
+			  "data:%p tail:%#lx end:%#lx dev:%s\n",
 	       here, skb->len, sz, skb->head, skb->data,
-	       (unsigned long)skb->tail, skb->end,
+	       (unsigned long)skb->tail, (unsigned long)skb->end,
 	       skb->dev ? skb->dev->name : "<NULL>");
 	BUG();
 }
@@ -106,9 +106,9 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 {
 	printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
-			  "data:%p tail:%#lx end:%p dev:%s\n",
+			  "data:%p tail:%#lx end:%#lx dev:%s\n",
 	       here, skb->len, sz, skb->head, skb->data,
-	       (unsigned long)skb->tail, skb->end,
+	       (unsigned long)skb->tail, (unsigned long)skb->end,
 	       skb->dev ? skb->dev->name : "<NULL>");
 	BUG();
 }
@@ -170,7 +170,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb->head = data;
 	skb->data = data;
 	skb_reset_tail_pointer(skb);
-	skb->end  = data + size;
+	skb->end = skb->tail + size;
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
 	atomic_set(&shinfo->dataref, 1);
@@ -520,8 +520,12 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 	/*
 	 *	Allocate the copy buffer
 	 */
-	struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len,
-				      gfp_mask);
+	struct sk_buff *n;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	n = alloc_skb(skb->end + skb->data_len, gfp_mask);
+#else
+	n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
+#endif
 	if (!n)
 		return NULL;
 
@@ -558,8 +562,12 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
 	/*
 	 *	Allocate the copy buffer
 	 */
-	struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask);
-
+	struct sk_buff *n;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	n = alloc_skb(skb->end, gfp_mask);
+#else
+	n = alloc_skb(skb->end - skb->head, gfp_mask);
+#endif
 	if (!n)
 		goto out;
 
@@ -617,7 +625,11 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 {
 	int i;
 	u8 *data;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	int size = nhead + skb->end + ntail;
+#else
 	int size = nhead + (skb->end - skb->head) + ntail;
+#endif
 	long off;
 
 	if (skb_shared(skb))
@@ -632,12 +644,13 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	/* Copy only real data... and, alas, header. This should be
 	 * optimized for the cases when header is void. */
 	memcpy(data + nhead, skb->head,
-		skb->tail
-#ifndef NET_SKBUFF_DATA_USES_OFFSET
-		- skb->head
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+		skb->tail);
+#else
+		skb->tail - skb->head);
 #endif
-		);
-	memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
+	memcpy(data + size, skb_end_pointer(skb),
+	       sizeof(struct skb_shared_info));
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 		get_page(skb_shinfo(skb)->frags[i].page);
@@ -650,9 +663,11 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	off = (data + nhead) - skb->head;
 
 	skb->head     = data;
-	skb->end      = data + size;
 	skb->data    += off;
-#ifndef NET_SKBUFF_DATA_USES_OFFSET
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	skb->end      = size;
+#else
+	skb->end      = skb->head + size;
 	/* {transport,network,mac}_header and tail are relative to skb->head */
 	skb->tail	      += off;
 	skb->transport_header += off;
@@ -769,7 +784,7 @@ int skb_pad(struct sk_buff *skb, int pad)
 		return 0;
 	}
 
-	ntail = skb->data_len + pad - (skb->end - skb_tail_pointer(skb));
+	ntail = skb->data_len + pad - (skb->end - skb->tail);
 	if (likely(skb_cloned(skb) || ntail > 0)) {
 		err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
 		if (unlikely(err))
@@ -907,7 +922,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
 	 * plus 128 bytes for future expansions. If we have enough
 	 * room at tail, reallocate without expansion only if skb is cloned.
 	 */
-	int i, k, eat = (skb_tail_pointer(skb) + delta) - skb->end;
+	int i, k, eat = (skb->tail + delta) - skb->end;
 
 	if (eat > 0 || skb_cloned(skb)) {
 		if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 2b854941e06..59a765c49cf 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -595,7 +595,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 		if (frag != 0)
 			flen -= hdrlen;
 
-		if (skb_tail_pointer(frag_skb) + flen > frag_skb->end) {
+		if (frag_skb->tail + flen > frag_skb->end) {
 			printk(KERN_WARNING "%s: host decrypted and "
 			       "reassembled frame did not fit skb\n",
 			       dev->name);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index fdb6eb13cbc..50dc5edb775 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -785,7 +785,7 @@ static inline struct sk_buff *netlink_trim(struct sk_buff *skb,
 
 	skb_orphan(skb);
 
-	delta = skb->end - skb_tail_pointer(skb);
+	delta = skb->end - skb->tail;
 	if (delta * 2 < skb->truesize)
 		return skb;
 
-- 
cgit v1.2.3


From ca0605a7c8a42379c695308944b3ae82a85479f1 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Mar 2007 10:48:59 -0300
Subject: [SK_BUFF]: Adjust the zeroing up to tail in __alloc_skb too

I did it just in alloc_skb_from_cache, forgot __alloc_skb, fixed now.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a203bedefe0..64caee46291 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -164,7 +164,10 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	if (!data)
 		goto nodata;
 
-	memset(skb, 0, offsetof(struct sk_buff, truesize));
+	/*
+	 * See comment in sk_buff definition, just before the 'tail' member
+	 */
+	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->truesize = size + sizeof(struct sk_buff);
 	atomic_set(&skb->users, 1);
 	skb->head = data;
-- 
cgit v1.2.3


From 05eee48c5af8213a71bd908ce17f577b2b776f79 Mon Sep 17 00:00:00 2001
From: Robert Olsson <robert.olsson@its.uu.se>
Date: Mon, 19 Mar 2007 16:27:37 -0700
Subject: [IPV4]: fib_trie resize break

The patch below adds break condition for the resize operations. If
we don't achieve the desired fill factor a warning is printed. Trie
should still be operational but new thresholds should be considered.

Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_trie.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e2b39fdd6a0..5d2b43d9f8f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -50,7 +50,7 @@
  *		Patrick McHardy <kaber@trash.net>
  */
 
-#define VERSION "0.407"
+#define VERSION "0.408"
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -458,6 +458,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 	struct tnode *old_tn;
 	int inflate_threshold_use;
 	int halve_threshold_use;
+	int max_resize;
 
 	if (!tn)
 		return NULL;
@@ -558,7 +559,8 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 		inflate_threshold_use = inflate_threshold;
 
 	err = 0;
-	while ((tn->full_children > 0 &&
+	max_resize = 10;
+	while ((tn->full_children > 0 &&  max_resize-- &&
 	       50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
 				inflate_threshold_use * tnode_child_length(tn))) {
 
@@ -573,6 +575,15 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 		}
 	}
 
+	if (max_resize < 0) {
+		if (!tn->parent)
+			printk(KERN_WARNING "Fix inflate_threshold_root. Now=%d size=%d bits\n",
+			       inflate_threshold_root, tn->bits);
+		else
+			printk(KERN_WARNING "Fix inflate_threshold. Now=%d size=%d bits\n",
+			       inflate_threshold, tn->bits);
+	}
+
 	check_tnode(tn);
 
 	/*
@@ -589,7 +600,8 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 		halve_threshold_use = halve_threshold;
 
 	err = 0;
-	while (tn->bits > 1 &&
+	max_resize = 10;
+	while (tn->bits > 1 &&  max_resize-- &&
 	       100 * (tnode_child_length(tn) - tn->empty_children) <
 	       halve_threshold_use * tnode_child_length(tn)) {
 
@@ -604,6 +616,14 @@ static struct node *resize(struct trie *t, struct tnode *tn)
 		}
 	}
 
+	if (max_resize < 0) {
+		if (!tn->parent)
+			printk(KERN_WARNING "Fix halve_threshold_root. Now=%d size=%d bits\n",
+			       halve_threshold_root, tn->bits);
+		else
+			printk(KERN_WARNING "Fix halve_threshold. Now=%d size=%d bits\n",
+			       halve_threshold, tn->bits);
+	}
 
 	/* Only one child remains */
 	if (tn->empty_children == tnode_child_length(tn) - 1)
-- 
cgit v1.2.3


From 965ffea43d4ebe8cd7b9fee78d651268dd7d23c5 Mon Sep 17 00:00:00 2001
From: Robert Olsson <robert.olsson@its.uu.se>
Date: Mon, 19 Mar 2007 16:29:58 -0700
Subject: [IPV4]: fib_trie root node settings

The threshold for root node can be more aggressive set to get
better tree compression. The new setting mekes the root grow
from 16 to 19 bits and substansial improvemnt in Aver depth
this with the current table of 214393 prefixes

But really the dynamic resize should need more investigation
both in terms convergence and performance and maybe it should
be possible to change...

Maybe just for the brave to start with or we may have to back
this out.
---
 net/ipv4/fib_trie.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5d2b43d9f8f..9be7da7c3a8 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -292,8 +292,8 @@ static inline void check_tnode(const struct tnode *tn)
 
 static int halve_threshold = 25;
 static int inflate_threshold = 50;
-static int halve_threshold_root = 15;
-static int inflate_threshold_root = 25;
+static int halve_threshold_root = 8;
+static int inflate_threshold_root = 15;
 
 
 static void __alias_free_mem(struct rcu_head *head)
-- 
cgit v1.2.3


From b529ccf2799c14346d1518e9bdf1f88f03643e99 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 25 Apr 2007 19:08:35 -0700
Subject: [NETLINK]: Introduce nlmsg_hdr() helper

For the common "(struct nlmsghdr *)skb->data" sequence, so that we reduce the
number of direct accesses to skb->data and for consistency with all the other
cast skb member helpers.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/connector/connector.c       | 2 +-
 drivers/scsi/scsi_netlink.c         | 2 +-
 drivers/scsi/scsi_transport_iscsi.c | 2 +-
 fs/ecryptfs/netlink.c               | 4 ++--
 include/linux/netlink.h             | 5 +++++
 kernel/audit.c                      | 6 +++---
 kernel/taskstats.c                  | 4 ++--
 net/decnet/netfilter/dn_rtmsg.c     | 2 +-
 net/ipv4/fib_frontend.c             | 2 +-
 net/ipv4/inet_diag.c                | 2 +-
 net/ipv4/netfilter/ip_queue.c       | 2 +-
 net/ipv6/netfilter/ip6_queue.c      | 2 +-
 net/netlink/af_netlink.c            | 2 +-
 net/tipc/netlink.c                  | 2 +-
 security/selinux/hooks.c            | 2 +-
 15 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index a905f782033..7f9c4fb7e5b 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -212,7 +212,7 @@ static void cn_rx_skb(struct sk_buff *__skb)
 	skb = skb_get(__skb);
 
 	if (skb->len >= NLMSG_SPACE(0)) {
-		nlh = (struct nlmsghdr *)skb->data;
+		nlh = nlmsg_hdr(skb);
 
 		if (nlh->nlmsg_len < sizeof(struct cn_msg) ||
 		    skb->len < nlh->nlmsg_len ||
diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
index 1b59b27e887..45646a28524 100644
--- a/drivers/scsi/scsi_netlink.c
+++ b/drivers/scsi/scsi_netlink.c
@@ -50,7 +50,7 @@ scsi_nl_rcv_msg(struct sk_buff *skb)
 	while (skb->len >= NLMSG_SPACE(0)) {
 		err = 0;
 
-		nlh = (struct nlmsghdr *) skb->data;
+		nlh = nlmsg_hdr(skb);
 		if ((nlh->nlmsg_len < (sizeof(*nlh) + sizeof(*hdr))) ||
 		    (skb->len < nlh->nlmsg_len)) {
 			printk(KERN_WARNING "%s: discarding partial skb\n",
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index ce0d14af33c..10590cd7e9e 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1081,7 +1081,7 @@ iscsi_if_rx(struct sock *sk, int len)
 			struct nlmsghdr	*nlh;
 			struct iscsi_uevent *ev;
 
-			nlh = (struct nlmsghdr *)skb->data;
+			nlh = nlmsg_hdr(skb);
 			if (nlh->nlmsg_len < sizeof(*nlh) ||
 			    skb->len < nlh->nlmsg_len) {
 				break;
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c
index e3aa2253c85..8405d216a5f 100644
--- a/fs/ecryptfs/netlink.c
+++ b/fs/ecryptfs/netlink.c
@@ -97,7 +97,7 @@ out:
  */
 static int ecryptfs_process_nl_response(struct sk_buff *skb)
 {
-	struct nlmsghdr *nlh = (struct nlmsghdr*)skb->data;
+	struct nlmsghdr *nlh = nlmsg_hdr(skb);
 	struct ecryptfs_message *msg = NLMSG_DATA(nlh);
 	int rc;
 
@@ -181,7 +181,7 @@ receive:
 				"rc = [%d]\n", rc);
 		return;
 	}
-	nlh = (struct nlmsghdr *)skb->data;
+	nlh = nlmsg_hdr(skb);
 	if (!NLMSG_OK(nlh, skb->len)) {
 		ecryptfs_printk(KERN_ERR, "Received corrupt netlink "
 				"message\n");
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 68a632b372e..36629fff26d 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -138,6 +138,11 @@ struct nlattr
 #include <linux/capability.h>
 #include <linux/skbuff.h>
 
+static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
+{
+	return (struct nlmsghdr *)skb->data;
+}
+
 struct netlink_skb_parms
 {
 	struct ucred		creds;		/* Skb credentials	*/
diff --git a/kernel/audit.c b/kernel/audit.c
index ea8521417d1..80a7457dadb 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -151,7 +151,7 @@ struct audit_buffer {
 
 static void audit_set_pid(struct audit_buffer *ab, pid_t pid)
 {
-	struct nlmsghdr *nlh = (struct nlmsghdr *)ab->skb->data;
+	struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
 	nlh->nlmsg_pid = pid;
 }
 
@@ -750,7 +750,7 @@ static void audit_receive_skb(struct sk_buff *skb)
 	u32		rlen;
 
 	while (skb->len >= NLMSG_SPACE(0)) {
-		nlh = (struct nlmsghdr *)skb->data;
+		nlh = nlmsg_hdr(skb);
 		if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
 			return;
 		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
@@ -1268,7 +1268,7 @@ void audit_log_end(struct audit_buffer *ab)
 		audit_log_lost("rate limit exceeded");
 	} else {
 		if (audit_pid) {
-			struct nlmsghdr *nlh = (struct nlmsghdr *)ab->skb->data;
+			struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
 			nlh->nlmsg_len = ab->skb->len - NLMSG_SPACE(0);
 			skb_queue_tail(&audit_skb_queue, ab->skb);
 			ab->skb = NULL;
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 4c3476fa058..ad7d2392cb0 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -102,7 +102,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp,
  */
 static int send_reply(struct sk_buff *skb, pid_t pid)
 {
-	struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
+	struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
 	void *reply = genlmsg_data(genlhdr);
 	int rc;
 
@@ -121,7 +121,7 @@ static int send_reply(struct sk_buff *skb, pid_t pid)
 static void send_cpu_listeners(struct sk_buff *skb,
 					struct listener_list *listeners)
 {
-	struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
+	struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
 	struct listener *s, *tmp;
 	struct sk_buff *skb_next, *skb_cur = skb;
 	void *reply = genlmsg_data(genlhdr);
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index ceefd9dd0c9..9e8256a2361 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -102,7 +102,7 @@ static unsigned int dnrmg_hook(unsigned int hook,
 
 static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
 {
-	struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
+	struct nlmsghdr *nlh = nlmsg_hdr(skb);
 
 	if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
 		return;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index cac06c43f00..3ff753c6f19 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -807,7 +807,7 @@ static void nl_fib_input(struct sock *sk, int len)
 	if (skb == NULL)
 		return;
 
-	nlh = (struct nlmsghdr *)skb->data;
+	nlh = nlmsg_hdr(skb);
 	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len ||
 	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) {
 		kfree_skb(skb);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 37362cd1d07..238999e6e87 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -847,7 +847,7 @@ static inline void inet_diag_rcv_skb(struct sk_buff *skb)
 {
 	if (skb->len >= NLMSG_SPACE(0)) {
 		int err;
-		struct nlmsghdr *nlh = (struct nlmsghdr *)skb->data;
+		struct nlmsghdr *nlh = nlmsg_hdr(skb);
 
 		if (nlh->nlmsg_len < sizeof(*nlh) ||
 		    skb->len < nlh->nlmsg_len)
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 15e0d200223..17f7c988460 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -497,7 +497,7 @@ ipq_rcv_skb(struct sk_buff *skb)
 	if (skblen < sizeof(*nlh))
 		return;
 
-	nlh = (struct nlmsghdr *)skb->data;
+	nlh = nlmsg_hdr(skb);
 	nlmsglen = nlh->nlmsg_len;
 	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
 		return;
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 5cfce218c5e..275e625e497 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -487,7 +487,7 @@ ipq_rcv_skb(struct sk_buff *skb)
 	if (skblen < sizeof(*nlh))
 		return;
 
-	nlh = (struct nlmsghdr *)skb->data;
+	nlh = nlmsg_hdr(skb);
 	nlmsglen = nlh->nlmsg_len;
 	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
 		return;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 50dc5edb775..04b72d3c1de 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1471,7 +1471,7 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 	int err;
 
 	while (skb->len >= nlmsg_total_size(0)) {
-		nlh = (struct nlmsghdr *) skb->data;
+		nlh = nlmsg_hdr(skb);
 
 		if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
 			return 0;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index b8e1edc2bad..4cdafa2d1d4 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -57,7 +57,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
 
 	if (rep_buf) {
 		skb_push(rep_buf, hdr_space);
-		rep_nlh = (struct nlmsghdr *)rep_buf->data;
+		rep_nlh = nlmsg_hdr(rep_buf);
 		memcpy(rep_nlh, req_nlh, hdr_space);
 		rep_nlh->nlmsg_len = rep_buf->len;
 		genlmsg_unicast(rep_buf, req_nlh->nlmsg_pid);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index addb5850105..5f02b4be191 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3786,7 +3786,7 @@ static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb)
 		err = -EINVAL;
 		goto out;
 	}
-	nlh = (struct nlmsghdr *)skb->data;
+	nlh = nlmsg_hdr(skb);
 	
 	err = selinux_nlmsg_lookup(isec->sclass, nlh->nlmsg_type, &perm);
 	if (err) {
-- 
cgit v1.2.3


From 897933bcdf31c372e029dd4e2ecd573ebe6cfd9c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Mar 2007 22:27:36 -0300
Subject: [SK_BUFF]: Remove skb_add_mtu() leftovers

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/skbuff.h |  1 -
 net/core/skbuff.c      | 14 --------------
 2 files changed, 15 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 656dc0e901c..81ac934d596 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1512,7 +1512,6 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 }
 
 extern void skb_init(void);
-extern void skb_add_mtu(int mtu);
 
 /**
  *	skb_get_timestamp - get timestamp from a skb
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 64caee46291..e28f119156f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -157,7 +157,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	if (!skb)
 		goto out;
 
-	/* Get the DATA. Size must match skb_add_mtu(). */
 	size = SKB_DATA_ALIGN(size);
 	data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
 			gfp_mask, node);
@@ -1533,19 +1532,6 @@ void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head
 	spin_unlock_irqrestore(&list->lock, flags);
 }
 
-#if 0
-/*
- * 	Tune the memory allocator for a new MTU size.
- */
-void skb_add_mtu(int mtu)
-{
-	/* Must match allocation in alloc_skb */
-	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
-
-	kmem_add_cache_size(mtu);
-}
-#endif
-
 static inline void skb_split_inside_header(struct sk_buff *skb,
 					   struct sk_buff* skb1,
 					   const u32 len, const int pos)
-- 
cgit v1.2.3


From a36ca733375860b389c15ffdf6a5f92df64a33b6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Mar 2007 22:28:08 -0300
Subject: [NETLINK]: Remove NLMSG_{NEW_ANSWER,CANCEL,END}

Not used anywhere and defined inside __KERNEL__, Thomas acked this on irc.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/netlink.h | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 36629fff26d..0d11f6a7389 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -229,18 +229,6 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
 #define NLMSG_PUT(skb, pid, seq, type, len) \
 	NLMSG_NEW(skb, pid, seq, type, len, 0)
 
-#define NLMSG_NEW_ANSWER(skb, cb, type, len, flags) \
-	NLMSG_NEW(skb, NETLINK_CB((cb)->skb).pid, \
-		  (cb)->nlh->nlmsg_seq, type, len, flags)
-
-#define NLMSG_END(skb, nlh) \
-({	(nlh)->nlmsg_len = skb_tail_pointer(skb) - (unsigned char *)(nlh); \
-	(skb)->len; })
-
-#define NLMSG_CANCEL(skb, nlh) \
-({	skb_trim(skb, (unsigned char *) (nlh) - (skb)->data); \
-	-1; })
-
 extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 			      struct nlmsghdr *nlh,
 			      int (*dump)(struct sk_buff *skb, struct netlink_callback*),
-- 
cgit v1.2.3


From dc5fc579b90ed0a9a4e55b0218cdbaf0a8cf2e67 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 25 Mar 2007 23:06:12 -0700
Subject: [NETLINK]: Use nlmsg_trim() where appropriate

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h                |  2 +-
 net/core/wireless.c                  |  2 +-
 net/decnet/dn_route.c                |  3 ++-
 net/decnet/dn_table.c                |  3 ++-
 net/ipv4/inet_diag.c                 |  7 ++++---
 net/ipv4/ipmr.c                      |  3 ++-
 net/netfilter/nf_conntrack_netlink.c |  5 +++--
 net/sched/act_api.c                  | 17 +++++++++--------
 net/sched/act_gact.c                 |  3 ++-
 net/sched/act_ipt.c                  |  3 ++-
 net/sched/act_mirred.c               |  3 ++-
 net/sched/act_pedit.c                |  3 ++-
 net/sched/act_police.c               |  9 +++++----
 net/sched/act_simple.c               |  3 ++-
 net/sched/cls_api.c                  |  4 +++-
 net/sched/cls_basic.c                |  3 ++-
 net/sched/cls_fw.c                   |  3 ++-
 net/sched/cls_route.c                |  3 ++-
 net/sched/cls_rsvp.c                 |  1 +
 net/sched/cls_rsvp.h                 |  2 +-
 net/sched/cls_rsvp6.c                |  1 +
 net/sched/cls_tcindex.c              |  3 ++-
 net/sched/cls_u32.c                  |  3 ++-
 net/sched/sch_api.c                  |  5 +++--
 net/sched/sch_atm.c                  |  3 ++-
 net/sched/sch_cbq.c                  | 17 +++++++++--------
 net/sched/sch_hfsc.c                 |  5 +++--
 net/sched/sch_htb.c                  |  5 +++--
 net/sched/sch_ingress.c              |  3 ++-
 net/sched/sch_netem.c                |  3 ++-
 net/sched/sch_prio.c                 |  3 ++-
 net/sched/sch_sfq.c                  |  3 ++-
 net/sched/sch_tbf.c                  |  3 ++-
 net/xfrm/xfrm_user.c                 | 16 ++++++++--------
 34 files changed, 93 insertions(+), 62 deletions(-)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 2c7ab107f20..510ca7fabe1 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -548,7 +548,7 @@ static inline void *nlmsg_get_pos(struct sk_buff *skb)
  *
  * Trims the message to the provided mark. Returns -1.
  */
-static inline int nlmsg_trim(struct sk_buff *skb, void *mark)
+static inline int nlmsg_trim(struct sk_buff *skb, const void *mark)
 {
 	if (mark)
 		skb_trim(skb, (unsigned char *) mark - skb->data);
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 4a777b68e3b..86db63d7f76 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -1957,7 +1957,7 @@ static inline int rtnetlink_fill_iwinfo(struct sk_buff *	skb,
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 9678b096b84..2ae35ef1f07 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -77,6 +77,7 @@
 #include <linux/rcupdate.h>
 #include <linux/times.h>
 #include <asm/errno.h>
+#include <net/netlink.h>
 #include <net/neighbour.h>
 #include <net/dst.h>
 #include <net/flow.h>
@@ -1514,7 +1515,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 544c4554074..d6615c9361e 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -28,6 +28,7 @@
 #include <asm/uaccess.h>
 #include <linux/route.h> /* RTF_xxx */
 #include <net/neighbour.h>
+#include <net/netlink.h>
 #include <net/dst.h>
 #include <net/flow.h>
 #include <net/fib_rules.h>
@@ -349,7 +350,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -EMSGSIZE;
 }
 
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 238999e6e87..62c2e9f7e11 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -27,6 +27,7 @@
 #include <net/inet_hashtables.h>
 #include <net/inet_timewait_sock.h>
 #include <net/inet6_hashtables.h>
+#include <net/netlink.h>
 
 #include <linux/inet.h>
 #include <linux/stddef.h>
@@ -152,7 +153,7 @@ static int inet_csk_diag_fill(struct sock *sk,
 
 rtattr_failure:
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -EMSGSIZE;
 }
 
@@ -208,7 +209,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 	nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail;
 	return skb->len;
 nlmsg_failure:
-	skb_trim(skb, previous_tail - skb->data);
+	nlmsg_trim(skb, previous_tail);
 	return -EMSGSIZE;
 }
 
@@ -579,7 +580,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 	return skb->len;
 
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index ea0a491dce9..48027df5a90 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -62,6 +62,7 @@
 #include <linux/netfilter_ipv4.h>
 #include <net/ipip.h>
 #include <net/checksum.h>
+#include <net/netlink.h>
 
 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
 #define CONFIG_IP_PIMSM	1
@@ -1570,7 +1571,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
 	return 1;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -EMSGSIZE;
 }
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 442300c633d..76f11f32591 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -33,6 +33,7 @@
 #include <linux/notifier.h>
 
 #include <linux/netfilter.h>
+#include <net/netlink.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_expect.h>
@@ -306,7 +307,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 
 nlmsg_failure:
 nfattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1169,7 +1170,7 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 
 nlmsg_failure:
 nfattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 28326fb1fc4..f002f74f376 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -31,6 +31,7 @@
 #include <net/sock.h>
 #include <net/sch_generic.h>
 #include <net/act_api.h>
+#include <net/netlink.h>
 
 void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
 {
@@ -98,7 +99,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
 			err = tcf_action_dump_1(skb, a, 0, 0);
 			if (err < 0) {
 				index--;
-				skb_trim(skb, (u8*)r - skb->data);
+				nlmsg_trim(skb, r);
 				goto done;
 			}
 			r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
@@ -114,7 +115,7 @@ done:
 	return n_i;
 
 rtattr_failure:
-	skb_trim(skb, (u8*)r - skb->data);
+	nlmsg_trim(skb, r);
 	goto done;
 }
 
@@ -144,7 +145,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 
 	return n_i;
 rtattr_failure:
-	skb_trim(skb, (u8*)r - skb->data);
+	nlmsg_trim(skb, r);
 	return -EINVAL;
 }
 
@@ -440,7 +441,7 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	}
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -467,7 +468,7 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
 rtattr_failure:
 	err = -EINVAL;
 errout:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return err;
 }
 
@@ -658,7 +659,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
 
 rtattr_failure:
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1059,7 +1060,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 		x->rta_len = skb_tail_pointer(skb) - (u8 *)x;
 		ret = skb->len;
 	} else
-		skb_trim(skb, (u8*)x - skb->data);
+		nlmsg_trim(skb, x);
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	if (NETLINK_CB(cb->skb).pid && ret)
@@ -1070,7 +1071,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 rtattr_failure:
 nlmsg_failure:
 	module_put(a_o->owner);
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return skb->len;
 }
 
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index aad748b3b38..7517f379154 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -28,6 +28,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <linux/tc_act/tc_gact.h>
@@ -181,7 +182,7 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 2ccfd5b20fa..00b05f422d4 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -30,6 +30,7 @@
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/kmod.h>
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <linux/tc_act/tc_ipt.h>
@@ -277,7 +278,7 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	kfree(t);
 	return -1;
 }
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 15f6ecdaf61..de21c92faaa 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -30,6 +30,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <linux/tc_act/tc_mirred.h>
@@ -225,7 +226,7 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index d654cea1a46..45b3cda86a2 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <linux/tc_act/tc_pedit.h>
@@ -226,7 +227,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	kfree(opt);
 	return -1;
 }
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 068b2376366..0a5679ea6c6 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -30,6 +30,7 @@
 #include <linux/init.h>
 #include <net/sock.h>
 #include <net/act_api.h>
+#include <net/netlink.h>
 
 #define L2T(p,L)   ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log])
 #define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log])
@@ -88,7 +89,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
 				err = tcf_action_dump_1(skb, a, 0, 0);
 			if (err < 0) {
 				index--;
-				skb_trim(skb, (u8*)r - skb->data);
+				nlmsg_trim(skb, r);
 				goto done;
 			}
 			r->rta_len = skb_tail_pointer(skb) - (u8 *)r;
@@ -102,7 +103,7 @@ done:
 	return n_i;
 
 rtattr_failure:
-	skb_trim(skb, (u8*)r - skb->data);
+	nlmsg_trim(skb, r);
 	goto done;
 }
 #endif
@@ -355,7 +356,7 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -598,7 +599,7 @@ int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index ecbcfa59b76..36e1edad599 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -16,6 +16,7 @@
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
+#include <net/netlink.h>
 #include <net/pkt_sched.h>
 
 #define TCA_ACT_SIMP 22
@@ -173,7 +174,7 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 84231baf77d..3d0a6cdcaeb 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -32,6 +32,8 @@
 #include <linux/rtnetlink.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
+#include <linux/netlink.h>
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
@@ -345,7 +347,7 @@ tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, unsigned long fh,
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 800ec2ac326..c885412d79d 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
+#include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
 
@@ -267,7 +268,7 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index f5f355852a8..bbec4a0d4dc 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -38,6 +38,7 @@
 #include <linux/notifier.h>
 #include <linux/netfilter.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <net/route.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
@@ -382,7 +383,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 1f94df36239..e92d716c915 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -28,6 +28,7 @@
 #include <linux/etherdevice.h>
 #include <linux/notifier.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <net/route.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
@@ -599,7 +600,7 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
index 1d4a1fb1760..0a683c07c64 100644
--- a/net/sched/cls_rsvp.c
+++ b/net/sched/cls_rsvp.c
@@ -31,6 +31,7 @@
 #include <net/route.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
+#include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
 
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 87ed6f3c507..22f9ede70e8 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -630,7 +630,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
index a2979d89798..93b6abed57d 100644
--- a/net/sched/cls_rsvp6.c
+++ b/net/sched/cls_rsvp6.c
@@ -34,6 +34,7 @@
 #include <net/sock.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
+#include <net/netlink.h>
 
 #define RSVP_DST_LEN	4
 #define RSVP_ID		"rsvp6"
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 0537d6066b4..47ac0c55642 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -12,6 +12,7 @@
 #include <linux/netdevice.h>
 #include <net/ip.h>
 #include <net/act_api.h>
+#include <net/netlink.h>
 #include <net/pkt_cls.h>
 #include <net/route.h>
 
@@ -495,7 +496,7 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index fa11bb75004..62e1deb27a1 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -50,6 +50,7 @@
 #include <linux/notifier.h>
 #include <linux/rtnetlink.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <net/route.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
@@ -772,7 +773,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 7482a950717..0b9abea68fd 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -36,6 +36,7 @@
 #include <linux/bitops.h>
 #include <linux/hrtimer.h>
 
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 
@@ -852,7 +853,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1081,7 +1082,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 1d7bb163213..0cc3c9b7272 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -14,6 +14,7 @@
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/file.h> /* for fput */
+#include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/sock.h>
 
@@ -665,7 +666,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb,b-skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 static int
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index be98a01253e..dcd9c31dc39 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -29,6 +29,7 @@
 #include <linux/etherdevice.h>
 #include <linux/notifier.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <net/route.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
@@ -1471,7 +1472,7 @@ static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1496,7 +1497,7 @@ static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1514,7 +1515,7 @@ static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1531,7 +1532,7 @@ static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1549,7 +1550,7 @@ static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1568,7 +1569,7 @@ static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 #endif
@@ -1601,7 +1602,7 @@ static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1637,7 +1638,7 @@ cbq_dump_class(struct Qdisc *sch, unsigned long arg,
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 80e6f811e3b..6a762cf781d 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -65,6 +65,7 @@
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/pkt_sched.h>
+#include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
 #include <asm/system.h>
@@ -1378,7 +1379,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
 	return skb->len;
 
  rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1584,7 +1585,7 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
 	return skb->len;
 
  rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index c687388a8cb..b7abd0ae676 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -50,6 +50,7 @@
 #include <linux/skbuff.h>
 #include <linux/list.h>
 #include <linux/compiler.h>
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <linux/rbtree.h>
@@ -1128,7 +1129,7 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	return skb->len;
 rtattr_failure:
 	spin_unlock_bh(&sch->dev->queue_lock);
-	skb_trim(skb, skb_tail_pointer(skb) - skb->data);
+	nlmsg_trim(skb, skb_tail_pointer(skb));
 	return -1;
 }
 
@@ -1164,7 +1165,7 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	return skb->len;
 rtattr_failure:
 	spin_unlock_bh(&sch->dev->queue_lock);
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index d19f4070c23..f63d5c6eb30 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -16,6 +16,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter.h>
 #include <linux/smp.h>
+#include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <asm/byteorder.h>
 #include <asm/uaccess.h>
@@ -371,7 +372,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 2a9b1e429ff..4818da5a7e6 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -22,6 +22,7 @@
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 
+#include <net/netlink.h>
 #include <net/pkt_sched.h>
 
 #define VERSION "1.2"
@@ -616,7 +617,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 5b371109ec1..f13996348dd 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -32,6 +32,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <linux/skbuff.h>
+#include <net/netlink.h>
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 
@@ -280,7 +281,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index a511ba83e26..96dfdf78d32 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -30,6 +30,7 @@
 #include <linux/notifier.h>
 #include <linux/init.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <linux/ipv6.h>
 #include <net/route.h>
 #include <linux/skbuff.h>
@@ -476,7 +477,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 231895562c6..626ce96800f 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -32,6 +32,7 @@
 #include <linux/etherdevice.h>
 #include <linux/notifier.h>
 #include <net/ip.h>
+#include <net/netlink.h>
 #include <net/route.h>
 #include <linux/skbuff.h>
 #include <net/sock.h>
@@ -408,7 +409,7 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
 	return skb->len;
 
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 814bb3125ad..6b7f6dc144c 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -628,7 +628,7 @@ out:
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1182,7 +1182,7 @@ out:
 	return 0;
 
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1367,7 +1367,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
 
 rtattr_failure:
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1767,7 +1767,7 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m,
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -1956,7 +1956,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve
 	return skb->len;
 
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -2157,7 +2157,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
 	return skb->len;
 
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -2268,7 +2268,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
 	return skb->len;
 
 nlmsg_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
@@ -2427,7 +2427,7 @@ static int build_report(struct sk_buff *skb, u8 proto,
 
 nlmsg_failure:
 rtattr_failure:
-	skb_trim(skb, b - skb->data);
+	nlmsg_trim(skb, b);
 	return -1;
 }
 
-- 
cgit v1.2.3


From 6b88dd966b42e374dc783c397efc15f5c1458265 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Mar 2007 22:29:03 -0300
Subject: [SK_BUFF] ipv6: Use skb_network_offset in some more places

So that we reduce the number of direct accesses to skb->data.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 net/ipv6/netfilter/ip6_tables.c         | 2 +-
 net/ipv6/netfilter/nf_conntrack_reasm.c | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index caf9e375a0f..b97aedce62a 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1448,7 +1448,7 @@ static void __exit ip6_tables_fini(void)
 int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
 		  int target, unsigned short *fragoff)
 {
-	unsigned int start = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
+	unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
 	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
 	unsigned int len = skb->len - start;
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b7889ceef55..721f02d7b7f 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -707,8 +707,9 @@ static int
 find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
 {
 	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
-	u8 prev_nhoff = (u8 *)&ipv6_hdr(skb)->nexthdr - skb->data;
-	int start = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
+	const int netoff = skb_network_offset(skb);
+	u8 prev_nhoff = netoff + offsetof(struct ipv6hdr, nexthdr);
+	int start = netoff + sizeof(struct ipv6hdr);
 	int len = skb->len - start;
 	u8 prevhdr = NEXTHDR_IPV6;
 
-- 
cgit v1.2.3


From f2adc9866742e7904f0268824edc53c948741415 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 20 Mar 2007 11:52:34 -0300
Subject: [ATM] idt77252: Fix double kfree_skb on failure in push_rx_skb

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 drivers/atm/idt77252.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 20f2a3a8265..057efbc55d3 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -1839,7 +1839,6 @@ push_rx_skb(struct idt77252_dev *card, struct sk_buff *skb, int queue)
 		skb_put(skb, SAR_FB_SIZE_3);
 		break;
 	default:
-		dev_kfree_skb(skb);
 		return -1;
 	}
 
-- 
cgit v1.2.3


From d004b8d4903180c111e114726982c194adf2a04f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 20 Mar 2007 12:00:44 -0300
Subject: [LMC]: lmc_main wants to use skb_tailroom

At that point it is equivalent to what was being used, skb->end - skb->data,
and the need is clearly the one skb_tailroom satisfies.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/lmc/lmc_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 5bb18c0955b..a576113abbd 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -1932,7 +1932,7 @@ static void lmc_softreset (lmc_softc_t * const sc) /*fold00*/
         sc->lmc_rxring[i].status = 0x80000000;
 
         /* used to be PKT_BUF_SZ now uses skb since we lose some to head room */
-        sc->lmc_rxring[i].length = skb_end_pointer(skb) - skb->data;
+        sc->lmc_rxring[i].length = skb_tailroom(skb);
 
         /* use to be tail which is dumb since you're thinking why write
          * to the end of the packj,et but since there's nothing there tail == data
-- 
cgit v1.2.3


From 8b5be26831b973d8013e8b4c9860d9694310cdc6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 20 Mar 2007 12:08:20 -0300
Subject: [FORCEDETH]: Use skb_tailroom where appropriate

Reducing the number of skb->data direct accesses.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/forcedeth.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index d3f4bcaa969..7a018027fcc 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -1388,11 +1388,9 @@ static int nv_alloc_rx(struct net_device *dev)
 			np->put_rx_ctx->skb = skb;
 			np->put_rx_ctx->dma = pci_map_single(np->pci_dev,
 							     skb->data,
-							(skb_end_pointer(skb) -
-							 skb->data),
+							     skb_tailroom(skb),
 							     PCI_DMA_FROMDEVICE);
-			np->put_rx_ctx->dma_len = (skb_end_pointer(skb) -
-						   skb->data);
+			np->put_rx_ctx->dma_len = skb_tailroom(skb);
 			np->put_rx.orig->buf = cpu_to_le32(np->put_rx_ctx->dma);
 			wmb();
 			np->put_rx.orig->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL);
@@ -1422,11 +1420,9 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
 			np->put_rx_ctx->skb = skb;
 			np->put_rx_ctx->dma = pci_map_single(np->pci_dev,
 							     skb->data,
-							     (skb_end_pointer(skb) -
-							      skb->data),
+							     skb_tailroom(skb),
 							     PCI_DMA_FROMDEVICE);
-			np->put_rx_ctx->dma_len = (skb_end_pointer(skb) -
-						   skb->data);
+			np->put_rx_ctx->dma_len = skb_tailroom(skb);
 			np->put_rx.ex->bufhigh = cpu_to_le64(np->put_rx_ctx->dma) >> 32;
 			np->put_rx.ex->buflow = cpu_to_le64(np->put_rx_ctx->dma) & 0x0FFFFFFFF;
 			wmb();
@@ -4383,12 +4379,12 @@ static int nv_loopback_test(struct net_device *dev)
 		ret = 0;
 		goto out;
 	}
+	test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data,
+				       skb_tailroom(tx_skb),
+				       PCI_DMA_FROMDEVICE);
 	pkt_data = skb_put(tx_skb, pkt_len);
 	for (i = 0; i < pkt_len; i++)
 		pkt_data[i] = (u8)(i & 0xff);
-	test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data,
-				       (skb_end_pointer(tx_skb) -
-					tx_skb->data), PCI_DMA_FROMDEVICE);
 
 	if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
 		np->tx_ring.orig[0].buf = cpu_to_le32(test_dma_addr);
-- 
cgit v1.2.3


From 6b811d43f6cc9eccdfc011a99f8571df2abc46d1 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 12:26:51 -0300
Subject: [DCCP]: 48-bit sequence number arithmetic

This patch
 * organizes the sequence arithmetic functions into one corner of dccp.h
 * performs a small modification of dccp_set_seqno to make it more widely reusable
   (now it is safe to use any number, since it performs modulo-2^48 assignment)
 * adds functions and generic macros for 48-bit sequence arithmetic:
 	--48 bit complement
 	--modulo-48 addition and modulo-48 subtraction
	--dccp_inc_seqno now a special case of add48
Constants renamed following a suggestion by Arnaldo.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ackvec.c |  2 +-
 net/dccp/dccp.h   | 47 +++++++++++++++++++++++++++--------------------
 2 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index a086c6312d3..01030f34617 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -157,7 +157,7 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
 
 	if (av != NULL) {
 		av->dccpav_buf_head	= DCCP_MAX_ACKVEC_LEN - 1;
-		av->dccpav_buf_ackno	= DCCP_MAX_SEQNO + 1;
+		av->dccpav_buf_ackno	= UINT48_MAX + 1;
 		av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
 		av->dccpav_time.tv_sec	= 0;
 		av->dccpav_time.tv_usec	= 0;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index e33a9edb403..a2c20a265b2 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -92,6 +92,32 @@ extern int  sysctl_dccp_feat_send_ack_vector;
 extern int  sysctl_dccp_feat_send_ndp_count;
 extern int  sysctl_dccp_tx_qlen;
 
+/*
+ *	48-bit sequence number arithmetic (signed and unsigned)
+ */
+#define INT48_MIN	  0x800000000000LL		/* 2^47	    */
+#define UINT48_MAX	  0xFFFFFFFFFFFFLL		/* 2^48 - 1 */
+#define COMPLEMENT48(x)	 (0x1000000000000LL - (x))	/* 2^48 - x */
+#define TO_SIGNED48(x)	 (((x) < INT48_MIN)? (x) : -COMPLEMENT48( (x)))
+#define TO_UNSIGNED48(x) (((x) >= 0)?	     (x) :  COMPLEMENT48(-(x)))
+#define ADD48(a, b)	 (((a) + (b)) & UINT48_MAX)
+#define SUB48(a, b)	 ADD48((a), COMPLEMENT48(b))
+
+static inline void dccp_set_seqno(u64 *seqno, u64 value)
+{
+	*seqno = value & UINT48_MAX;
+}
+
+static inline void dccp_inc_seqno(u64 *seqno)
+{
+	*seqno = ADD48(*seqno, 1);
+}
+
+static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
+{
+	return ((seqno2 << 16) - (seqno1 << 16)) >> 16;
+}
+
 /* is seq1 < seq2 ? */
 static inline int before48(const u64 seq1, const u64 seq2)
 {
@@ -313,26 +339,7 @@ static inline int dccp_packet_without_ack(const struct sk_buff *skb)
 	return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST;
 }
 
-#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
-#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)
-
-static inline void dccp_set_seqno(u64 *seqno, u64 value)
-{
-	if (value > DCCP_MAX_SEQNO)
-		value -= DCCP_MAX_SEQNO + 1;
-	*seqno = value;
-}
-
-static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
-{
-	return ((seqno2 << 16) - (seqno1 << 16)) >> 16;
-}
-
-static inline void dccp_inc_seqno(u64 *seqno)
-{
-	if (++*seqno > DCCP_MAX_SEQNO)
-		*seqno = 0;
-}
+#define DCCP_PKT_WITHOUT_ACK_SEQ (UINT48_MAX << 2)
 
 static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
 {
-- 
cgit v1.2.3


From 0aec51c86986f61de26dd04913667af544a8b8eb Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 12:45:59 -0300
Subject: [DCCP]: Make dccp_delta_seqno return signed numbers

Problem:
---
 net/dccp/dccp.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index a2c20a265b2..afb313ff216 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -113,9 +113,12 @@ static inline void dccp_inc_seqno(u64 *seqno)
 	*seqno = ADD48(*seqno, 1);
 }
 
-static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
+/* signed mod-2^48 distance: pos. if seqno1 < seqno2, neg. if seqno1 > seqno2 */
+static inline s64 dccp_delta_seqno(const u64 seqno1, const u64 seqno2)
 {
-	return ((seqno2 << 16) - (seqno1 << 16)) >> 16;
+	u64 delta = SUB48(seqno2, seqno1);
+
+	return TO_SIGNED48(delta);
 }
 
 /* is seq1 < seq2 ? */
-- 
cgit v1.2.3


From d52de17b8cf36d43a9d6977e7861a9f415541c6b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 13:00:26 -0300
Subject: [DCCP]: Make `before' relation unambiguous

Problem:
---
 net/dccp/dccp.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index afb313ff216..1615986a8d9 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -124,14 +124,11 @@ static inline s64 dccp_delta_seqno(const u64 seqno1, const u64 seqno2)
 /* is seq1 < seq2 ? */
 static inline int before48(const u64 seq1, const u64 seq2)
 {
-	return (s64)((seq1 << 16) - (seq2 << 16)) < 0;
+	return (s64)((seq2 << 16) - (seq1 << 16)) > 0;
 }
 
 /* is seq1 > seq2 ? */
-static inline int after48(const u64 seq1, const u64 seq2)
-{
-	return (s64)((seq2 << 16) - (seq1 << 16)) < 0;
-}
+#define after48(seq1, seq2)	before48(seq2, seq1)
 
 /* is seq2 <= seq1 <= seq3 ? */
 static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
-- 
cgit v1.2.3


From b16be51b5e5d75cec71b18ebc75f15a4734c62ad Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 13:03:47 -0300
Subject: [DCCP]: Fix for follows48

The follows48 relation identifies whether 48-bit sequence number
x is the direct successor of y. Currently, it does not handle cases
of the following type correctly:

	follows48(0x(prefix)10000LL, 0x(prefix)0FFFFLL)

where prefix is an arbitrary hex sequence of up to 7 digits.

This is fixed by reusing the new dccp_delta_seqno function.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/dccp.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 1615986a8d9..c66a4581348 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -144,9 +144,7 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
 /* is seq1 next seqno after seq2 */
 static inline int follows48(const u64 seq1, const u64 seq2)
 {
-	int diff = (seq1 & 0xFFFF) - (seq2 & 0xFFFF);
-
-	return diff==1;
+	return dccp_delta_seqno(seq2, seq1) == 1;
 }
 
 enum {
-- 
cgit v1.2.3


From 8d13bf9a0bd4984756e234ce54299b92acefab99 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 13:08:19 -0300
Subject: [DCCP]: Remove ambiguity in the way before48 is used

This removes two ambiguities in employing the new definition of before48,
following the analysis on http://www.mail-archive.com/dccp@vger.kernel.org/msg01295.html

 (1) Updating GSR when P.seqno >= S.SWL
     With the old definition we did not update when P.seqno and S.SWL are 2^47 apart. To
     ensure the same behaviour as with the old definition, this is replaced with the
     equivalent condition dccp_delta_seqno(S.SWL, P.seqno) >= 0

 (2) Sending SYNC when P.seqno >= S.OSR
     Here it is debatable whether the new definition causes an ambiguity: the case is
     similar to (1); and to have consistency with the case (1), we use the equivalent
     condition dccp_delta_seqno(S.OSR, P.seqno) >= 0

Detailed Justification
---
 net/dccp/input.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/dccp/input.c b/net/dccp/input.c
index 78b043c458b..a1900157e2d 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -86,7 +86,8 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
 	    dh->dccph_type == DCCP_PKT_SYNCACK) {
 		if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
 			      dp->dccps_awl, dp->dccps_awh) &&
-		    !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl))
+		    dccp_delta_seqno(dp->dccps_swl,
+				     DCCP_SKB_CB(skb)->dccpd_seq) >= 0)
 			dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
 		else
 			return -1;
@@ -203,7 +204,8 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
 		if (dp->dccps_role != DCCP_ROLE_CLIENT)
 			goto send_sync;
 check_seq:
-		if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) {
+		if (dccp_delta_seqno(dp->dccps_osr,
+				     DCCP_SKB_CB(skb)->dccpd_seq) >= 0) {
 send_sync:
 			dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
 				       DCCP_PKT_SYNC);
-- 
cgit v1.2.3


From 353b13e10a3f1a18c6b33858fb3337bcd2692eb5 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 13:10:15 -0300
Subject: [CCID3]: Remove redundant `len' test

Since CCID3 avoids  sending 0-byte data packets (cf. ccid3_hc_tx_send_packet),
testing for zero-payload length, as performed by ccid3_hc_tx_update_s, is
redundant - hence removed by this patch.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 746f79d104b..4be62a96566 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -149,11 +149,8 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
  */
 static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
 {
-	if (unlikely(len == 0))
-		ccid3_pr_debug("Packet payload length is 0 - not updating\n");
-	else
-		hctx->ccid3hctx_s = hctx->ccid3hctx_s == 0 ? len :
-				    (9 * hctx->ccid3hctx_s + len) / 10;
+	hctx->ccid3hctx_s = hctx->ccid3hctx_s == 0 ? len :
+			    (9 * hctx->ccid3hctx_s + len) / 10;
 	/*
 	 * Note: We could do a potential optimisation here - when `s' changes,
 	 *	 recalculate sending rate and consequently t_ipi, t_delta, and
-- 
cgit v1.2.3


From 9bf17475eb658a920125bd8f05edf9c57c2dd950 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 13:11:24 -0300
Subject: [CCID3]: Re-order CCID 3 source file

No code change at all.
This splits ccid3.c into a RX and a TX section, so that the file has an
organisation similar to the other ones (e.g. packet_history.{h,c}).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 89 +++++++++++++++++++++++++-------------------------
 1 file changed, 45 insertions(+), 44 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 4be62a96566..0009b399d14 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -33,7 +33,6 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
-
 #include "../ccid.h"
 #include "../dccp.h"
 #include "lib/packet_history.h"
@@ -52,6 +51,9 @@ static struct dccp_tx_hist *ccid3_tx_hist;
 static struct dccp_rx_hist *ccid3_rx_hist;
 static struct dccp_li_hist *ccid3_li_hist;
 
+/*
+ *	Transmitter Half-Connection Routines
+ */
 #ifdef CONFIG_IP_DCCP_CCID3_DEBUG
 static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
 {
@@ -641,10 +643,50 @@ static void ccid3_hc_tx_exit(struct sock *sk)
 	dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist);
 }
 
+static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
+{
+	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+
+	/* Listen socks doesn't have a private CCID block */
+	if (sk->sk_state == DCCP_LISTEN)
+		return;
+
+	BUG_ON(hctx == NULL);
+
+	info->tcpi_rto = hctx->ccid3hctx_t_rto;
+	info->tcpi_rtt = hctx->ccid3hctx_rtt;
+}
+
+static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
+				  u32 __user *optval, int __user *optlen)
+{
+	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+	const void *val;
+
+	/* Listen socks doesn't have a private CCID block */
+	if (sk->sk_state == DCCP_LISTEN)
+		return -EINVAL;
+
+	switch (optname) {
+	case DCCP_SOCKOPT_CCID_TX_INFO:
+		if (len < sizeof(hctx->ccid3hctx_tfrc))
+			return -EINVAL;
+		len = sizeof(hctx->ccid3hctx_tfrc);
+		val = &hctx->ccid3hctx_tfrc;
+		break;
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	if (put_user(len, optlen) || copy_to_user(optval, val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
 /*
- * RX Half Connection methods
+ *	Receiver Half-Connection Routines
  */
-
 #ifdef CONFIG_IP_DCCP_CCID3_DEBUG
 static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
 {
@@ -1129,20 +1171,6 @@ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_rcv_rtt  = hcrx->ccid3hcrx_rtt;
 }
 
-static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
-{
-	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-
-	/* Listen socks doesn't have a private CCID block */
-	if (sk->sk_state == DCCP_LISTEN)
-		return;
-
-	BUG_ON(hctx == NULL);
-
-	info->tcpi_rto = hctx->ccid3hctx_t_rto;
-	info->tcpi_rtt = hctx->ccid3hctx_rtt;
-}
-
 static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
 				  u32 __user *optval, int __user *optlen)
 {
@@ -1170,33 +1198,6 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
 	return 0;
 }
 
-static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
-				  u32 __user *optval, int __user *optlen)
-{
-	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	const void *val;
-
-	/* Listen socks doesn't have a private CCID block */
-	if (sk->sk_state == DCCP_LISTEN)
-		return -EINVAL;
-
-	switch (optname) {
-	case DCCP_SOCKOPT_CCID_TX_INFO:
-		if (len < sizeof(hctx->ccid3hctx_tfrc))
-			return -EINVAL;
-		len = sizeof(hctx->ccid3hctx_tfrc);
-		val = &hctx->ccid3hctx_tfrc;
-		break;
-	default:
-		return -ENOPROTOOPT;
-	}
-
-	if (put_user(len, optlen) || copy_to_user(optval, val, len))
-		return -EFAULT;
-
-	return 0;
-}
-
 static struct ccid_operations ccid3 = {
 	.ccid_id		   = DCCPC_CCID3,
 	.ccid_name		   = "ccid3",
-- 
cgit v1.2.3


From 371fe7779cad6557a58df9a1b5543652e067400f Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 14:28:44 -0300
Subject: [CCID3]: Use MSS for larger initial windows

This improves the slow-start phase by using the MSS
(as suggested in RFC 4342, sec. 5) instead of the packet size s.
Also figured out that __u32 is ample resource enough.

After applying, I got the following in the logs:

  ccid3_hc_tx_packet_recv: client(f7421700), s=6, MSS=1424, w_init=4380, R_sample=176us, X=24886363

Had the previous variant been used, w_init would have been as low as 24.

Committer note: removed unneeded cast to unsigned long long that was
                causing a compiler warning on 64bit architectures.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 0009b399d14..52dae40bdff 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -466,20 +466,20 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
 			/*
 			 * Larger Initial Windows [RFC 4342, sec. 5]
-			 * We deviate in that we use `s' instead of `MSS'.
 			 */
-			__u64 w_init = min(4 * hctx->ccid3hctx_s,
-					   max(2 * hctx->ccid3hctx_s, 4380));
+			__u32 w_init = min(4 * dp->dccps_mss_cache,
+					   max(2 * dp->dccps_mss_cache, 4380U));
 			hctx->ccid3hctx_rtt  = r_sample;
 			hctx->ccid3hctx_x    = scaled_div(w_init << 6, r_sample);
 			hctx->ccid3hctx_t_ld = now;
 
 			ccid3_update_send_time(hctx);
 
-			ccid3_pr_debug("%s(%p), s=%u, w_init=%llu, "
+			ccid3_pr_debug("%s(%p), s=%u, MSS=%u, w_init=%u, "
 				       "R_sample=%dus, X=%u\n", dccp_role(sk),
 				       sk, hctx->ccid3hctx_s,
-				       (unsigned long long)w_init,
+				       dp->dccps_mss_cache,
+				       w_init,
 				       (int)r_sample,
 				       (unsigned)(hctx->ccid3hctx_x >> 6));
 
-- 
cgit v1.2.3


From 551dc5f7a11cfb66685bfd36cbbdb209c5a11d14 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Tue, 20 Mar 2007 14:46:52 -0300
Subject: [CCID3]: Fix use of invalid loss intervals

This fixes a bug which uses an invalid comparison.
The bug resulted in the use of invalid loss intervals.

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Acked-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/lib/loss_interval.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 0a0baef16b3..372d7e75cdd 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -91,7 +91,7 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list)
 	u32 w_tot  = 0;
 
 	list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) {
-		if (li_entry->dccplih_interval != ~0) {
+		if (li_entry->dccplih_interval != ~0U) {
 			i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i];
 			w_tot  += dccp_li_hist_w[i];
 			if (i != 0)
-- 
cgit v1.2.3


From 8699be7d240e37c91a84bdf32e79941d72bc7bd5 Mon Sep 17 00:00:00 2001
From: Ian McDonald <ian.mcdonald@jandi.co.nz>
Date: Tue, 20 Mar 2007 14:49:20 -0300
Subject: [CCID3]: More verbose debugging

This adds a few debugging statements to ccid3.c

Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 52dae40bdff..26ab73db286 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -99,6 +99,11 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
 	/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
 	hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
 					   TFRC_OPSYS_HALF_TIME_GRAN);
+
+	ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%llu\n",
+		       hctx->ccid3hctx_t_ipi, hctx->ccid3hctx_delta,
+		       hctx->ccid3hctx_s, hctx->ccid3hctx_x >> 6);
+
 }
 /*
  * Update X by
@@ -141,8 +146,13 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
 		hctx->ccid3hctx_t_ld = *now;
 	}
 
-	if (hctx->ccid3hctx_x != old_x)
+	if (hctx->ccid3hctx_x != old_x) {
+		ccid3_pr_debug("X_prev=%llu, X_now=%llu, X_calc=%u, "
+			       "X_recv=%llu\n", old_x >> 6, hctx->ccid3hctx_x >> 6,
+			       hctx->ccid3hctx_x_calc, hctx->ccid3hctx_x_recv >> 6);
+
 		ccid3_update_send_time(hctx);
+	}
 }
 
 /*
@@ -339,6 +349,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 	case TFRC_SSTATE_NO_FBACK:
 	case TFRC_SSTATE_FBACK:
 		delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now);
+		ccid3_pr_debug("delay=%ld\n", (long)delay);
 		/*
 		 *	Scheduling of packet transmissions [RFC 3448, 4.6]
 		 *
-- 
cgit v1.2.3


From 1266adee12d25385a25e1c57b1e3ff05a90bb4d7 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 14:56:11 -0300
Subject: [CCID3]: Remove race condition and update t_ipi when `s' changes

This:

 1. removes a race condition in the access to the scheduled send time t_nom which
    results from allowing asynchronous r/w access to t_nom without locks;

 2. updates the inter-packet interval t_ipi = s/X when `s' changes, following a
    suggestion by Ian McDonald.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 35 ++++++++++++-----------------------
 1 file changed, 12 insertions(+), 23 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 26ab73db286..95eca99e4dd 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -82,20 +82,14 @@ static void ccid3_hc_tx_set_state(struct sock *sk,
 }
 
 /*
- * Recalculate scheduled nominal send time t_nom, inter-packet interval
- * t_ipi, and delta value. Should be called after each change to X.
+ * Recalculate t_ipi and delta (should be called whenever X changes)
  */
-static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
+static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
 {
-	timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
-
 	/* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
 	hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s,
 					   hctx->ccid3hctx_x >> 6);
 
-	/* Update nominal send time with regard to the new t_ipi */
-	timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
-
 	/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
 	hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
 					   TFRC_OPSYS_HALF_TIME_GRAN);
@@ -119,8 +113,6 @@ static inline void ccid3_update_send_time(struct ccid3_hc_tx_sock *hctx)
  *       fine-grained resolution of sending rates. This requires scaling by 2^6
  *       throughout the code. Only X_calc is unscaled (in bytes/second).
  *
- * If X has changed, we also update the scheduled send time t_now,
- * the inter-packet interval t_ipi, and the delta value.
  */
 static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
 
@@ -151,7 +143,7 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
 			       "X_recv=%llu\n", old_x >> 6, hctx->ccid3hctx_x >> 6,
 			       hctx->ccid3hctx_x_calc, hctx->ccid3hctx_x_recv >> 6);
 
-		ccid3_update_send_time(hctx);
+		ccid3_update_send_interval(hctx);
 	}
 }
 
@@ -161,14 +153,12 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
  */
 static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len)
 {
-	hctx->ccid3hctx_s = hctx->ccid3hctx_s == 0 ? len :
-			    (9 * hctx->ccid3hctx_s + len) / 10;
-	/*
-	 * Note: We could do a potential optimisation here - when `s' changes,
-	 *	 recalculate sending rate and consequently t_ipi, t_delta, and
-	 *	 t_now. This is however non-standard, and the benefits are not
-	 *	 clear, so it is currently left out.
-	 */
+	const u16 old_s = hctx->ccid3hctx_s;
+
+	hctx->ccid3hctx_s = old_s == 0 ? len : (9 * old_s + len) / 10;
+
+	if (hctx->ccid3hctx_s != old_s)
+		ccid3_update_send_interval(hctx);
 }
 
 /*
@@ -228,7 +218,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 		/* The value of R is still undefined and so we can not recompute
 		 * the timout value. Keep initial value as per [RFC 4342, 5]. */
 		t_nfb = TFRC_INITIAL_TIMEOUT;
-		ccid3_update_send_time(hctx);
+		ccid3_update_send_interval(hctx);
 		break;
 	case TFRC_SSTATE_FBACK:
 		/*
@@ -334,8 +324,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
 
 		/* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */
-		ccid3_hc_tx_update_s(hctx, skb->len);
-		hctx->ccid3hctx_x = hctx->ccid3hctx_s;
+		hctx->ccid3hctx_x = hctx->ccid3hctx_s = skb->len;
 		hctx->ccid3hctx_x <<= 6;
 
 		/* First timeout, according to [RFC 3448, 4.2], is 1 second */
@@ -484,7 +473,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			hctx->ccid3hctx_x    = scaled_div(w_init << 6, r_sample);
 			hctx->ccid3hctx_t_ld = now;
 
-			ccid3_update_send_time(hctx);
+			ccid3_update_send_interval(hctx);
 
 			ccid3_pr_debug("%s(%p), s=%u, MSS=%u, w_init=%u, "
 				       "R_sample=%dus, X=%u\n", dccp_role(sk),
-- 
cgit v1.2.3


From ac12b0c49571fe4c3a2f4957ed494da316d558be Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 14:59:23 -0300
Subject: [DCCP]: Always use debug-toggle parameters

Currently debugging output (when configured) is automatically enabled when
DCCP modules are compiled into the kernel rather than built as loadable modules.
This is not necessary, since the module parameters in this case become kernel
commandline parameters, e.g. DCCP or CCID3 debug output can be enabled for a
static build by appending the following at the boot prompt:

	dccp.dccp_debug=1 	dccp_ccid3.ccid3_debug=1

This patch therefore does away with the more complicated way of always enabling
debug output for static builds

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/dccp.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index c66a4581348..292f18ef4f6 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -31,13 +31,9 @@
 					      __stringify(cond));          \
 			     } while (0)
 
-#ifdef MODULE
 #define DCCP_PRINTK(enable, fmt, args...)	do { if (enable)	     \
 							printk(fmt, ##args); \
 						} while(0)
-#else
-#define DCCP_PRINTK(enable, fmt, args...)	printk(fmt, ##args)
-#endif
 #define DCCP_PR_DEBUG(enable, fmt, a...)	DCCP_PRINTK(enable, KERN_DEBUG \
 						  "%s: " fmt, __FUNCTION__, ##a)
 
-- 
cgit v1.2.3


From 6626e3628fe42837f733d103e194c6b4473d8669 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 15:00:28 -0300
Subject: [DCCP]: More debug information for dccp_wait_for_ccid

This adds more detail in the wait_for_ccid packet scheduling loop.
In particular, it informs about (i) when delay is used and (ii) why
a packet is discarded.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/output.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/dccp/output.c b/net/dccp/output.c
index aa21cc4de37..c8d843e983f 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -194,6 +194,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb)
 		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
 		if (rc <= 0)
 			break;
+		dccp_pr_debug("delayed send by %d msec\n", rc);
 		delay = msecs_to_jiffies(rc);
 		sk->sk_write_pending++;
 		release_sock(sk);
@@ -255,7 +256,7 @@ void dccp_write_xmit(struct sock *sk, int block)
 				DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
 					 err);
 		} else {
-			dccp_pr_debug("packet discarded\n");
+			dccp_pr_debug("packet discarded due to err=%d\n", err);
 			kfree_skb(skb);
 		}
 	}
-- 
cgit v1.2.3


From f2645101350c6db66f0a1e72648909cc411f2b38 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 15:01:14 -0300
Subject: [CCID3]: Add documentation for socket options

This updates the documentation on CCID3-specific options.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dccp.txt | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
index 387482e46c4..4504cc59e40 100644
--- a/Documentation/networking/dccp.txt
+++ b/Documentation/networking/dccp.txt
@@ -57,6 +57,16 @@ DCCP_SOCKOPT_SEND_CSCOV is for the receiver and has a different meaning: it
 	coverage value are also acceptable. The higher the number, the more
 	restrictive this setting (see [RFC 4340, sec. 9.2.1]).
 
+The following two options apply to CCID 3 exclusively and are getsockopt()-only.
+In either case, a TFRC info struct (defined in <linux/tfrc.h>) is returned.
+DCCP_SOCKOPT_CCID_RX_INFO
+	Returns a `struct tfrc_rx_info' in optval; the buffer for optval and
+	optlen must be set to at least sizeof(struct tfrc_rx_info).
+DCCP_SOCKOPT_CCID_TX_INFO
+	Returns a `struct tfrc_tx_info' in optval; the buffer for optval and
+	optlen must be set to at least sizeof(struct tfrc_tx_info).
+
+
 Sysctl variables
 ================
 Several DCCP default parameters can be managed by the following sysctls
-- 
cgit v1.2.3


From fddc2feb94c1f734dc27d44d166e97ab2e005ec1 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 15:02:10 -0300
Subject: [CCID3]: More to see in dccp_probe

This adds a few more fields of interest to /proc/net/dccpprobe, the following output ensues:

1           2          3           4     5  6     7   8        9        10   11
sec.usec   src:sport   dst:dport   size  s  rtt   p   X_calc   X_recv   X    t_ipi

Also made the formatting consistent.

Scripts that go with this can be downloaded from http://139.133.210.30/users/gerrit/dccp/dccp_probe/

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/probe.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 3b1f509f51d..1f5e3ba6206 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -90,15 +90,18 @@ static int jdccp_sendmsg(struct kiocb *iocb, struct sock *sk,
 	if (port == 0 || ntohs(inet->dport) == port ||
 	    ntohs(inet->sport) == port) {
 		if (hctx)
-			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %d\n",
-			   NIPQUAD(inet->saddr), ntohs(inet->sport),
-			   NIPQUAD(inet->daddr), ntohs(inet->dport), size,
-			   hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
-			   hctx->ccid3hctx_p, hctx->ccid3hctx_t_ipi);
+			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %d %d %d %u "
+			       "%llu %llu %d\n",
+			       NIPQUAD(inet->saddr), ntohs(inet->sport),
+			       NIPQUAD(inet->daddr), ntohs(inet->dport), size,
+			       hctx->ccid3hctx_s, hctx->ccid3hctx_rtt,
+			       hctx->ccid3hctx_p, hctx->ccid3hctx_x_calc,
+			       hctx->ccid3hctx_x_recv >> 6,
+			       hctx->ccid3hctx_x >> 6, hctx->ccid3hctx_t_ipi);
 		else
 			printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d\n",
-			   NIPQUAD(inet->saddr), ntohs(inet->sport),
-			   NIPQUAD(inet->daddr), ntohs(inet->dport), size);
+			       NIPQUAD(inet->saddr), ntohs(inet->sport),
+			       NIPQUAD(inet->daddr), ntohs(inet->dport), size);
 	}
 
 	jprobe_return();
-- 
cgit v1.2.3


From 1761f7d7fea32c2290710f5c0afa0c3d93220593 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 15:04:30 -0300
Subject: [CCID3]: Remove build warnings for 64bit

This clears the following sparc64 build warnings:
 1) warning: format "%ld" expects type "long int", but argument 3 has type "suseconds_t"
 2) warning: format "%llu" expects type "long long unsigned int", but argument 3 has type "__u64"
Fixed by using typecast to unsigned. This is argued to be safe, since the quantities, after
de-scaling (factor 2^6) fit all in u32.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 95eca99e4dd..dc6e4188331 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -94,9 +94,9 @@ static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
 	hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
 					   TFRC_OPSYS_HALF_TIME_GRAN);
 
-	ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%llu\n",
+	ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n",
 		       hctx->ccid3hctx_t_ipi, hctx->ccid3hctx_delta,
-		       hctx->ccid3hctx_s, hctx->ccid3hctx_x >> 6);
+		       hctx->ccid3hctx_s, (unsigned)(hctx->ccid3hctx_x >> 6));
 
 }
 /*
@@ -139,9 +139,11 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
 	}
 
 	if (hctx->ccid3hctx_x != old_x) {
-		ccid3_pr_debug("X_prev=%llu, X_now=%llu, X_calc=%u, "
-			       "X_recv=%llu\n", old_x >> 6, hctx->ccid3hctx_x >> 6,
-			       hctx->ccid3hctx_x_calc, hctx->ccid3hctx_x_recv >> 6);
+		ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, "
+			       "X_recv=%u\n", (unsigned)(old_x >> 6),
+			       (unsigned)(hctx->ccid3hctx_x >> 6),
+			       hctx->ccid3hctx_x_calc,
+			       (unsigned)(hctx->ccid3hctx_x_recv >> 6));
 
 		ccid3_update_send_interval(hctx);
 	}
-- 
cgit v1.2.3


From a21f9f96cd035b0d9aec32d80ea0152672fbed42 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 15:12:10 -0300
Subject: [CCID3]: Wrap computation of RFC3390-initial rate into separate
 function

The CCID 3 and TFRC specs (RFC 4342, RFC 3448, draft-3448bis) make frequent
reference to the computation of the RFC-3390 initial sending rate:

  1. Initial sending rate when RTT is known (RFC 4342, p. 6)
  2. Response to Idle/Application-Limited periods (RFC 4342, 5.1)

This warrants putting the code into its own function, for later code reuse.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index dc6e4188331..8d33a09608e 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -81,6 +81,21 @@ static void ccid3_hc_tx_set_state(struct sock *sk,
 	hctx->ccid3hctx_state = state;
 }
 
+/*
+ * Compute the initial sending rate X_init according to RFC 3390:
+ *	w_init   =    min(4 * MSS, max(2 * MSS, 4380 bytes))
+ *	X_init   =    w_init / RTT
+ * For consistency with other parts of the code, X_init is scaled by 2^6.
+ */
+static inline u64 rfc3390_initial_rate(struct sock *sk)
+{
+	const struct dccp_sock *dp = dccp_sk(sk);
+	const __u32 w_init = min(4 * dp->dccps_mss_cache,
+				 max(2 * dp->dccps_mss_cache, 4380U));
+
+	return scaled_div(w_init << 6, ccid3_hc_tx_sk(sk)->ccid3hctx_rtt);
+}
+
 /*
  * Recalculate t_ipi and delta (should be called whenever X changes)
  */
@@ -469,20 +484,16 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			/*
 			 * Larger Initial Windows [RFC 4342, sec. 5]
 			 */
-			__u32 w_init = min(4 * dp->dccps_mss_cache,
-					   max(2 * dp->dccps_mss_cache, 4380U));
 			hctx->ccid3hctx_rtt  = r_sample;
-			hctx->ccid3hctx_x    = scaled_div(w_init << 6, r_sample);
+			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
 			hctx->ccid3hctx_t_ld = now;
 
 			ccid3_update_send_interval(hctx);
 
-			ccid3_pr_debug("%s(%p), s=%u, MSS=%u, w_init=%u, "
+			ccid3_pr_debug("%s(%p), s=%u, MSS=%u, "
 				       "R_sample=%dus, X=%u\n", dccp_role(sk),
 				       sk, hctx->ccid3hctx_s,
-				       dp->dccps_mss_cache,
-				       w_init,
-				       (int)r_sample,
+				       dp->dccps_mss_cache, (int)r_sample,
 				       (unsigned)(hctx->ccid3hctx_x >> 6));
 
 			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
-- 
cgit v1.2.3


From 0c150efb280986db7958cf2a559b91d826241e59 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 15:19:07 -0300
Subject: [CCID3]: Handle Idle and Application-Limited periods

This updates the code with regard to handling idle and application-limited
periods as specified in [RFC 4342, 5.1].

Background:
---
 net/dccp/ccids/ccid3.c | 84 ++++++++++++++++++++++++--------------------------
 1 file changed, 40 insertions(+), 44 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 8d33a09608e..35123c19a08 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -133,12 +133,23 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
 
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+	__u64 min_rate = 2 * hctx->ccid3hctx_x_recv;
 	const  __u64 old_x = hctx->ccid3hctx_x;
 
+	/*
+	 * Handle IDLE periods: do not reduce below RFC3390 initial sending rate
+	 * when idling [RFC 4342, 5.1]. See also draft-ietf-dccp-rfc3448bis.
+	 * For consistency with X and X_recv, min_rate is also scaled by 2^6.
+	 */
+	if (unlikely(hctx->ccid3hctx_idle)) {
+		min_rate = rfc3390_initial_rate(sk);
+		min_rate = max(min_rate, 2 * hctx->ccid3hctx_x_recv);
+	}
+
 	if (hctx->ccid3hctx_p > 0) {
 
 		hctx->ccid3hctx_x = min(((__u64)hctx->ccid3hctx_x_calc) << 6,
-					hctx->ccid3hctx_x_recv * 2);
+					min_rate);
 		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x,
 					(((__u64)hctx->ccid3hctx_s) << 6) /
 								TFRC_T_MBI);
@@ -147,7 +158,7 @@ static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
 			(suseconds_t)hctx->ccid3hctx_rtt >= 0) {
 
 		hctx->ccid3hctx_x =
-			max(2 * min(hctx->ccid3hctx_x, hctx->ccid3hctx_x_recv),
+			max(min(2 * hctx->ccid3hctx_x, min_rate),
 			    scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
 				       hctx->ccid3hctx_rtt));
 		hctx->ccid3hctx_t_ld = *now;
@@ -209,6 +220,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock *)data;
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+	struct timeval now;
 	unsigned long t_nfb = USEC_PER_SEC / 5;
 
 	bh_lock_sock(sk);
@@ -221,6 +233,8 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 	ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
 		       ccid3_tx_state_name(hctx->ccid3hctx_state));
 
+	hctx->ccid3hctx_idle = 1;
+
 	switch (hctx->ccid3hctx_state) {
 	case TFRC_SSTATE_NO_FBACK:
 		/* RFC 3448, 4.4: Halve send rate directly */
@@ -239,49 +253,33 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 		break;
 	case TFRC_SSTATE_FBACK:
 		/*
-		 * Check if IDLE since last timeout and recv rate is less than
-		 * 4 packets (in units of 64*bytes/sec) per RTT
+		 *  Modify the cached value of X_recv [RFC 3448, 4.4]
+		 *
+		 *  If (p == 0 || X_calc > 2 * X_recv)
+		 *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
+		 *  Else
+		 *    X_recv = X_calc / 4;
+		 *
+		 *  Note that X_recv is scaled by 2^6 while X_calc is not
 		 */
-		if (!hctx->ccid3hctx_idle ||
-		    (hctx->ccid3hctx_x_recv >= 4 *
-		     scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
-				hctx->ccid3hctx_rtt))) {
-			struct timeval now;
+		BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
 
-			ccid3_pr_debug("%s(%p, state=%s), not idle\n",
-				       dccp_role(sk), sk,
-				   ccid3_tx_state_name(hctx->ccid3hctx_state));
+		if (hctx->ccid3hctx_p == 0 ||
+		    (hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))) {
 
-			/*
-			 *  Modify the cached value of X_recv [RFC 3448, 4.4]
-			 *
-			 *  If (p == 0 || X_calc > 2 * X_recv)
-			 *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
-			 *  Else
-			 *    X_recv = X_calc / 4;
-			 *
-			 *  Note that X_recv is scaled by 2^6 while X_calc is not
-			 */
-			BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
-
-			if (hctx->ccid3hctx_p  == 0 ||
-			    (hctx->ccid3hctx_x_calc >
-			     (hctx->ccid3hctx_x_recv >> 5))) {
-
-				hctx->ccid3hctx_x_recv =
-					max(hctx->ccid3hctx_x_recv / 2,
-					    (((__u64)hctx->ccid3hctx_s) << 6) /
-							  (2 * TFRC_T_MBI));
-
-				if (hctx->ccid3hctx_p == 0)
-					dccp_timestamp(sk, &now);
-			} else {
-				hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
-				hctx->ccid3hctx_x_recv <<= 4;
-			}
-			/* Now recalculate X [RFC 3448, 4.3, step (4)] */
-			ccid3_hc_tx_update_x(sk, &now);
+			hctx->ccid3hctx_x_recv =
+				max(hctx->ccid3hctx_x_recv / 2,
+				    (((__u64)hctx->ccid3hctx_s) << 6) /
+							      (2 * TFRC_T_MBI));
+
+			if (hctx->ccid3hctx_p == 0)
+				dccp_timestamp(sk, &now);
+		} else {
+			hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc;
+			hctx->ccid3hctx_x_recv <<= 4;
 		}
+		/* Now recalculate X [RFC 3448, 4.3, step (4)] */
+		ccid3_hc_tx_update_x(sk, &now);
 		/*
 		 * Schedule no feedback timer to expire in
 		 * max(t_RTO, 2 * s/X)  =  max(t_RTO, 2 * t_ipi)
@@ -296,8 +294,6 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 		goto out;
 	}
 
-	hctx->ccid3hctx_idle = 1;
-
 restart_timer:
 	sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
 			   jiffies + usecs_to_jiffies(t_nfb));
@@ -377,6 +373,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 	/* prepare to send now (add options etc.) */
 	dp->dccps_hc_tx_insert_options = 1;
 	DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
+	hctx->ccid3hctx_idle = 0;
 
 	/* set the nominal send time for the next following packet */
 	timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
@@ -407,7 +404,6 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
 	packet->dccphtx_seqno  = dccp_sk(sk)->dccps_gss;
 	packet->dccphtx_rtt    = hctx->ccid3hctx_rtt;
 	packet->dccphtx_sent   = 1;
-	hctx->ccid3hctx_idle   = 0;
 }
 
 static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
-- 
cgit v1.2.3


From 4712a792ee661921374c163eb6a4d06e33fd305f Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 15:23:18 -0300
Subject: [DCCP]: Provide function for RTT sampling

A recurring problem, in particular in the CCID code, is that RTT samples
from packets with timestamp echo and elapsed time options need to be taken.

This service is provided via a new function dccp_sample_rtt in this patch.
Furthermore, to protect against `insane' RTT samples, the sampled value
is bounded between 100 microseconds and 4 seconds - for which u32 is sufficient.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/dccp.h  | 10 ++++++++--
 net/dccp/input.c | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 292f18ef4f6..d8ad27bfe01 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -71,11 +71,15 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
 /* RFC 1122, 4.2.3.1 initial RTO value */
 #define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
 
+#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
+
+/* bounds for sampled RTT values from packet exchanges (in usec) */
+#define DCCP_SANE_RTT_MIN	100
+#define DCCP_SANE_RTT_MAX	(4 * USEC_PER_SEC)
+
 /* Maximal interval between probes for local resources.  */
 #define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
 
-#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
-
 /* sysctl variables for DCCP */
 extern int  sysctl_dccp_request_retries;
 extern int  sysctl_dccp_retries1;
@@ -292,6 +296,8 @@ extern int	   dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
 extern int	   dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
 extern void	   dccp_send_close(struct sock *sk, const int active);
 extern int	   dccp_invalid_packet(struct sk_buff *skb);
+extern u32	   dccp_sample_rtt(struct sock *sk, struct timeval *t_recv,
+						    struct timeval *t_history);
 
 static inline int dccp_bad_service_code(const struct sock *sk,
 					const __be32 service)
diff --git a/net/dccp/input.c b/net/dccp/input.c
index a1900157e2d..bd578c87b2e 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -577,3 +577,43 @@ discard:
 }
 
 EXPORT_SYMBOL_GPL(dccp_rcv_state_process);
+
+/**
+ * dccp_sample_rtt  -  Sample RTT from packet exchange
+ *
+ * @sk:     connected dccp_sock
+ * @t_recv: receive timestamp of packet with timestamp echo
+ * @t_hist: packet history timestamp or NULL
+ */
+u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv,
+				     struct timeval *t_hist)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct dccp_options_received *or = &dp->dccps_options_received;
+	suseconds_t delta;
+
+	if (t_hist == NULL) {
+		if (!or->dccpor_timestamp_echo) {
+			DCCP_WARN("packet without timestamp echo\n");
+			return DCCP_SANE_RTT_MAX;
+		}
+		timeval_sub_usecs(t_recv, or->dccpor_timestamp_echo * 10);
+		delta = timeval_usecs(t_recv);
+	} else
+		delta = timeval_delta(t_recv, t_hist);
+
+	delta -= or->dccpor_elapsed_time * 10;		/* either set or 0 */
+
+	if (unlikely(delta <= 0)) {
+		DCCP_WARN("unusable RTT sample %ld, using min\n", (long)delta);
+		return DCCP_SANE_RTT_MIN;
+	}
+	if (unlikely(delta - (suseconds_t)DCCP_SANE_RTT_MAX > 0)) {
+		DCCP_WARN("RTT sample %ld too large, using max\n", (long)delta);
+		return DCCP_SANE_RTT_MAX;
+	}
+
+	return delta;
+}
+
+EXPORT_SYMBOL_GPL(dccp_sample_rtt);
-- 
cgit v1.2.3


From 7dfee1a9c07f80a82aa5fbad340146f2b5c794b4 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 15:24:37 -0300
Subject: [CCID3]: Use function for RTT sampling

This replaces the existing occurrences of RTT sampling with
the use of the new function dccp_sample_rtt.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 42 +++++++++++-------------------------------
 net/dccp/ccids/ccid3.h | 10 ----------
 2 files changed, 11 insertions(+), 41 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 35123c19a08..f4097ddf250 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -414,8 +414,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	struct dccp_tx_hist_entry *packet;
 	struct timeval now;
 	unsigned long t_nfb;
-	u32 pinv;
-	suseconds_t r_sample, t_elapsed;
+	u32 pinv, r_sample;
 
 	BUG_ON(hctx == NULL);
 
@@ -457,18 +456,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 		 * Calculate new round trip sample as per [RFC 3448, 4.3] by
 		 *	R_sample  =  (now - t_recvdata) - t_elapsed
 		 */
-		r_sample  = timeval_delta(&now, &packet->dccphtx_tstamp);
-		t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;
+		r_sample = dccp_sample_rtt(sk, &now, &packet->dccphtx_tstamp);
 
-		DCCP_BUG_ON(r_sample < 0);
-		if (unlikely(r_sample <= t_elapsed))
-			DCCP_WARN("WARNING: r_sample=%dus <= t_elapsed=%dus\n",
-				  (int)r_sample, (int)t_elapsed);
-		else
-			r_sample -= t_elapsed;
-		CCID3_RTT_SANITY_CHECK(r_sample);
-
-		/* Update RTT estimate by
+		/*
+		 * Update RTT estimate by
 		 * If (No feedback recv)
 		 *    R = R_sample;
 		 * Else
@@ -487,15 +478,15 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			ccid3_update_send_interval(hctx);
 
 			ccid3_pr_debug("%s(%p), s=%u, MSS=%u, "
-				       "R_sample=%dus, X=%u\n", dccp_role(sk),
+				       "R_sample=%uus, X=%u\n", dccp_role(sk),
 				       sk, hctx->ccid3hctx_s,
-				       dp->dccps_mss_cache, (int)r_sample,
+				       dp->dccps_mss_cache, r_sample,
 				       (unsigned)(hctx->ccid3hctx_x >> 6));
 
 			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
 		} else {
 			hctx->ccid3hctx_rtt = (9 * hctx->ccid3hctx_rtt +
-						   (u32)r_sample) / 10;
+						   r_sample) / 10;
 
 			/* Update sending rate (step 4 of [RFC 3448, 4.3]) */
 			if (hctx->ccid3hctx_p > 0)
@@ -505,10 +496,10 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 						    hctx->ccid3hctx_p);
 			ccid3_hc_tx_update_x(sk, &now);
 
-			ccid3_pr_debug("%s(%p), RTT=%uus (sample=%dus), s=%u, "
+			ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
 				       "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
 				       dccp_role(sk),
-				       sk, hctx->ccid3hctx_rtt, (int)r_sample,
+				       sk, hctx->ccid3hctx_rtt, r_sample,
 				       hctx->ccid3hctx_s, hctx->ccid3hctx_p,
 				       hctx->ccid3hctx_x_calc,
 				       (unsigned)(hctx->ccid3hctx_x_recv >> 6),
@@ -1025,8 +1016,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 	const struct dccp_options_received *opt_recv;
 	struct dccp_rx_hist_entry *packet;
 	struct timeval now;
-	u32 p_prev, rtt_prev;
-	suseconds_t r_sample, t_elapsed;
+	u32 p_prev, r_sample, rtt_prev;
 	int loss, payload_size;
 
 	BUG_ON(hcrx == NULL);
@@ -1042,17 +1032,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 			break;
 		rtt_prev = hcrx->ccid3hcrx_rtt;
 		dccp_timestamp(sk, &now);
-		timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10);
-		r_sample = timeval_usecs(&now);
-		t_elapsed = opt_recv->dccpor_elapsed_time * 10;
-
-		DCCP_BUG_ON(r_sample < 0);
-		if (unlikely(r_sample <= t_elapsed))
-			DCCP_WARN("r_sample=%ldus, t_elapsed=%ldus\n",
-				  (long)r_sample, (long)t_elapsed);
-		else
-			r_sample -= t_elapsed;
-		CCID3_RTT_SANITY_CHECK(r_sample);
+		r_sample = dccp_sample_rtt(sk, &now, NULL);
 
 		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
 			hcrx->ccid3hcrx_rtt = r_sample;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 15776a88c09..8d31b389c19 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -51,16 +51,6 @@
 /* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
 #define TFRC_T_MBI		   64
 
-/* What we think is a reasonable upper limit on RTT values */
-#define CCID3_SANE_RTT_MAX	   ((suseconds_t)(4 * USEC_PER_SEC))
-
-#define CCID3_RTT_SANITY_CHECK(rtt) 			do {		   \
-		if (rtt > CCID3_SANE_RTT_MAX) {				   \
-			DCCP_CRIT("RTT (%d) too large, substituting %d",   \
-				  (int)rtt, (int)CCID3_SANE_RTT_MAX);	   \
-			rtt = CCID3_SANE_RTT_MAX;			   \
-		} 					} while (0)
-
 enum ccid3_options {
 	TFRC_OPT_LOSS_EVENT_RATE = 192,
 	TFRC_OPT_LOSS_INTERVALS	 = 193,
-- 
cgit v1.2.3


From 89560b53b92a07c529e13a462aa7fd87a844f1f5 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 15:27:17 -0300
Subject: [DCCP]: Sample RTT from SYN exchange

Function:
---
 include/linux/dccp.h | 2 ++
 net/dccp/input.c     | 8 ++++++++
 net/dccp/options.c   | 8 ++++++++
 3 files changed, 18 insertions(+)

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index fdd4217f104..e668cf531ba 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -481,10 +481,12 @@ struct dccp_ackvec;
  * @dccps_hc_rx_insert_options -
  * @dccps_hc_tx_insert_options -
  * @dccps_xmit_timer - timer for when CCID is not ready to send
+ * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs)
  */
 struct dccp_sock {
 	/* inet_connection_sock has to be the first member of dccp_sock */
 	struct inet_connection_sock	dccps_inet_connection;
+#define dccps_syn_rtt			dccps_inet_connection.icsk_ack.lrcvtime
 	__u64				dccps_swl;
 	__u64				dccps_swh;
 	__u64				dccps_awl;
diff --git a/net/dccp/input.c b/net/dccp/input.c
index bd578c87b2e..da6ec185ed5 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -300,6 +300,14 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 		if (dccp_parse_options(sk, skb))
 			goto out_invalid_packet;
 
+		/* Obtain RTT sample from SYN exchange (used by CCID 3) */
+		if (dp->dccps_options_received.dccpor_timestamp_echo) {
+			struct timeval now;
+
+			dccp_timestamp(sk, &now);
+			dp->dccps_syn_rtt = dccp_sample_rtt(sk, &now, NULL);
+		}
+
 		if (dccp_msk(sk)->dccpms_send_ack_vector &&
 		    dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk,
 				    DCCP_SKB_CB(skb)->dccpd_seq,
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 9074ca7977b..14b62122732 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -563,6 +563,14 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 	    dccp_insert_options_feat(sk, skb))
 		return -1;
 
+	/*
+	 * Obtain RTT sample from Request/Response exchange.
+	 * This is currently used in CCID 3 initialisation.
+	 */
+	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST &&
+	    dccp_insert_option_timestamp(sk, skb))
+		return -1;
+
 	/* XXX: insert other options when appropriate */
 
 	if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) {
-- 
cgit v1.2.3


From 30833ffead66e1f0052150a51db0b45151189ac1 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 20 Mar 2007 15:31:56 -0300
Subject: [CCID3]: Use initial RTT sample from SYN exchange

The patch follows the following recommendation made in an erratum to RFC 4342:

  "Senders MAY additionally make use of other available RTT measurements,
   including those from the initial Request-Response packet exchange."

It implements larger initial windows with regard to this inital RTT measurement,
using the mechanism suggested in draft-ietf-dccp-rfc3448bis, section 4.2.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index f4097ddf250..c3abd7695d5 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -334,19 +334,30 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 				usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
 		hctx->ccid3hctx_last_win_count	 = 0;
 		hctx->ccid3hctx_t_last_win_count = now;
-		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
-
-		/* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */
-		hctx->ccid3hctx_x = hctx->ccid3hctx_s = skb->len;
-		hctx->ccid3hctx_x <<= 6;
-
-		/* First timeout, according to [RFC 3448, 4.2], is 1 second */
-		hctx->ccid3hctx_t_ipi = USEC_PER_SEC;
-		/* Initial delta: minimum of 0.5 sec and t_gran/2 */
-		hctx->ccid3hctx_delta = TFRC_OPSYS_HALF_TIME_GRAN;
 
 		/* Set t_0 for initial packet */
 		hctx->ccid3hctx_t_nom = now;
+
+		hctx->ccid3hctx_s = skb->len;
+
+		/*
+		 * Use initial RTT sample when available: recommended by erratum
+		 * to RFC 4342. This implements the initialisation procedure of
+		 * draft rfc3448bis, section 4.2. Remember, X is scaled by 2^6.
+		 */
+		if (dp->dccps_syn_rtt) {
+			ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt);
+			hctx->ccid3hctx_rtt  = dp->dccps_syn_rtt;
+			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
+			hctx->ccid3hctx_t_ld = now;
+		} else {
+			/* Sender does not have RTT sample: X = MSS/second */
+			hctx->ccid3hctx_x = dp->dccps_mss_cache;
+			hctx->ccid3hctx_x <<= 6;
+		}
+		ccid3_update_send_interval(hctx);
+
+		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
 		break;
 	case TFRC_SSTATE_NO_FBACK:
 	case TFRC_SSTATE_FBACK:
-- 
cgit v1.2.3


From e284986385b6420a5f30f2dcd743512bbe1a3202 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 11:48:11 -0700
Subject: [RTNL]: Message handler registration interface

This patch adds a new interface to register rtnetlink message
handlers replacing the exported rtnl_links[] array which
required many message handlers to be exported unnecessarly.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h |   7 --
 include/net/rtnetlink.h   |  18 +++++
 net/core/rtnetlink.c      | 188 ++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 184 insertions(+), 29 deletions(-)
 create mode 100644 include/net/rtnetlink.h

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 3a4cb242ecd..1fae30af91f 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -574,13 +574,6 @@ extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, in
 #define rtattr_parse_nested(tb, max, rta) \
 	rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta)))
 
-struct rtnetlink_link
-{
-	int (*doit)(struct sk_buff *, struct nlmsghdr*, void *attr);
-	int (*dumpit)(struct sk_buff *, struct netlink_callback *cb);
-};
-
-extern struct rtnetlink_link * rtnetlink_links[NPROTO];
 extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo);
 extern int rtnl_unicast(struct sk_buff *skb, u32 pid);
 extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
new file mode 100644
index 00000000000..dce7072bd28
--- /dev/null
+++ b/include/net/rtnetlink.h
@@ -0,0 +1,18 @@
+#ifndef __NET_RTNETLINK_H
+#define __NET_RTNETLINK_H
+
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+
+typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *, void *);
+typedef int (*rtnl_dumpit_func)(struct sk_buff *, struct netlink_callback *);
+
+extern int	__rtnl_register(int protocol, int msgtype,
+				rtnl_doit_func, rtnl_dumpit_func);
+extern void	rtnl_register(int protocol, int msgtype,
+			      rtnl_doit_func, rtnl_dumpit_func);
+extern int	rtnl_unregister(int protocol, int msgtype);
+extern void	rtnl_unregister_all(int protocol);
+extern int	rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb);
+
+#endif
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 33ea8eac7fe..fb1630d82dd 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -50,12 +50,18 @@
 #include <net/sock.h>
 #include <net/pkt_sched.h>
 #include <net/fib_rules.h>
-#include <net/netlink.h>
+#include <net/rtnetlink.h>
 #ifdef CONFIG_NET_WIRELESS_RTNETLINK
 #include <linux/wireless.h>
 #include <net/iw_handler.h>
 #endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 
+struct rtnl_link
+{
+	rtnl_doit_func		doit;
+	rtnl_dumpit_func	dumpit;
+};
+
 static DEFINE_MUTEX(rtnl_mutex);
 static struct sock *rtnl;
 
@@ -95,7 +101,151 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
 	return 0;
 }
 
-struct rtnetlink_link * rtnetlink_links[NPROTO];
+struct rtnl_link *rtnl_msg_handlers[NPROTO];
+
+static inline int rtm_msgindex(int msgtype)
+{
+	int msgindex = msgtype - RTM_BASE;
+
+	/*
+	 * msgindex < 0 implies someone tried to register a netlink
+	 * control code. msgindex >= RTM_NR_MSGTYPES may indicate that
+	 * the message type has not been added to linux/rtnetlink.h
+	 */
+	BUG_ON(msgindex < 0 || msgindex >= RTM_NR_MSGTYPES);
+
+	return msgindex;
+}
+
+static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
+{
+	struct rtnl_link *tab;
+
+	tab = rtnl_msg_handlers[protocol];
+	if (tab == NULL || tab->doit == NULL)
+		tab = rtnl_msg_handlers[PF_UNSPEC];
+
+	return tab ? tab->doit : NULL;
+}
+
+static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
+{
+	struct rtnl_link *tab;
+
+	tab = rtnl_msg_handlers[protocol];
+	if (tab == NULL || tab->dumpit == NULL)
+		tab = rtnl_msg_handlers[PF_UNSPEC];
+
+	return tab ? tab->dumpit : NULL;
+}
+
+/**
+ * __rtnl_register - Register a rtnetlink message type
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ * @doit: Function pointer called for each request message
+ * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ *
+ * Registers the specified function pointers (at least one of them has
+ * to be non-NULL) to be called whenever a request message for the
+ * specified protocol family and message type is received.
+ *
+ * The special protocol family PF_UNSPEC may be used to define fallback
+ * function pointers for the case when no entry for the specific protocol
+ * family exists.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int __rtnl_register(int protocol, int msgtype,
+		    rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+{
+	struct rtnl_link *tab;
+	int msgindex;
+
+	BUG_ON(protocol < 0 || protocol >= NPROTO);
+	msgindex = rtm_msgindex(msgtype);
+
+	tab = rtnl_msg_handlers[protocol];
+	if (tab == NULL) {
+		tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL);
+		if (tab == NULL)
+			return -ENOBUFS;
+
+		rtnl_msg_handlers[protocol] = tab;
+	}
+
+	if (doit)
+		tab[msgindex].doit = doit;
+
+	if (dumpit)
+		tab[msgindex].dumpit = dumpit;
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(__rtnl_register);
+
+/**
+ * rtnl_register - Register a rtnetlink message type
+ *
+ * Identical to __rtnl_register() but panics on failure. This is useful
+ * as failure of this function is very unlikely, it can only happen due
+ * to lack of memory when allocating the chain to store all message
+ * handlers for a protocol. Meant for use in init functions where lack
+ * of memory implies no sense in continueing.
+ */
+void rtnl_register(int protocol, int msgtype,
+		   rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+{
+	if (__rtnl_register(protocol, msgtype, doit, dumpit) < 0)
+		panic("Unable to register rtnetlink message handler, "
+		      "protocol = %d, message type = %d\n",
+		      protocol, msgtype);
+}
+
+EXPORT_SYMBOL_GPL(rtnl_register);
+
+/**
+ * rtnl_unregister - Unregister a rtnetlink message type
+ * @protocol: Protocol family or PF_UNSPEC
+ * @msgtype: rtnetlink message type
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int rtnl_unregister(int protocol, int msgtype)
+{
+	int msgindex;
+
+	BUG_ON(protocol < 0 || protocol >= NPROTO);
+	msgindex = rtm_msgindex(msgtype);
+
+	if (rtnl_msg_handlers[protocol] == NULL)
+		return -ENOENT;
+
+	rtnl_msg_handlers[protocol][msgindex].doit = NULL;
+	rtnl_msg_handlers[protocol][msgindex].dumpit = NULL;
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(rtnl_unregister);
+
+/**
+ * rtnl_unregister_all - Unregister all rtnetlink message type of a protocol
+ * @protocol : Protocol family or PF_UNSPEC
+ *
+ * Identical to calling rtnl_unregster() for all registered message types
+ * of a certain protocol family.
+ */
+void rtnl_unregister_all(int protocol)
+{
+	BUG_ON(protocol < 0 || protocol >= NPROTO);
+
+	kfree(rtnl_msg_handlers[protocol]);
+	rtnl_msg_handlers[protocol] = NULL;
+}
+
+EXPORT_SYMBOL_GPL(rtnl_unregister_all);
 
 static const int rtm_min[RTM_NR_FAMILIES] =
 {
@@ -648,7 +798,7 @@ errout:
 	return err;
 }
 
-static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
+int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int idx;
 	int s_idx = cb->family;
@@ -659,12 +809,12 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 		int type = cb->nlh->nlmsg_type-RTM_BASE;
 		if (idx < s_idx || idx == PF_PACKET)
 			continue;
-		if (rtnetlink_links[idx] == NULL ||
-		    rtnetlink_links[idx][type].dumpit == NULL)
+		if (rtnl_msg_handlers[idx] == NULL ||
+		    rtnl_msg_handlers[idx][type].dumpit == NULL)
 			continue;
 		if (idx > s_idx)
 			memset(&cb->args[0], 0, sizeof(cb->args));
-		if (rtnetlink_links[idx][type].dumpit(skb, cb))
+		if (rtnl_msg_handlers[idx][type].dumpit(skb, cb))
 			break;
 	}
 	cb->family = idx;
@@ -672,6 +822,8 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
+EXPORT_SYMBOL_GPL(rtnl_dump_all);
+
 void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 {
 	struct sk_buff *skb;
@@ -703,8 +855,7 @@ static int rtattr_max;
 static __inline__ int
 rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
 {
-	struct rtnetlink_link *link;
-	struct rtnetlink_link *link_tab;
+	rtnl_doit_func doit;
 	int sz_idx, kind;
 	int min_len;
 	int family;
@@ -737,11 +888,6 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
 		return -1;
 	}
 
-	link_tab = rtnetlink_links[family];
-	if (link_tab == NULL)
-		link_tab = rtnetlink_links[PF_UNSPEC];
-	link = &link_tab[type];
-
 	sz_idx = type>>2;
 	kind = type&3;
 
@@ -751,14 +897,14 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
 	}
 
 	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
-		if (link->dumpit == NULL)
-			link = &(rtnetlink_links[PF_UNSPEC][type]);
+		rtnl_dumpit_func dumpit;
 
-		if (link->dumpit == NULL)
+		dumpit = rtnl_get_dumpit(family, type);
+		if (dumpit == NULL)
 			goto err_inval;
 
 		if ((*errp = netlink_dump_start(rtnl, skb, nlh,
-						link->dumpit, NULL)) != 0) {
+						dumpit, NULL)) != 0) {
 			return -1;
 		}
 
@@ -787,11 +933,10 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
 		}
 	}
 
-	if (link->doit == NULL)
-		link = &(rtnetlink_links[PF_UNSPEC][type]);
-	if (link->doit == NULL)
+	doit = rtnl_get_doit(family, type);
+	if (doit == NULL)
 		goto err_inval;
-	err = link->doit(skb, nlh, (void *)&rta_buf[0]);
+	err = doit(skb, nlh, (void *)&rta_buf[0]);
 
 	*errp = err;
 	return err;
@@ -886,7 +1031,6 @@ void __init rtnetlink_init(void)
 EXPORT_SYMBOL(__rta_fill);
 EXPORT_SYMBOL(rtattr_strlcpy);
 EXPORT_SYMBOL(rtattr_parse);
-EXPORT_SYMBOL(rtnetlink_links);
 EXPORT_SYMBOL(rtnetlink_put_metrics);
 EXPORT_SYMBOL(rtnl_lock);
 EXPORT_SYMBOL(rtnl_trylock);
-- 
cgit v1.2.3


From 340d17fc9d577c93678850e46963e9b19b92db7e Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 11:49:22 -0700
Subject: [NET] link: Use rtnl registration interface

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fb1630d82dd..83513732000 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -961,9 +961,6 @@ static void rtnetlink_rcv(struct sock *sk, int len)
 
 static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
 {
-	[RTM_GETLINK     - RTM_BASE] = { .doit   = rtnl_getlink,
-					 .dumpit = rtnl_dump_ifinfo	 },
-	[RTM_SETLINK     - RTM_BASE] = { .doit   = rtnl_setlink		 },
 	[RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
 	[RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
 	[RTM_NEWNEIGH    - RTM_BASE] = { .doit   = neigh_add		 },
@@ -1024,8 +1021,9 @@ void __init rtnetlink_init(void)
 		panic("rtnetlink_init: cannot initialize rtnetlink\n");
 	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
 	register_netdevice_notifier(&rtnetlink_dev_notifier);
-	rtnetlink_links[PF_UNSPEC] = link_rtnetlink_table;
-	rtnetlink_links[PF_PACKET] = link_rtnetlink_table;
+
+	rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo);
+	rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL);
 }
 
 EXPORT_SYMBOL(__rta_fill);
-- 
cgit v1.2.3


From c8822a4e00442e65d42d50db8e529d75c2025630 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 11:50:06 -0700
Subject: [NEIGH]: Use rtnl registration interface

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h | 10 +---------
 net/core/neighbour.c    | 26 +++++++++++++++++++-------
 net/core/rtnetlink.c    |  5 -----
 3 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index ad7fe112141..a4f26187fc1 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -24,6 +24,7 @@
 
 #include <linux/err.h>
 #include <linux/sysctl.h>
+#include <net/rtnetlink.h>
 
 #define NUD_IN_TIMER	(NUD_INCOMPLETE|NUD_REACHABLE|NUD_DELAY|NUD_PROBE)
 #define NUD_VALID	(NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY)
@@ -213,16 +214,7 @@ extern void			pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
 extern struct pneigh_entry	*pneigh_lookup(struct neigh_table *tbl, const void *key, struct net_device *dev, int creat);
 extern int			pneigh_delete(struct neigh_table *tbl, const void *key, struct net_device *dev);
 
-struct netlink_callback;
-struct nlmsghdr;
-extern int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb);
-extern int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
-extern int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
 extern void neigh_app_ns(struct neighbour *n);
-
-extern int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb);
-extern int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
-
 extern void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie);
 extern void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *));
 extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *));
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 61a4713a5df..6f3bb73053c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1441,7 +1441,7 @@ int neigh_table_clear(struct neigh_table *tbl)
 	return 0;
 }
 
-int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct ndmsg *ndm;
 	struct nlattr *dst_attr;
@@ -1506,7 +1506,7 @@ out:
 	return err;
 }
 
-int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct ndmsg *ndm;
 	struct nlattr *tb[NDA_MAX+1];
@@ -1786,7 +1786,7 @@ static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = {
 	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
 };
 
-int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct neigh_table *tbl;
 	struct ndtmsg *ndtmsg;
@@ -1910,7 +1910,7 @@ errout:
 	return err;
 }
 
-int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int family, tidx, nidx = 0;
 	int tbl_skip = cb->args[0];
@@ -2034,7 +2034,7 @@ out:
 	return rc;
 }
 
-int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct neigh_table *tbl;
 	int t, family, s_t;
@@ -2746,14 +2746,26 @@ void neigh_sysctl_unregister(struct neigh_parms *p)
 
 #endif	/* CONFIG_SYSCTL */
 
+static int __init neigh_init(void)
+{
+	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
+
+	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
+	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
+
+	return 0;
+}
+
+subsys_initcall(neigh_init);
+
 EXPORT_SYMBOL(__neigh_event_send);
 EXPORT_SYMBOL(neigh_changeaddr);
 EXPORT_SYMBOL(neigh_compat_output);
 EXPORT_SYMBOL(neigh_connected_output);
 EXPORT_SYMBOL(neigh_create);
-EXPORT_SYMBOL(neigh_delete);
 EXPORT_SYMBOL(neigh_destroy);
-EXPORT_SYMBOL(neigh_dump_info);
 EXPORT_SYMBOL(neigh_event_ns);
 EXPORT_SYMBOL(neigh_ifdown);
 EXPORT_SYMBOL(neigh_lookup);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 83513732000..3044702f7d9 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -963,16 +963,11 @@ static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
 {
 	[RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
 	[RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
-	[RTM_NEWNEIGH    - RTM_BASE] = { .doit   = neigh_add		 },
-	[RTM_DELNEIGH    - RTM_BASE] = { .doit   = neigh_delete		 },
-	[RTM_GETNEIGH    - RTM_BASE] = { .dumpit = neigh_dump_info	 },
 #ifdef CONFIG_FIB_RULES
 	[RTM_NEWRULE     - RTM_BASE] = { .doit   = fib_nl_newrule	 },
 	[RTM_DELRULE     - RTM_BASE] = { .doit   = fib_nl_delrule	 },
 #endif
 	[RTM_GETRULE     - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
-	[RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info	 },
-	[RTM_SETNEIGHTBL - RTM_BASE] = { .doit   = neightbl_set		 },
 };
 
 static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
-- 
cgit v1.2.3


From 9d9e6a5819230b5a5cc036f213135cb123ab1e50 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Sun, 25 Mar 2007 23:20:05 -0700
Subject: [NET] rules: Use rtnl registration interface

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h | 6 +-----
 net/core/fib_rules.c    | 8 ++++++--
 net/core/rtnetlink.c    | 5 -----
 3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index d585ea9fa97..b2b9ccdf32d 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -5,7 +5,7 @@
 #include <linux/netdevice.h>
 #include <linux/fib_rules.h>
 #include <net/flow.h>
-#include <net/netlink.h>
+#include <net/rtnetlink.h>
 
 struct fib_rule
 {
@@ -99,10 +99,6 @@ extern int			fib_rules_lookup(struct fib_rules_ops *,
 						 struct flowi *, int flags,
 						 struct fib_lookup_arg *);
 
-extern int			fib_nl_newrule(struct sk_buff *,
-					       struct nlmsghdr *, void *);
-extern int			fib_nl_delrule(struct sk_buff *,
-					       struct nlmsghdr *, void *);
 extern int			fib_rules_dump(struct sk_buff *,
 					       struct netlink_callback *, int);
 #endif
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 7174ced75ef..bf45f24cfea 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -174,7 +174,7 @@ errout:
 	return err;
 }
 
-int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct fib_rule_hdr *frh = nlmsg_data(nlh);
 	struct fib_rules_ops *ops = NULL;
@@ -265,7 +265,7 @@ errout:
 	return err;
 }
 
-int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct fib_rule_hdr *frh = nlmsg_data(nlh);
 	struct fib_rules_ops *ops = NULL;
@@ -501,6 +501,10 @@ static struct notifier_block fib_rules_notifier = {
 
 static int __init fib_rules_init(void)
 {
+	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, rtnl_dump_all);
+
 	return register_netdevice_notifier(&fib_rules_notifier);
 }
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3044702f7d9..5cc09f82f6d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -963,11 +963,6 @@ static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
 {
 	[RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
 	[RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
-#ifdef CONFIG_FIB_RULES
-	[RTM_NEWRULE     - RTM_BASE] = { .doit   = fib_nl_newrule	 },
-	[RTM_DELRULE     - RTM_BASE] = { .doit   = fib_nl_delrule	 },
-#endif
-	[RTM_GETRULE     - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
 };
 
 static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
-- 
cgit v1.2.3


From 63f3444fb9a54c024d55f1205f8b94e7d2786595 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 11:55:17 -0700
Subject: [IPv4]: Use rtnl registration interface

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h    |  6 ------
 net/ipv4/devinet.c      | 21 +++++----------------
 net/ipv4/fib_frontend.c | 12 ++++++++----
 net/ipv4/fib_rules.c    |  4 +++-
 net/ipv4/route.c        |  6 ++++--
 5 files changed, 20 insertions(+), 29 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 36c635ca1aa..5a4a0366c24 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -215,10 +215,6 @@ extern void fib_select_default(const struct flowi *flp, struct fib_result *res);
 /* Exported by fib_frontend.c */
 extern struct nla_policy rtm_ipv4_policy[];
 extern void		ip_fib_init(void);
-extern int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
-extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
-extern int inet_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
-extern int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb);
 extern int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 			       struct net_device *dev, __be32 *spec_dst, u32 *itag);
 extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res);
@@ -235,8 +231,6 @@ extern __be32  __fib_res_prefsrc(struct fib_result *res);
 extern struct fib_table *fib_hash_init(u32 id);
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb);
-
 extern void __init fib4_rules_init(void);
 
 #ifdef CONFIG_NET_CLS_ROUTE
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 043857bd151..9bdc79564cc 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -48,7 +48,6 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
 #include <linux/init.h>
 #include <linux/notifier.h>
 #include <linux/inetdevice.h>
@@ -62,7 +61,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/ip_fib.h>
-#include <net/netlink.h>
+#include <net/rtnetlink.h>
 
 struct ipv4_devconf ipv4_devconf = {
 	.accept_redirects = 1,
@@ -1241,19 +1240,6 @@ errout:
 		rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
 }
 
-static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = {
-	[RTM_NEWADDR  - RTM_BASE] = { .doit	= inet_rtm_newaddr,	},
-	[RTM_DELADDR  - RTM_BASE] = { .doit	= inet_rtm_deladdr,	},
-	[RTM_GETADDR  - RTM_BASE] = { .dumpit	= inet_dump_ifaddr,	},
-	[RTM_NEWROUTE - RTM_BASE] = { .doit	= inet_rtm_newroute,	},
-	[RTM_DELROUTE - RTM_BASE] = { .doit	= inet_rtm_delroute,	},
-	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet_rtm_getroute,
-				      .dumpit	= inet_dump_fib,	},
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= fib4_rules_dump,	},
-#endif
-};
-
 #ifdef CONFIG_SYSCTL
 
 void inet_forward_change(void)
@@ -1636,7 +1622,10 @@ void __init devinet_init(void)
 {
 	register_gifconf(PF_INET, inet_gifconf);
 	register_netdevice_notifier(&ip_netdev_notifier);
-	rtnetlink_links[PF_INET] = inet_rtnetlink_table;
+
+	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
+	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
+	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
 #ifdef CONFIG_SYSCTL
 	devinet_sysctl.sysctl_header =
 		register_sysctl_table(devinet_sysctl.devinet_root_dir);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 3ff753c6f19..5bf718a3e49 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -34,7 +34,6 @@
 #include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
-#include <linux/netlink.h>
 #include <linux/init.h>
 #include <linux/list.h>
 
@@ -46,6 +45,7 @@
 #include <net/icmp.h>
 #include <net/arp.h>
 #include <net/ip_fib.h>
+#include <net/rtnetlink.h>
 
 #define FFprint(a...) printk(KERN_DEBUG a)
 
@@ -540,7 +540,7 @@ errout:
 	return err;
 }
 
-int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct fib_config cfg;
 	struct fib_table *tb;
@@ -561,7 +561,7 @@ errout:
 	return err;
 }
 
-int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct fib_config cfg;
 	struct fib_table *tb;
@@ -582,7 +582,7 @@ errout:
 	return err;
 }
 
-int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	unsigned int h, s_h;
 	unsigned int e = 0, s_e;
@@ -925,6 +925,10 @@ void __init ip_fib_init(void)
 	register_netdevice_notifier(&fib_netdev_notifier);
 	register_inetaddr_notifier(&fib_inetaddr_notifier);
 	nl_fib_lookup_init();
+
+	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
+	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
+	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
 }
 
 EXPORT_SYMBOL(inet_addr_type);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index c660c074c76..a7f931ddfaa 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -274,7 +274,7 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
+static int fib4_rule_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	return fib_rules_dump(skb, cb, AF_INET);
 }
@@ -327,4 +327,6 @@ void __init fib4_rules_init(void)
 	list_add_tail(&default_rule.common.list, &fib4_rules);
 
 	fib_rules_register(&fib4_rules_ops);
+
+	rtnl_register(PF_INET, RTM_GETRULE, NULL, fib4_rule_dump);
 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 58417393dec..df9fe4f2e8c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -82,7 +82,6 @@
 #include <linux/proc_fs.h>
 #include <linux/init.h>
 #include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
 #include <linux/inetdevice.h>
 #include <linux/igmp.h>
 #include <linux/pkt_sched.h>
@@ -104,6 +103,7 @@
 #include <net/xfrm.h>
 #include <net/ip_mp_alg.h>
 #include <net/netevent.h>
+#include <net/rtnetlink.h>
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
@@ -2721,7 +2721,7 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct rtmsg *rtm;
 	struct nlattr *tb[RTA_MAX+1];
@@ -3194,6 +3194,8 @@ int __init ip_rt_init(void)
 	xfrm_init();
 	xfrm4_init();
 #endif
+	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
+
 	return rc;
 }
 
-- 
cgit v1.2.3


From be577ddc2b4aca0849f701222f5bc13cf1b79c9a Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 11:55:50 -0700
Subject: [PKT_SCHED] qdisc: Use rtnl registration interface

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  2 +-
 net/sched/sch_api.c       | 27 +++++++--------------------
 2 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 82086392735..a3f4ddd1d6a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -5,10 +5,10 @@
 #include <linux/types.h>
 #include <linux/rcupdate.h>
 #include <linux/module.h>
-#include <linux/rtnetlink.h>
 #include <linux/pkt_sched.h>
 #include <linux/pkt_cls.h>
 #include <net/gen_stats.h>
+#include <net/rtnetlink.h>
 
 struct Qdisc_ops;
 struct qdisc_walker;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 0b9abea68fd..b06f20294ac 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -27,7 +27,6 @@
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -1239,29 +1238,17 @@ static const struct file_operations psched_fops = {
 
 static int __init pktsched_init(void)
 {
-	struct rtnetlink_link *link_p;
-
-	link_p = rtnetlink_links[PF_UNSPEC];
-
-	/* Setup rtnetlink links. It is made here to avoid
-	   exporting large number of public symbols.
-	 */
-
-	if (link_p) {
-		link_p[RTM_NEWQDISC-RTM_BASE].doit = tc_modify_qdisc;
-		link_p[RTM_DELQDISC-RTM_BASE].doit = tc_get_qdisc;
-		link_p[RTM_GETQDISC-RTM_BASE].doit = tc_get_qdisc;
-		link_p[RTM_GETQDISC-RTM_BASE].dumpit = tc_dump_qdisc;
-		link_p[RTM_NEWTCLASS-RTM_BASE].doit = tc_ctl_tclass;
-		link_p[RTM_DELTCLASS-RTM_BASE].doit = tc_ctl_tclass;
-		link_p[RTM_GETTCLASS-RTM_BASE].doit = tc_ctl_tclass;
-		link_p[RTM_GETTCLASS-RTM_BASE].dumpit = tc_dump_tclass;
-	}
-
 	register_qdisc(&pfifo_qdisc_ops);
 	register_qdisc(&bfifo_qdisc_ops);
 	proc_net_fops_create("psched", 0, &psched_fops);
 
+	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
+	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 82623c0d73bd111cad26e501e509966b2455b0e0 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 11:56:22 -0700
Subject: [PKT_SCHED] cls: Use rtnl registration interface

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 3d0a6cdcaeb..ca3da5013b7 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -29,7 +29,6 @@
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
 #include <linux/netlink.h>
@@ -616,18 +615,11 @@ rtattr_failure: __attribute__ ((unused))
 
 static int __init tc_filter_init(void)
 {
-	struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC];
+	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
+						 tc_dump_tfilter);
 
-	/* Setup rtnetlink links. It is made here to avoid
-	   exporting large number of public symbols.
-	 */
-
-	if (link_p) {
-		link_p[RTM_NEWTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
-		link_p[RTM_DELTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
-		link_p[RTM_GETTFILTER-RTM_BASE].doit = tc_ctl_tfilter;
-		link_p[RTM_GETTFILTER-RTM_BASE].dumpit = tc_dump_tfilter;
-	}
 	return 0;
 }
 
-- 
cgit v1.2.3


From 708914cc5e1657eb1a1f9eefc6333dfd2df8c73a Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 11:56:59 -0700
Subject: [PKT_SCHED] act: Use rtnl registration interface

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f002f74f376..711dd26c95c 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -25,7 +25,6 @@
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
 #include <net/sock.h>
@@ -1077,14 +1076,9 @@ nlmsg_failure:
 
 static int __init tc_action_init(void)
 {
-	struct rtnetlink_link *link_p = rtnetlink_links[PF_UNSPEC];
-
-	if (link_p) {
-		link_p[RTM_NEWACTION-RTM_BASE].doit = tc_ctl_action;
-		link_p[RTM_DELACTION-RTM_BASE].doit = tc_ctl_action;
-		link_p[RTM_GETACTION-RTM_BASE].doit = tc_ctl_action;
-		link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action;
-	}
+	rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL);
+	rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action);
 
 	return 0;
 }
-- 
cgit v1.2.3


From fa34ddd739cecf3999ec0b7562618e8321829d41 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 11:57:46 -0700
Subject: [DECNet]: Use rtnl registration interface

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Steven Whitehouse <swhiteho@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dn_fib.h   |  9 ---------
 include/net/dn_route.h |  1 -
 net/decnet/af_decnet.c |  1 +
 net/decnet/dn_dev.c    | 24 +++---------------------
 net/decnet/dn_fib.c    |  8 +++++---
 net/decnet/dn_route.c  |  9 ++++++++-
 net/decnet/dn_rules.c  |  4 +++-
 7 files changed, 20 insertions(+), 36 deletions(-)

diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h
index f01626cbbed..30125119c95 100644
--- a/include/net/dn_fib.h
+++ b/include/net/dn_fib.h
@@ -148,17 +148,8 @@ extern void dn_fib_rules_cleanup(void);
 extern unsigned dnet_addr_type(__le16 addr);
 extern int dn_fib_lookup(struct flowi *fl, struct dn_fib_res *res);
 
-/*
- * rtnetlink interface
- */
-extern int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
-extern int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
 extern int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb);
 
-extern int dn_fib_rtm_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
-extern int dn_fib_rtm_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
-extern int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb);
-
 extern void dn_fib_free_info(struct dn_fib_info *fi);
 
 static inline void dn_fib_info_put(struct dn_fib_info *fi)
diff --git a/include/net/dn_route.h b/include/net/dn_route.h
index a566944c496..c10e8e7e59a 100644
--- a/include/net/dn_route.h
+++ b/include/net/dn_route.h
@@ -18,7 +18,6 @@
 extern struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri);
 extern int dn_route_output_sock(struct dst_entry **pprt, struct flowi *, struct sock *sk, int flags);
 extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb);
-extern int dn_cache_getroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg);
 extern void dn_rt_cache_flush(int delay);
 
 /* Masks for flags field */
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index c6568d637e1..a205eaa87f5 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -2413,6 +2413,7 @@ module_init(decnet_init);
 static void __exit decnet_exit(void)
 {
 	sock_unregister(AF_DECnet);
+	rtnl_unregister_all(PF_DECnet);
 	dev_remove_pack(&dn_dix_packet_type);
 
 	dn_unregister_sysctl();
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 95871a669dc..61be2caddc5 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1447,24 +1447,6 @@ static const struct file_operations dn_dev_seq_fops = {
 
 #endif /* CONFIG_PROC_FS */
 
-static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] =
-{
-	[RTM_NEWADDR  - RTM_BASE] = { .doit	= dn_nl_newaddr,	},
-	[RTM_DELADDR  - RTM_BASE] = { .doit	= dn_nl_deladdr,	},
-	[RTM_GETADDR  - RTM_BASE] = { .dumpit	= dn_nl_dump_ifaddr,	},
-#ifdef CONFIG_DECNET_ROUTER
-	[RTM_NEWROUTE - RTM_BASE] = { .doit	= dn_fib_rtm_newroute,	},
-	[RTM_DELROUTE - RTM_BASE] = { .doit	= dn_fib_rtm_delroute,	},
-	[RTM_GETROUTE - RTM_BASE] = { .doit	= dn_cache_getroute,
-				      .dumpit	= dn_fib_dump,		},
-	[RTM_GETRULE  - RTM_BASE] = { .dumpit	= dn_fib_dump_rules,	},
-#else
-	[RTM_GETROUTE - RTM_BASE] = { .doit	= dn_cache_getroute,
-				      .dumpit	= dn_cache_dump,	},
-#endif
-
-};
-
 static int __initdata addr[2];
 module_param_array(addr, int, NULL, 0444);
 MODULE_PARM_DESC(addr, "The DECnet address of this machine: area,node");
@@ -1485,7 +1467,9 @@ void __init dn_dev_init(void)
 
 	dn_dev_devices_on();
 
-	rtnetlink_links[PF_DECnet] = dnet_rtnetlink_table;
+	rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL);
+	rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL);
+	rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr);
 
 	proc_net_fops_create("decnet_dev", S_IRUGO, &dn_dev_seq_fops);
 
@@ -1500,8 +1484,6 @@ void __init dn_dev_init(void)
 
 void __exit dn_dev_cleanup(void)
 {
-	rtnetlink_links[PF_DECnet] = NULL;
-
 #ifdef CONFIG_SYSCTL
 	{
 		int i;
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 82d58a977e6..310a86268d2 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -504,7 +504,7 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta)
 	return 0;
 }
 
-int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct dn_fib_table *tb;
 	struct rtattr **rta = arg;
@@ -520,7 +520,7 @@ int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	return -ESRCH;
 }
 
-int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct dn_fib_table *tb;
 	struct rtattr **rta = arg;
@@ -748,11 +748,13 @@ void __exit dn_fib_cleanup(void)
 
 void __init dn_fib_init(void)
 {
-
 	dn_fib_table_init();
 	dn_fib_rules_init();
 
 	register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
+
+	rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL);
+	rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL);
 }
 
 
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 2ae35ef1f07..5d7337bcf0f 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1522,7 +1522,7 @@ rtattr_failure:
 /*
  * This is called by both endnodes and routers now.
  */
-int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
+static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
 {
 	struct rtattr **rta = arg;
 	struct rtmsg *rtm = NLMSG_DATA(nlh);
@@ -1813,6 +1813,13 @@ void __init dn_route_init(void)
 	dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
 
 	proc_net_fops_create("decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops);
+
+#ifdef CONFIG_DECNET_ROUTER
+	rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, dn_fib_dump);
+#else
+	rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute,
+		      dn_cache_dump);
+#endif
 }
 
 void __exit dn_route_cleanup(void)
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 5e86dd54230..a7a7da9b35c 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -239,7 +239,7 @@ static u32 dn_fib_rule_default_pref(void)
 	return 0;
 }
 
-int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
+static int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	return fib_rules_dump(skb, cb, AF_DECnet);
 }
@@ -264,10 +264,12 @@ void __init dn_fib_rules_init(void)
 {
 	list_add_tail(&default_rule.common.list, &dn_fib_rules);
 	fib_rules_register(&dn_fib_rules_ops);
+	rtnl_register(PF_DECnet, RTM_GETRULE, NULL, dn_fib_dump_rules);
 }
 
 void __exit dn_fib_rules_cleanup(void)
 {
+	rtnl_unregister(PF_DECnet, RTM_GETRULE);
 	fib_rules_unregister(&dn_fib_rules_ops);
 }
 
-- 
cgit v1.2.3


From c127ea2c45d1b13a672fde254679721bb282e90a Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 11:58:32 -0700
Subject: [IPv6]: Use rtnl registration interface

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h   |  2 --
 include/net/ip6_route.h |  5 -----
 net/ipv6/addrconf.c     | 35 ++++++++++++++++-------------------
 net/ipv6/af_inet6.c     |  2 ++
 net/ipv6/fib6_rules.c   |  4 +++-
 net/ipv6/ip6_fib.c      |  4 +++-
 net/ipv6/route.c        | 10 +++++++---
 7 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index cf355a3c2ad..c48ea873f1e 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -219,8 +219,6 @@ extern void			fib6_init(void);
 
 extern void			fib6_rules_init(void);
 extern void			fib6_rules_cleanup(void);
-extern int			fib6_rules_dump(struct sk_buff *,
-						struct netlink_callback *);
 
 #endif
 #endif
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 4e927ebd1cb..5456fdd6d04 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -116,12 +116,7 @@ extern void			rt6_pmtu_discovery(struct in6_addr *daddr,
 						   struct net_device *dev,
 						   u32 pmtu);
 
-struct nlmsghdr;
 struct netlink_callback;
-extern int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb);
-extern int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
-extern int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
-extern int inet6_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg);
 
 struct rt6_rtnl_dump_arg
 {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 38274c20eaa..eecba1886b4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3613,23 +3613,6 @@ errout:
 		rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
 }
 
-static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = {
-	[RTM_GETLINK - RTM_BASE] = { .dumpit	= inet6_dump_ifinfo, },
-	[RTM_NEWADDR - RTM_BASE] = { .doit	= inet6_rtm_newaddr, },
-	[RTM_DELADDR - RTM_BASE] = { .doit	= inet6_rtm_deladdr, },
-	[RTM_GETADDR - RTM_BASE] = { .doit	= inet6_rtm_getaddr,
-				     .dumpit	= inet6_dump_ifaddr, },
-	[RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, },
-	[RTM_GETANYCAST - RTM_BASE] = { .dumpit	= inet6_dump_ifacaddr, },
-	[RTM_NEWROUTE - RTM_BASE] = { .doit	= inet6_rtm_newroute, },
-	[RTM_DELROUTE - RTM_BASE] = { .doit	= inet6_rtm_delroute, },
-	[RTM_GETROUTE - RTM_BASE] = { .doit	= inet6_rtm_getroute,
-				      .dumpit	= inet6_dump_fib, },
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
-	[RTM_GETRULE  - RTM_BASE] = { .dumpit   = fib6_rules_dump,   },
-#endif
-};
-
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 {
 	inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
@@ -4149,7 +4132,18 @@ int __init addrconf_init(void)
 	register_netdevice_notifier(&ipv6_dev_notf);
 
 	addrconf_verify(0);
-	rtnetlink_links[PF_INET6] = inet6_rtnetlink_table;
+
+	err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo);
+	if (err < 0)
+		goto errout;
+
+	/* Only the first call to __rtnl_register can fail */
+	__rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL);
+	__rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL);
+	__rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr);
+	__rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr);
+	__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr);
+
 #ifdef CONFIG_SYSCTL
 	addrconf_sysctl.sysctl_header =
 		register_sysctl_table(addrconf_sysctl.addrconf_root_dir);
@@ -4157,6 +4151,10 @@ int __init addrconf_init(void)
 #endif
 
 	return 0;
+errout:
+	unregister_netdevice_notifier(&ipv6_dev_notf);
+
+	return err;
 }
 
 void __exit addrconf_cleanup(void)
@@ -4168,7 +4166,6 @@ void __exit addrconf_cleanup(void)
 
 	unregister_netdevice_notifier(&ipv6_dev_notf);
 
-	rtnetlink_links[PF_INET6] = NULL;
 #ifdef CONFIG_SYSCTL
 	addrconf_sysctl_unregister(&ipv6_devconf_dflt);
 	addrconf_sysctl_unregister(&ipv6_devconf);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 7b917f856e1..82572b50754 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -945,6 +945,8 @@ static void __exit inet6_exit(void)
 {
 	/* First of all disallow new sockets creation. */
 	sock_unregister(PF_INET6);
+	/* Disallow any further netlink messages */
+	rtnl_unregister_all(PF_INET6);
 
 	/* Cleanup code parts. */
 	ipv6_packet_cleanup();
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index ea3035b4e3e..c74da4b6dd2 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -216,7 +216,7 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
+static int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	return fib_rules_dump(skb, cb, AF_INET6);
 }
@@ -255,9 +255,11 @@ void __init fib6_rules_init(void)
 	list_add_tail(&main_rule.common.list, &fib6_rules);
 
 	fib_rules_register(&fib6_rules_ops);
+	__rtnl_register(PF_INET6, RTM_GETRULE, NULL, fib6_rules_dump);
 }
 
 void fib6_rules_cleanup(void)
 {
+	rtnl_unregister(PF_INET6, RTM_GETRULE);
 	fib_rules_unregister(&fib6_rules_ops);
 }
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 268f476ef3d..ca08ee88d07 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -359,7 +359,7 @@ end:
 	return res;
 }
 
-int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	unsigned int h, s_h;
 	unsigned int e = 0, s_e;
@@ -1486,6 +1486,8 @@ void __init fib6_init(void)
 					   NULL, NULL);
 
 	fib6_tables_init();
+
+	__rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib);
 }
 
 void fib6_gc_cleanup(void)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 52cbe1cd404..70f760f069b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2015,7 +2015,7 @@ errout:
 	return err;
 }
 
-int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct fib6_config cfg;
 	int err;
@@ -2027,7 +2027,7 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	return ip6_route_del(&cfg);
 }
 
-int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct fib6_config cfg;
 	int err;
@@ -2164,7 +2164,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 		     prefix, NLM_F_MULTI);
 }
 
-int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct nlattr *tb[RTA_MAX+1];
 	struct rt6_info *rt;
@@ -2508,6 +2508,10 @@ void __init ip6_route_init(void)
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	fib6_rules_init();
 #endif
+
+	__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
+	__rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
+	__rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
 }
 
 void ip6_route_cleanup(void)
-- 
cgit v1.2.3


From 32fe21c0c0a3091552fea8f2f7e4905f547a3433 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 11:59:03 -0700
Subject: [BRIDGE]: Use rtnl registration interface

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br.c         |  8 ++++++--
 net/bridge/br_netlink.c | 20 ++++++++++----------
 net/bridge/br_private.h |  2 +-
 3 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/net/bridge/br.c b/net/bridge/br.c
index 2994387999a..601c37d61c0 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -47,7 +47,10 @@ static int __init br_init(void)
 	if (err)
 		goto err_out2;
 
-	br_netlink_init();
+	err = br_netlink_init();
+	if (err)
+		goto err_out3;
+
 	brioctl_set(br_ioctl_deviceless_stub);
 	br_handle_frame_hook = br_handle_frame;
 
@@ -55,7 +58,8 @@ static int __init br_init(void)
 	br_fdb_put_hook = br_fdb_put;
 
 	return 0;
-
+err_out3:
+	unregister_netdevice_notifier(&br_device_notifier);
 err_out2:
 	br_netfilter_fini();
 err_out1:
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 7d68b24b565..a14ac51753e 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -11,8 +11,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/rtnetlink.h>
-#include <net/netlink.h>
+#include <net/rtnetlink.h>
 #include "br_private.h"
 
 static inline size_t br_nlmsg_size(void)
@@ -179,18 +178,19 @@ static int br_rtm_setlink(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
 }
 
 
-static struct rtnetlink_link bridge_rtnetlink_table[RTM_NR_MSGTYPES] = {
-	[RTM_GETLINK - RTM_BASE] = { .dumpit	= br_dump_ifinfo, },
-	[RTM_SETLINK - RTM_BASE] = { .doit      = br_rtm_setlink, },
-};
-
-void __init br_netlink_init(void)
+int __init br_netlink_init(void)
 {
-	rtnetlink_links[PF_BRIDGE] = bridge_rtnetlink_table;
+	if (__rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, br_dump_ifinfo))
+		return -ENOBUFS;
+
+	/* Only the first call to __rtnl_register can fail */
+	__rtnl_register(PF_BRIDGE, RTM_SETLINK, br_rtm_setlink, NULL);
+
+	return 0;
 }
 
 void __exit br_netlink_fini(void)
 {
-	rtnetlink_links[PF_BRIDGE] = NULL;
+	rtnl_unregister_all(PF_BRIDGE);
 }
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index cc3f1c99261..2b73de6c0b4 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -235,7 +235,7 @@ extern void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
 
 
 /* br_netlink.c */
-extern void br_netlink_init(void);
+extern int br_netlink_init(void);
 extern void br_netlink_fini(void);
 extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
 
-- 
cgit v1.2.3


From 687ad8cc640fd1f1619cc44a9ab274dabd48c758 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 11:59:42 -0700
Subject: [RTNL]: Use rtnl registration interface for dump-all aliases

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5cc09f82f6d..294c2f209a3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -959,12 +959,6 @@ static void rtnetlink_rcv(struct sock *sk, int len)
 	} while (qlen);
 }
 
-static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] =
-{
-	[RTM_GETADDR     - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
-	[RTM_GETROUTE    - RTM_BASE] = { .dumpit = rtnl_dump_all	 },
-};
-
 static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
 	struct net_device *dev = ptr;
@@ -1014,6 +1008,9 @@ void __init rtnetlink_init(void)
 
 	rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo);
 	rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL);
+
+	rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all);
+	rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
 }
 
 EXPORT_SYMBOL(__rta_fill);
-- 
cgit v1.2.3


From c454673da7c1d6533f40ec2f788023df9af56ebf Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Sun, 25 Mar 2007 23:24:24 -0700
Subject: [NET] rules: Unified rules dumping

Implements a unified, protocol independant rules dumping function
which is capable of both, dumping a specific protocol family or
all of them. This speeds up dumping as less lookups are required.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h |  3 ---
 include/net/rtnetlink.h |  8 ++++++++
 net/core/fib_rules.c    | 47 +++++++++++++++++++++++++++++++++++++----------
 net/decnet/dn_rules.c   |  7 -------
 net/ipv4/fib_rules.c    |  7 -------
 net/ipv6/fib6_rules.c   |  7 -------
 6 files changed, 45 insertions(+), 34 deletions(-)

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index b2b9ccdf32d..ff3029fe965 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -98,7 +98,4 @@ extern int			fib_rules_unregister(struct fib_rules_ops *);
 extern int			fib_rules_lookup(struct fib_rules_ops *,
 						 struct flowi *, int flags,
 						 struct fib_lookup_arg *);
-
-extern int			fib_rules_dump(struct sk_buff *,
-					       struct netlink_callback *, int);
 #endif
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index dce7072bd28..086fa9e8950 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -15,4 +15,12 @@ extern int	rtnl_unregister(int protocol, int msgtype);
 extern void	rtnl_unregister_all(int protocol);
 extern int	rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb);
 
+static inline int rtnl_msg_family(struct nlmsghdr *nlh)
+{
+	if (nlmsg_len(nlh) >= sizeof(struct rtgenmsg))
+		return ((struct rtgenmsg *) nlmsg_data(nlh))->rtgen_family;
+	else
+		return AF_UNSPEC;
+}
+
 #endif
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index bf45f24cfea..fdf05af16ba 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -393,19 +393,15 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family)
+static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
+		      struct fib_rules_ops *ops)
 {
 	int idx = 0;
 	struct fib_rule *rule;
-	struct fib_rules_ops *ops;
-
-	ops = lookup_rules_ops(family);
-	if (ops == NULL)
-		return -EAFNOSUPPORT;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(rule, ops->rules_list, list) {
-		if (idx < cb->args[0])
+		if (idx < cb->args[1])
 			goto skip;
 
 		if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid,
@@ -416,13 +412,44 @@ skip:
 		idx++;
 	}
 	rcu_read_unlock();
-	cb->args[0] = idx;
+	cb->args[1] = idx;
 	rules_ops_put(ops);
 
 	return skb->len;
 }
 
-EXPORT_SYMBOL_GPL(fib_rules_dump);
+static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct fib_rules_ops *ops;
+	int idx = 0, family;
+
+	family = rtnl_msg_family(cb->nlh);
+	if (family != AF_UNSPEC) {
+		/* Protocol specific dump request */
+		ops = lookup_rules_ops(family);
+		if (ops == NULL)
+			return -EAFNOSUPPORT;
+
+		return dump_rules(skb, cb, ops);
+	}
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ops, &rules_ops, list) {
+		if (idx < cb->args[0] || !try_module_get(ops->owner))
+			goto skip;
+
+		if (dump_rules(skb, cb, ops) < 0)
+			break;
+
+		cb->args[1] = 0;
+	skip:
+		idx++;
+	}
+	rcu_read_unlock();
+	cb->args[0] = idx;
+
+	return skb->len;
+}
 
 static void notify_rule_change(int event, struct fib_rule *rule,
 			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
@@ -503,7 +530,7 @@ static int __init fib_rules_init(void)
 {
 	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
 	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, rtnl_dump_all);
+	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
 
 	return register_netdevice_notifier(&fib_rules_notifier);
 }
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index a7a7da9b35c..fd0cc2aa316 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -239,11 +239,6 @@ static u32 dn_fib_rule_default_pref(void)
 	return 0;
 }
 
-static int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	return fib_rules_dump(skb, cb, AF_DECnet);
-}
-
 static struct fib_rules_ops dn_fib_rules_ops = {
 	.family		= AF_DECnet,
 	.rule_size	= sizeof(struct dn_fib_rule),
@@ -264,12 +259,10 @@ void __init dn_fib_rules_init(void)
 {
 	list_add_tail(&default_rule.common.list, &dn_fib_rules);
 	fib_rules_register(&dn_fib_rules_ops);
-	rtnl_register(PF_DECnet, RTM_GETRULE, NULL, dn_fib_dump_rules);
 }
 
 void __exit dn_fib_rules_cleanup(void)
 {
-	rtnl_unregister(PF_DECnet, RTM_GETRULE);
 	fib_rules_unregister(&dn_fib_rules_ops);
 }
 
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index a7f931ddfaa..b021b3440ca 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -274,11 +274,6 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-static int fib4_rule_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	return fib_rules_dump(skb, cb, AF_INET);
-}
-
 static u32 fib4_rule_default_pref(void)
 {
 	struct list_head *pos;
@@ -327,6 +322,4 @@ void __init fib4_rules_init(void)
 	list_add_tail(&default_rule.common.list, &fib4_rules);
 
 	fib_rules_register(&fib4_rules_ops);
-
-	rtnl_register(PF_INET, RTM_GETRULE, NULL, fib4_rule_dump);
 }
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index c74da4b6dd2..dd9720e700e 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -216,11 +216,6 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-static int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	return fib_rules_dump(skb, cb, AF_INET6);
-}
-
 static u32 fib6_rule_default_pref(void)
 {
 	return 0x3FFF;
@@ -255,11 +250,9 @@ void __init fib6_rules_init(void)
 	list_add_tail(&main_rule.common.list, &fib6_rules);
 
 	fib_rules_register(&fib6_rules_ops);
-	__rtnl_register(PF_INET6, RTM_GETRULE, NULL, fib6_rules_dump);
 }
 
 void fib6_rules_cleanup(void)
 {
-	rtnl_unregister(PF_INET6, RTM_GETRULE);
 	fib_rules_unregister(&fib6_rules_ops);
 }
-- 
cgit v1.2.3


From 22b9a0a3a49ab1a856e0853b3f3dd2abd156bd7c Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 22 Mar 2007 12:10:18 -0700
Subject: [LIB]: div64_64 optimization

Minor optimization of div64_64.  do_div() already does optimization
for the case of 32 by 32 divide, so no need to do it here.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/div64.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/lib/div64.c b/lib/div64.c
index c3d7655cdfb..74f0c8cb403 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -61,20 +61,18 @@ EXPORT_SYMBOL(__div64_32);
 /* 64bit divisor, dividend and result. dynamic precision */
 uint64_t div64_64(uint64_t dividend, uint64_t divisor)
 {
-	uint32_t d = divisor;
+	uint32_t high, d;
 
-	if (divisor > 0xffffffffULL) {
-		unsigned int shift = fls(divisor >> 32);
+	high = divisor >> 32;
+	if (high) {
+		unsigned int shift = fls(high);
 
 		d = divisor >> shift;
 		dividend >>= shift;
-	}
+	} else
+		d = divisor;
 
-	/* avoid 64 bit division if possible */
-	if (dividend >> 32)
-		do_div(dividend, d);
-	else
-		dividend = (uint32_t) dividend / d;
+	do_div(dividend, d);
 
 	return dividend;
 }
-- 
cgit v1.2.3


From 7e58886b45bc4a309aeaa8178ef89ff767daaf7f Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 22 Mar 2007 12:10:58 -0700
Subject: [TCP]: cubic optimization

Use willy's work in optimizing cube root by having table for small values.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c | 50 +++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 39 insertions(+), 11 deletions(-)

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 0e6cdfeb207..15c58037500 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -91,23 +91,51 @@ static void bictcp_init(struct sock *sk)
 		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
 }
 
-/*
- * calculate the cubic root of x using Newton-Raphson
+/* calculate the cubic root of x using a table lookup followed by one
+ * Newton-Raphson iteration.
+ * Avg err ~= 0.195%
  */
 static u32 cubic_root(u64 a)
 {
-	u32 x;
-
-	/* Initial estimate is based on:
-	 * cbrt(x) = exp(log(x) / 3)
+	u32 x, b, shift;
+	/*
+	 * cbrt(x) MSB values for x MSB values in [0..63].
+	 * Precomputed then refined by hand - Willy Tarreau
+	 *
+	 * For x in [0..63],
+	 *   v = cbrt(x << 18) - 1
+	 *   cbrt(x) = (v[x] + 10) >> 6
 	 */
-	x = 1u << (fls64(a)/3);
+	static const u8 v[] = {
+		/* 0x00 */    0,   54,   54,   54,  118,  118,  118,  118,
+		/* 0x08 */  123,  129,  134,  138,  143,  147,  151,  156,
+		/* 0x10 */  157,  161,  164,  168,  170,  173,  176,  179,
+		/* 0x18 */  181,  185,  187,  190,  192,  194,  197,  199,
+		/* 0x20 */  200,  202,  204,  206,  209,  211,  213,  215,
+		/* 0x28 */  217,  219,  221,  222,  224,  225,  227,  229,
+		/* 0x30 */  231,  232,  234,  236,  237,  239,  240,  242,
+		/* 0x38 */  244,  245,  246,  248,  250,  251,  252,  254,
+	};
+
+	b = fls64(a);
+	if (b < 7) {
+		/* a in [0..63] */
+		return ((u32)v[(u32)a] + 35) >> 6;
+	}
+
+	b = ((b * 84) >> 8) - 1;
+	shift = (a >> (b * 3));
 
-	/* converges to 32 bits in 3 iterations */
-	x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
-	x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
-	x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
+	x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;
 
+	/*
+	 * Newton-Raphson iteration
+	 *                         2
+	 * x    = ( 2 * x  +  a / x  ) / 3
+	 *  k+1          k         k
+	 */
+	x = (2 * x + (u32)div64_64(a, (u64)x * (u64)(x - 1)));
+	x = ((x * 341) >> 10);
 	return x;
 }
 
-- 
cgit v1.2.3


From a362e0a7890c735a3ef63aab12d71ecfc6e6f4a5 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 22 Mar 2007 12:15:45 -0700
Subject: [NETEM]: report reorder percent correctly.

If you setup netem to just delay packets; "tc qdisc ls" will report
the reordering as 100%. Well it's a lie, reorder isn't used unless
gap is set, so just set value to 0 so the output of utility
is correct.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 4818da5a7e6..791c615e3aa 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -429,7 +429,8 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
 	/* for compatiablity with earlier versions.
 	 * if gap is set, need to assume 100% probablity
 	 */
-	q->reorder = ~0;
+	if (q->gap)
+		q->reorder = ~0;
 
 	/* Handle nested options after initial queue options.
 	 * Should have put all options in nested format but too late now.
-- 
cgit v1.2.3


From b407621c35ed5f9a0734e57472e9539117963768 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 22 Mar 2007 12:16:21 -0700
Subject: [NETEM]: use better types for time values

The random number generator always generates 32 bit values.
The time values are limited by psched_tdiff_t

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 791c615e3aa..4ac6df0a5b3 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -57,19 +57,20 @@ struct netem_sched_data {
 	struct Qdisc	*qdisc;
 	struct qdisc_watchdog watchdog;
 
-	u32 latency;
+	psched_tdiff_t latency;
+	psched_tdiff_t jitter;
+
 	u32 loss;
 	u32 limit;
 	u32 counter;
 	u32 gap;
-	u32 jitter;
 	u32 duplicate;
 	u32 reorder;
 	u32 corrupt;
 
 	struct crndstate {
-		unsigned long last;
-		unsigned long rho;
+		u32 last;
+		u32 rho;
 	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
 
 	struct disttable {
@@ -96,7 +97,7 @@ static void init_crandom(struct crndstate *state, unsigned long rho)
  * Next number depends on last value.
  * rho is scaled to avoid floating point.
  */
-static unsigned long get_crandom(struct crndstate *state)
+static u32 get_crandom(struct crndstate *state)
 {
 	u64 value, rho;
 	unsigned long answer;
@@ -115,11 +116,13 @@ static unsigned long get_crandom(struct crndstate *state)
  * std deviation sigma.  Uses table lookup to approximate the desired
  * distribution, and a uniformly-distributed pseudo-random source.
  */
-static long tabledist(unsigned long mu, long sigma,
-		      struct crndstate *state, const struct disttable *dist)
+static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
+				struct crndstate *state,
+				const struct disttable *dist)
 {
-	long t, x;
-	unsigned long rnd;
+	psched_tdiff_t x;
+	long t;
+	u32 rnd;
 
 	if (sigma == 0)
 		return mu;
-- 
cgit v1.2.3


From 075aa573b74a732aeff487ab77d3fbd627c10856 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 22 Mar 2007 12:17:05 -0700
Subject: [NETEM]: Optimize tfifo

In most cases, the next packet will be sent after the
last one. So optimize that case.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 4ac6df0a5b3..7e9e658d4d9 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -479,22 +479,28 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
  */
 struct fifo_sched_data {
 	u32 limit;
+	psched_time_t oldest;
 };
 
 static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 {
 	struct fifo_sched_data *q = qdisc_priv(sch);
 	struct sk_buff_head *list = &sch->q;
-	const struct netem_skb_cb *ncb
-		= (const struct netem_skb_cb *)nskb->cb;
+	psched_time_t tnext = ((struct netem_skb_cb *)nskb->cb)->time_to_send;
 	struct sk_buff *skb;
 
 	if (likely(skb_queue_len(list) < q->limit)) {
+		/* Optimize for add at tail */
+		if (likely(skb_queue_empty(list) || !PSCHED_TLESS(tnext, q->oldest))) {
+			q->oldest = tnext;
+			return qdisc_enqueue_tail(nskb, sch);
+		}
+
 		skb_queue_reverse_walk(list, skb) {
 			const struct netem_skb_cb *cb
 				= (const struct netem_skb_cb *)skb->cb;
 
-			if (!PSCHED_TLESS(ncb->time_to_send, cb->time_to_send))
+			if (!PSCHED_TLESS(tnext, cb->time_to_send))
 				break;
 		}
 
@@ -507,7 +513,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 		return NET_XMIT_SUCCESS;
 	}
 
-	return qdisc_drop(nskb, sch);
+	return qdisc_reshape_fail(nskb, sch);
 }
 
 static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
@@ -523,6 +529,7 @@ static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
 	} else
 		q->limit = max_t(u32, sch->dev->tx_queue_len, 1);
 
+	PSCHED_SET_PASTPERFECT(q->oldest);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 11274e5a43266d531140530adebead6903380caf Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 22 Mar 2007 12:17:42 -0700
Subject: [NETEM]: avoid excessive requeues

The netem code would call getnstimeofday() and dequeue/requeue after
every packet, even if it was waiting. Avoid this overhead by using
the throttled flag.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c   |  3 +++
 net/sched/sch_netem.c | 23 +++++++++++++----------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b06f20294ac..fcaa4adefc8 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -298,6 +298,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 						 timer);
 
 	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+	smp_wmb();
 	netif_schedule(wd->qdisc->dev);
 	return HRTIMER_NORESTART;
 }
@@ -315,6 +316,7 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
 	ktime_t time;
 
 	wd->qdisc->flags |= TCQ_F_THROTTLED;
+	smp_wmb();
 	time = ktime_set(0, 0);
 	time = ktime_add_ns(time, PSCHED_US2NS(expires));
 	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
@@ -325,6 +327,7 @@ void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
 {
 	hrtimer_cancel(&wd->timer);
 	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+	smp_wmb();
 }
 EXPORT_SYMBOL(qdisc_watchdog_cancel);
 
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 7e9e658d4d9..fb49e9e7ace 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -273,6 +273,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 	struct netem_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
 
+	smp_mb();
+	if (sch->flags & TCQ_F_THROTTLED)
+		return NULL;
+
 	skb = q->qdisc->dequeue(q->qdisc);
 	if (skb) {
 		const struct netem_skb_cb *cb
@@ -285,18 +289,17 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 		if (PSCHED_TLESS(cb->time_to_send, now)) {
 			pr_debug("netem_dequeue: return skb=%p\n", skb);
 			sch->q.qlen--;
-			sch->flags &= ~TCQ_F_THROTTLED;
 			return skb;
-		} else {
-			qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
-
-			if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
-				qdisc_tree_decrease_qlen(q->qdisc, 1);
-				sch->qstats.drops++;
-				printk(KERN_ERR "netem: queue discpline %s could not requeue\n",
-				       q->qdisc->ops->id);
-			}
 		}
+
+		if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) {
+			qdisc_tree_decrease_qlen(q->qdisc, 1);
+			sch->qstats.drops++;
+			printk(KERN_ERR "netem: %s could not requeue\n",
+			       q->qdisc->ops->id);
+		}
+
+		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
 	}
 
 	return NULL;
-- 
cgit v1.2.3


From 1936502d00ae6c2aa3931c42f6cf54afaba094f2 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 22 Mar 2007 12:18:35 -0700
Subject: [NET_SCHED] qdisc: avoid transmit softirq on watchdog wakeup

If possible, avoid having to do a transmit softirq when a qdisc
watchdog decides to re-enable.  The watchdog routine runs off
a timer, so it is already in the same effective context as
the softirq.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index fcaa4adefc8..58732509160 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -296,10 +296,16 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 {
 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
 						 timer);
+	struct net_device *dev = wd->qdisc->dev;
 
 	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
 	smp_wmb();
-	netif_schedule(wd->qdisc->dev);
+	if (spin_trylock(&dev->queue_lock)) {
+		qdisc_run(dev);
+		spin_unlock(&dev->queue_lock);
+	} else
+		netif_schedule(dev);
+
 	return HRTIMER_NORESTART;
 }
 
-- 
cgit v1.2.3


From 51057f2fecff1c520b083c5ac9229e7aebce9e01 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 21:41:06 -0700
Subject: [RTNL]: Properly return rntl message handler

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 294c2f209a3..4398cb81bca 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -122,10 +122,10 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
 	struct rtnl_link *tab;
 
 	tab = rtnl_msg_handlers[protocol];
-	if (tab == NULL || tab->doit == NULL)
+	if (tab == NULL || tab[msgindex].doit == NULL)
 		tab = rtnl_msg_handlers[PF_UNSPEC];
 
-	return tab ? tab->doit : NULL;
+	return tab ? tab[msgindex].doit : NULL;
 }
 
 static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
@@ -133,10 +133,10 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
 	struct rtnl_link *tab;
 
 	tab = rtnl_msg_handlers[protocol];
-	if (tab == NULL || tab->dumpit == NULL)
+	if (tab == NULL || tab[msgindex].dumpit == NULL)
 		tab = rtnl_msg_handlers[PF_UNSPEC];
 
-	return tab ? tab->dumpit : NULL;
+	return tab ? tab[msgindex].dumpit : NULL;
 }
 
 /**
-- 
cgit v1.2.3


From e9195d677d6f06730edd5c2a3fe3283564e39c51 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 23:27:01 -0700
Subject: [TCP] vegas: Use type safe netlink interface

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_vegas.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 5c484dceb96..87e72bef6d0 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -341,16 +341,14 @@ static void tcp_vegas_get_info(struct sock *sk, u32 ext,
 {
 	const struct vegas *ca = inet_csk_ca(sk);
 	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-		struct tcpvegas_info *info;
-
-		info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO,
-					  sizeof(*info)));
-
-		info->tcpv_enabled = ca->doing_vegas_now;
-		info->tcpv_rttcnt = ca->cntRTT;
-		info->tcpv_rtt = ca->baseRTT;
-		info->tcpv_minrtt = ca->minRTT;
-	rtattr_failure:	;
+		struct tcpvegas_info info = {
+			.tcpv_enabled = ca->doing_vegas_now,
+			.tcpv_rttcnt = ca->cntRTT,
+			.tcpv_rtt = ca->baseRTT,
+			.tcpv_minrtt = ca->minRTT,
+		};
+
+		nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
 	}
 }
 
-- 
cgit v1.2.3


From 267281058c4cfd6a9a173aa957bffa58239f9656 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 23:27:19 -0700
Subject: [TCP] westwood: Use type safe netlink interface

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_westwood.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 1f91aeae10a..ae1026a6772 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -260,16 +260,13 @@ static void tcp_westwood_info(struct sock *sk, u32 ext,
 {
 	const struct westwood *ca = inet_csk_ca(sk);
 	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-		struct rtattr *rta;
-		struct tcpvegas_info *info;
-
-		rta = __RTA_PUT(skb, INET_DIAG_VEGASINFO, sizeof(*info));
-		info = RTA_DATA(rta);
-		info->tcpv_enabled = 1;
-		info->tcpv_rttcnt = 0;
-		info->tcpv_rtt = jiffies_to_usecs(ca->rtt);
-		info->tcpv_minrtt = jiffies_to_usecs(ca->rtt_min);
-	rtattr_failure:	;
+		struct tcpvegas_info info = {
+			.tcpv_enabled = 1,
+			.tcpv_rtt = jiffies_to_usecs(ca->rtt),
+			.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
+		};
+
+		nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
 	}
 }
 
-- 
cgit v1.2.3


From 33a0543cd9e090d2c6759e0ed85c3049c6efcc06 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 23:27:39 -0700
Subject: [NETLINK]: Remove unused groups variable

Leftover from dynamic multicast groups allocation work.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 04b72d3c1de..8488c15f264 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -396,7 +396,6 @@ static int netlink_create(struct socket *sock, int protocol)
 {
 	struct module *module = NULL;
 	struct netlink_sock *nlk;
-	unsigned int groups;
 	int err = 0;
 
 	sock->state = SS_UNCONNECTED;
@@ -418,7 +417,6 @@ static int netlink_create(struct socket *sock, int protocol)
 	if (nl_table[protocol].registered &&
 	    try_module_get(nl_table[protocol].module))
 		module = nl_table[protocol].module;
-	groups = nl_table[protocol].groups;
 	netlink_unlock_table();
 
 	if ((err = __netlink_create(sock, protocol)) < 0)
-- 
cgit v1.2.3


From d35b685640aeb39eb4f5e98c75e8e001e406f9a3 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 23:28:46 -0700
Subject: [NETLINK]: Ignore !NLM_F_REQUEST messages directly in
 netlink_run_queue()

netlink_rcv_skb() is changed to skip messages which don't have the
NLM_F_REQUEST bit to avoid every netlink family having to perform this
check on their own.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c     |  4 ----
 net/netlink/af_netlink.c | 10 ++++++++--
 net/netlink/genetlink.c  |  3 ---
 net/xfrm/xfrm_user.c     |  3 ---
 4 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4398cb81bca..cc09283fd76 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -862,10 +862,6 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
 	int type;
 	int err;
 
-	/* Only requests are handled by kernel now */
-	if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
-		return 0;
-
 	type = nlh->nlmsg_type;
 
 	/* A control message: ignore them */
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 8488c15f264..7b455980e9b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1470,10 +1470,15 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 
 	while (skb->len >= nlmsg_total_size(0)) {
 		nlh = nlmsg_hdr(skb);
+		err = 0;
 
 		if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
 			return 0;
 
+		/* Only requests are handled by the kernel */
+		if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
+			goto skip;
+
 		if (cb(skb, nlh, &err) < 0) {
 			/* Not an error, but we have to interrupt processing
 			 * here. Note: that in this case we do not pull
@@ -1481,9 +1486,10 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 			 */
 			if (err == 0)
 				return -1;
+		}
+skip:
+		if (nlh->nlmsg_flags & NLM_F_ACK || err)
 			netlink_ack(skb, nlh, err);
-		} else if (nlh->nlmsg_flags & NLM_F_ACK)
-			netlink_ack(skb, nlh, 0);
 
 		netlink_queue_skip(nlh, skb);
 	}
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index c2996794eb2..039516f6cd8 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -304,9 +304,6 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct genlmsghdr *hdr = nlmsg_data(nlh);
 	int hdrlen, err = -EINVAL;
 
-	if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
-		goto ignore;
-
 	if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
 		goto ignore;
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 6b7f6dc144c..913c8b727d8 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1858,9 +1858,6 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
 	struct xfrm_link *link;
 	int type, min_len;
 
-	if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
-		return 0;
-
 	type = nlh->nlmsg_type;
 
 	/* A control message: ignore them */
-- 
cgit v1.2.3


From 45e7ae7f716086994e4e747226881f901c67b031 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 23:29:10 -0700
Subject: [NETLINK]: Ignore control messages directly in netlink_run_queue()

Changes netlink_rcv_skb() to skip netlink controll messages and don't
pass them on to the message handler.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c     | 4 ----
 net/netlink/af_netlink.c | 4 ++++
 net/netlink/genetlink.c  | 6 ------
 net/xfrm/xfrm_user.c     | 4 ----
 4 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index cc09283fd76..b2136accd26 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -864,10 +864,6 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
 
 	type = nlh->nlmsg_type;
 
-	/* A control message: ignore them */
-	if (type < RTM_BASE)
-		return 0;
-
 	/* Unknown message: reply with EINVAL */
 	if (type > RTM_MAX)
 		goto err_inval;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7b455980e9b..5d1079b1838 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1479,6 +1479,10 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 		if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
 			goto skip;
 
+		/* Skip control messages */
+		if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
+			goto skip;
+
 		if (cb(skb, nlh, &err) < 0) {
 			/* Not an error, but we have to interrupt processing
 			 * here. Note: that in this case we do not pull
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 039516f6cd8..95391e60904 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -304,9 +304,6 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct genlmsghdr *hdr = nlmsg_data(nlh);
 	int hdrlen, err = -EINVAL;
 
-	if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
-		goto ignore;
-
 	family = genl_family_find_byid(nlh->nlmsg_type);
 	if (family == NULL) {
 		err = -ENOENT;
@@ -364,9 +361,6 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	*errp = err = ops->doit(skb, &info);
 	return err;
 
-ignore:
-	return 0;
-
 errout:
 	*errp = err;
 	return -1;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 913c8b727d8..4d2f2094e6d 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1860,10 +1860,6 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
 
 	type = nlh->nlmsg_type;
 
-	/* A control message: ignore them */
-	if (type < XFRM_MSG_BASE)
-		return 0;
-
 	/* Unknown message: reply with EINVAL */
 	if (type > XFRM_MSG_MAX)
 		goto err_einval;
-- 
cgit v1.2.3


From 1d00a4eb42bdade33a6ec0961cada93577a66ae6 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 23:30:12 -0700
Subject: [NETLINK]: Remove error pointer from netlink message handler

The error pointer argument in netlink message handlers is used
to signal the special case where processing has to be interrupted
because a dump was started but no error happened. Instead it is
simpler and more clear to return -EINTR and have netlink_run_queue()
deal with getting the queue right.

nfnetlink passed on this error pointer to its subsystem handlers
but only uses it to signal the start of a netlink dump. Therefore
it can be removed there as well.

This patch also cleans up the error handling in the affected
message handlers to be consistent since it had to be touched anyway.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nfnetlink.h  |  2 +-
 include/net/netlink.h                |  2 +-
 net/core/rtnetlink.c                 | 46 ++++++++++-------------------
 net/netfilter/nf_conntrack_netlink.c | 46 +++++++++++------------------
 net/netfilter/nfnetlink.c            | 26 ++++++-----------
 net/netfilter/nfnetlink_log.c        |  4 +--
 net/netfilter/nfnetlink_queue.c      |  6 ++--
 net/netlink/af_netlink.c             | 21 ++++++++------
 net/netlink/genetlink.c              | 56 +++++++++++++-----------------------
 net/xfrm/xfrm_user.c                 | 40 +++++++++-----------------
 10 files changed, 93 insertions(+), 156 deletions(-)

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index e1ea5dfbbbd..0f9311df155 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -111,7 +111,7 @@ struct nfgenmsg {
 struct nfnl_callback
 {
 	int (*call)(struct sock *nl, struct sk_buff *skb, 
-		struct nlmsghdr *nlh, struct nfattr *cda[], int *errp);
+		struct nlmsghdr *nlh, struct nfattr *cda[]);
 	u_int16_t attr_count;	/* number of nfattr's */
 };
 
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 510ca7fabe1..1c11518fc82 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -214,7 +214,7 @@ struct nl_info {
 
 extern void		netlink_run_queue(struct sock *sk, unsigned int *qlen,
 					  int (*cb)(struct sk_buff *,
-						    struct nlmsghdr *, int *));
+						    struct nlmsghdr *));
 extern void		netlink_queue_skip(struct nlmsghdr *nlh,
 					   struct sk_buff *skb);
 extern int		nlmsg_notify(struct sock *sk, struct sk_buff *skb,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b2136accd26..14241ada41a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -852,8 +852,7 @@ static int rtattr_max;
 
 /* Process one rtnetlink message. */
 
-static __inline__ int
-rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	rtnl_doit_func doit;
 	int sz_idx, kind;
@@ -863,10 +862,8 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
 	int err;
 
 	type = nlh->nlmsg_type;
-
-	/* Unknown message: reply with EINVAL */
 	if (type > RTM_MAX)
-		goto err_inval;
+		return -EINVAL;
 
 	type -= RTM_BASE;
 
@@ -875,40 +872,33 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
 		return 0;
 
 	family = ((struct rtgenmsg*)NLMSG_DATA(nlh))->rtgen_family;
-	if (family >= NPROTO) {
-		*errp = -EAFNOSUPPORT;
-		return -1;
-	}
+	if (family >= NPROTO)
+		return -EAFNOSUPPORT;
 
 	sz_idx = type>>2;
 	kind = type&3;
 
-	if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN)) {
-		*errp = -EPERM;
-		return -1;
-	}
+	if (kind != 2 && security_netlink_recv(skb, CAP_NET_ADMIN))
+		return -EPERM;
 
 	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
 		rtnl_dumpit_func dumpit;
 
 		dumpit = rtnl_get_dumpit(family, type);
 		if (dumpit == NULL)
-			goto err_inval;
-
-		if ((*errp = netlink_dump_start(rtnl, skb, nlh,
-						dumpit, NULL)) != 0) {
-			return -1;
-		}
+			return -EINVAL;
 
-		netlink_queue_skip(nlh, skb);
-		return -1;
+		err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
+		if (err == 0)
+			err = -EINTR;
+		return err;
 	}
 
 	memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
 
 	min_len = rtm_min[sz_idx];
 	if (nlh->nlmsg_len < min_len)
-		goto err_inval;
+		return -EINVAL;
 
 	if (nlh->nlmsg_len > min_len) {
 		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
@@ -918,7 +908,7 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
 			unsigned flavor = attr->rta_type;
 			if (flavor) {
 				if (flavor > rta_max[sz_idx])
-					goto err_inval;
+					return -EINVAL;
 				rta_buf[flavor-1] = attr;
 			}
 			attr = RTA_NEXT(attr, attrlen);
@@ -927,15 +917,9 @@ rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
 
 	doit = rtnl_get_doit(family, type);
 	if (doit == NULL)
-		goto err_inval;
-	err = doit(skb, nlh, (void *)&rta_buf[0]);
-
-	*errp = err;
-	return err;
+		return -EINVAL;
 
-err_inval:
-	*errp = -EINVAL;
-	return -1;
+	return doit(skb, nlh, (void *)&rta_buf[0]);
 }
 
 static void rtnetlink_rcv(struct sock *sk, int len)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 76f11f32591..443ba7753a3 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -661,7 +661,7 @@ static const size_t cta_min[CTA_MAX] = {
 
 static int
 ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
-			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+			struct nlmsghdr *nlh, struct nfattr *cda[])
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
@@ -709,7 +709,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
 
 static int
 ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
-			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+			struct nlmsghdr *nlh, struct nfattr *cda[])
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
@@ -720,22 +720,15 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	int err = 0;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-		u32 rlen;
-
 #ifndef CONFIG_NF_CT_ACCT
 		if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
 			return -ENOTSUPP;
 #endif
-		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-						ctnetlink_dump_table,
-						ctnetlink_done)) != 0)
-			return -EINVAL;
-
-		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-		if (rlen > skb->len)
-			rlen = skb->len;
-		skb_pull(skb, rlen);
-		return 0;
+		err = netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table,
+					 ctnetlink_done);
+		if (err == 0)
+			err = -EINTR;
+		return err;
 	}
 
 	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
@@ -1009,7 +1002,7 @@ err:
 
 static int
 ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
-			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+			struct nlmsghdr *nlh, struct nfattr *cda[])
 {
 	struct nf_conntrack_tuple otuple, rtuple;
 	struct nf_conntrack_tuple_hash *h = NULL;
@@ -1260,7 +1253,7 @@ static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
 
 static int
 ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
-		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+		     struct nlmsghdr *nlh, struct nfattr *cda[])
 {
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_expect *exp;
@@ -1273,17 +1266,12 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 		return -EINVAL;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-		u32 rlen;
-
-		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
-						ctnetlink_exp_dump_table,
-						ctnetlink_done)) != 0)
-			return -EINVAL;
-		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
-		if (rlen > skb->len)
-			rlen = skb->len;
-		skb_pull(skb, rlen);
-		return 0;
+		err = netlink_dump_start(ctnl, skb, nlh,
+					 ctnetlink_exp_dump_table,
+					 ctnetlink_done);
+		if (err == 0)
+			err = -EINTR;
+		return err;
 	}
 
 	if (cda[CTA_EXPECT_MASTER-1])
@@ -1330,7 +1318,7 @@ out:
 
 static int
 ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
-		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+		     struct nlmsghdr *nlh, struct nfattr *cda[])
 {
 	struct nf_conntrack_expect *exp, *tmp;
 	struct nf_conntrack_tuple tuple;
@@ -1464,7 +1452,7 @@ out:
 
 static int
 ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
-		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+		     struct nlmsghdr *nlh, struct nfattr *cda[])
 {
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_expect *exp;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index dec36abdf94..c37ed0156b0 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -195,17 +195,14 @@ int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags)
 EXPORT_SYMBOL_GPL(nfnetlink_unicast);
 
 /* Process one complete nfnetlink message. */
-static int nfnetlink_rcv_msg(struct sk_buff *skb,
-				    struct nlmsghdr *nlh, int *errp)
+static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	struct nfnl_callback *nc;
 	struct nfnetlink_subsystem *ss;
-	int type, err = 0;
+	int type, err;
 
-	if (security_netlink_recv(skb, CAP_NET_ADMIN)) {
-		*errp = -EPERM;
-		return -1;
-	}
+	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+		return -EPERM;
 
 	/* Only requests are handled by kernel now. */
 	if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
@@ -227,12 +224,12 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb,
 		ss = nfnetlink_get_subsys(type);
 		if (!ss)
 #endif
-			goto err_inval;
+			return -EINVAL;
 	}
 
 	nc = nfnetlink_find_client(type, ss);
 	if (!nc)
-		goto err_inval;
+		return -EINVAL;
 
 	{
 		u_int16_t attr_count =
@@ -243,16 +240,9 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb,
 
 		err = nfnetlink_check_attributes(ss, nlh, cda);
 		if (err < 0)
-			goto err_inval;
-
-		err = nc->call(nfnl, skb, nlh, cda, errp);
-		*errp = err;
-		return err;
+			return err;
+		return nc->call(nfnl, skb, nlh, cda);
 	}
-
-err_inval:
-	*errp = -EINVAL;
-	return -1;
 }
 
 static void nfnetlink_rcv(struct sock *sk, int len)
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 9709f94787f..b174aadd73e 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -759,7 +759,7 @@ static struct notifier_block nfulnl_rtnl_notifier = {
 
 static int
 nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
-		  struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+		  struct nlmsghdr *nlh, struct nfattr *nfqa[])
 {
 	return -ENOTSUPP;
 }
@@ -797,7 +797,7 @@ static const int nfula_cfg_min[NFULA_CFG_MAX] = {
 
 static int
 nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
-		   struct nlmsghdr *nlh, struct nfattr *nfula[], int *errp)
+		   struct nlmsghdr *nlh, struct nfattr *nfula[])
 {
 	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
 	u_int16_t group_num = ntohs(nfmsg->res_id);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index b6585caa431..9aefb1c9bfa 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -783,7 +783,7 @@ static const int nfqa_verdict_min[NFQA_MAX] = {
 
 static int
 nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
-		   struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+		   struct nlmsghdr *nlh, struct nfattr *nfqa[])
 {
 	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
 	u_int16_t queue_num = ntohs(nfmsg->res_id);
@@ -848,7 +848,7 @@ err_out_put:
 
 static int
 nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
-		  struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+		  struct nlmsghdr *nlh, struct nfattr *nfqa[])
 {
 	return -ENOTSUPP;
 }
@@ -865,7 +865,7 @@ static struct nf_queue_handler nfqh = {
 
 static int
 nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
-		  struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp)
+		  struct nlmsghdr *nlh, struct nfattr *nfqa[])
 {
 	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
 	u_int16_t queue_num = ntohs(nfmsg->res_id);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5d1079b1838..1823b7c6315 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1463,7 +1463,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
 }
 
 static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
-						     struct nlmsghdr *, int *))
+						     struct nlmsghdr *))
 {
 	struct nlmsghdr *nlh;
 	int err;
@@ -1483,13 +1483,11 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
 		if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
 			goto skip;
 
-		if (cb(skb, nlh, &err) < 0) {
-			/* Not an error, but we have to interrupt processing
-			 * here. Note: that in this case we do not pull
-			 * message from skb, it will be processed later.
-			 */
-			if (err == 0)
-				return -1;
+		err = cb(skb, nlh);
+		if (err == -EINTR) {
+			/* Not an error, but we interrupt processing */
+			netlink_queue_skip(nlh, skb);
+			return err;
 		}
 skip:
 		if (nlh->nlmsg_flags & NLM_F_ACK || err)
@@ -1515,9 +1513,14 @@ skip:
  *
  * qlen must be initialized to 0 before the initial entry, afterwards
  * the function may be called repeatedly until qlen reaches 0.
+ *
+ * The callback function may return -EINTR to signal that processing
+ * of netlink messages shall be interrupted. In this case the message
+ * currently being processed will NOT be requeued onto the receive
+ * queue.
  */
 void netlink_run_queue(struct sock *sk, unsigned int *qlen,
-		       int (*cb)(struct sk_buff *, struct nlmsghdr *, int *))
+		       int (*cb)(struct sk_buff *, struct nlmsghdr *))
 {
 	struct sk_buff *skb;
 
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 95391e60904..1b897bc92e6 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -295,60 +295,49 @@ int genl_unregister_family(struct genl_family *family)
 	return -ENOENT;
 }
 
-static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
-			       int *errp)
+static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	struct genl_ops *ops;
 	struct genl_family *family;
 	struct genl_info info;
 	struct genlmsghdr *hdr = nlmsg_data(nlh);
-	int hdrlen, err = -EINVAL;
+	int hdrlen, err;
 
 	family = genl_family_find_byid(nlh->nlmsg_type);
-	if (family == NULL) {
-		err = -ENOENT;
-		goto errout;
-	}
+	if (family == NULL)
+		return -ENOENT;
 
 	hdrlen = GENL_HDRLEN + family->hdrsize;
 	if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
-		goto errout;
+		return -EINVAL;
 
 	ops = genl_get_cmd(hdr->cmd, family);
-	if (ops == NULL) {
-		err = -EOPNOTSUPP;
-		goto errout;
-	}
+	if (ops == NULL)
+		return -EOPNOTSUPP;
 
-	if ((ops->flags & GENL_ADMIN_PERM) && security_netlink_recv(skb, CAP_NET_ADMIN)) {
-		err = -EPERM;
-		goto errout;
-	}
+	if ((ops->flags & GENL_ADMIN_PERM) &&
+	    security_netlink_recv(skb, CAP_NET_ADMIN))
+		return -EPERM;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-		if (ops->dumpit == NULL) {
-			err = -EOPNOTSUPP;
-			goto errout;
-		}
+		if (ops->dumpit == NULL)
+			return -EOPNOTSUPP;
 
-		*errp = err = netlink_dump_start(genl_sock, skb, nlh,
-						 ops->dumpit, ops->done);
+		err = netlink_dump_start(genl_sock, skb, nlh,
+					 ops->dumpit, ops->done);
 		if (err == 0)
-			skb_pull(skb, min(NLMSG_ALIGN(nlh->nlmsg_len),
-					  skb->len));
-		return -1;
+			err = -EINTR;
+		return err;
 	}
 
-	if (ops->doit == NULL) {
-		err = -EOPNOTSUPP;
-		goto errout;
-	}
+	if (ops->doit == NULL)
+		return -EOPNOTSUPP;
 
 	if (family->attrbuf) {
 		err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr,
 				  ops->policy);
 		if (err < 0)
-			goto errout;
+			return err;
 	}
 
 	info.snd_seq = nlh->nlmsg_seq;
@@ -358,12 +347,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
 	info.attrs = family->attrbuf;
 
-	*errp = err = ops->doit(skb, &info);
-	return err;
-
-errout:
-	*errp = err;
-	return -1;
+	return ops->doit(skb, &info);
 }
 
 static void genl_rcv(struct sock *sk, int len)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 4d2f2094e6d..5e52d6275ba 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1852,46 +1852,39 @@ static struct xfrm_link {
 	[XFRM_MSG_MIGRATE     - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate    },
 };
 
-static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
+static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	struct rtattr *xfrma[XFRMA_MAX];
 	struct xfrm_link *link;
-	int type, min_len;
+	int type, min_len, err;
 
 	type = nlh->nlmsg_type;
-
-	/* Unknown message: reply with EINVAL */
 	if (type > XFRM_MSG_MAX)
-		goto err_einval;
+		return -EINVAL;
 
 	type -= XFRM_MSG_BASE;
 	link = &xfrm_dispatch[type];
 
 	/* All operations require privileges, even GET */
-	if (security_netlink_recv(skb, CAP_NET_ADMIN)) {
-		*errp = -EPERM;
-		return -1;
-	}
+	if (security_netlink_recv(skb, CAP_NET_ADMIN))
+		return -EPERM;
 
 	if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) ||
 	     type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) &&
 	    (nlh->nlmsg_flags & NLM_F_DUMP)) {
 		if (link->dump == NULL)
-			goto err_einval;
-
-		if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh,
-						link->dump, NULL)) != 0) {
-			return -1;
-		}
+			return -EINVAL;
 
-		netlink_queue_skip(nlh, skb);
-		return -1;
+		err = netlink_dump_start(xfrm_nl, skb, nlh, link->dump, NULL);
+		if (err == 0)
+			err = -EINTR;
+		return err;
 	}
 
 	memset(xfrma, 0, sizeof(xfrma));
 
 	if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type]))
-		goto err_einval;
+		return -EINVAL;
 
 	if (nlh->nlmsg_len > min_len) {
 		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
@@ -1901,7 +1894,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
 			unsigned short flavor = attr->rta_type;
 			if (flavor) {
 				if (flavor > XFRMA_MAX)
-					goto err_einval;
+					return -EINVAL;
 				xfrma[flavor - 1] = attr;
 			}
 			attr = RTA_NEXT(attr, attrlen);
@@ -1909,14 +1902,9 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *err
 	}
 
 	if (link->doit == NULL)
-		goto err_einval;
-	*errp = link->doit(skb, nlh, xfrma);
-
-	return *errp;
+		return -EINVAL;
 
-err_einval:
-	*errp = -EINVAL;
-	return -1;
+	return link->doit(skb, nlh, xfrma);
 }
 
 static void xfrm_netlink_rcv(struct sock *sk, int len)
-- 
cgit v1.2.3


From ead592ba246dfcc643b3f0f0c8c03f7bc898a59f Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 23:30:35 -0700
Subject: [IPv4] diag: Use netlink_run_queue() to process the receive queue

Makes use of netlink_run_queue() to process the receive queue and
converts inet_diag_rcv_msg() to use the type safe netlink interface.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_diag.c | 74 +++++++++++++++++++---------------------------------
 1 file changed, 27 insertions(+), 47 deletions(-)

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 62c2e9f7e11..7dd97c5969b 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -806,68 +806,48 @@ done:
 	return skb->len;
 }
 
-static inline int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
-		return 0;
+	int hdrlen = sizeof(struct inet_diag_req);
 
-	if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX)
-		goto err_inval;
+	if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
+	    nlmsg_len(nlh) < hdrlen)
+		return -EINVAL;
 
 	if (inet_diag_table[nlh->nlmsg_type] == NULL)
 		return -ENOENT;
 
-	if (NLMSG_LENGTH(sizeof(struct inet_diag_req)) > skb->len)
-		goto err_inval;
-
-	if (nlh->nlmsg_flags&NLM_F_DUMP) {
-		if (nlh->nlmsg_len >
-		    (4 + NLMSG_SPACE(sizeof(struct inet_diag_req)))) {
-			struct rtattr *rta = (void *)(NLMSG_DATA(nlh) +
-						 sizeof(struct inet_diag_req));
-			if (rta->rta_type != INET_DIAG_REQ_BYTECODE ||
-			    rta->rta_len < 8 ||
-			    rta->rta_len >
-			    (nlh->nlmsg_len -
-			     NLMSG_SPACE(sizeof(struct inet_diag_req))))
-				goto err_inval;
-			if (inet_diag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta)))
-				goto err_inval;
-		}
-		return netlink_dump_start(idiagnl, skb, nlh,
-					  inet_diag_dump, NULL);
-	} else
-		return inet_diag_get_exact(skb, nlh);
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		int err;
 
-err_inval:
-	return -EINVAL;
-}
+		if (nlmsg_attrlen(nlh, hdrlen)) {
+			struct nlattr *attr;
 
+			attr = nlmsg_find_attr(nlh, hdrlen,
+					       INET_DIAG_REQ_BYTECODE);
+			if (attr == NULL ||
+			    nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
+			    inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
+				return -EINVAL;
+		}
 
-static inline void inet_diag_rcv_skb(struct sk_buff *skb)
-{
-	if (skb->len >= NLMSG_SPACE(0)) {
-		int err;
-		struct nlmsghdr *nlh = nlmsg_hdr(skb);
-
-		if (nlh->nlmsg_len < sizeof(*nlh) ||
-		    skb->len < nlh->nlmsg_len)
-			return;
-		err = inet_diag_rcv_msg(skb, nlh);
-		if (err || nlh->nlmsg_flags & NLM_F_ACK)
-			netlink_ack(skb, nlh, err);
+		err = netlink_dump_start(idiagnl, skb, nlh,
+					 inet_diag_dump, NULL);
+		if (err == 0)
+			err = -EINTR;
+		return err;
 	}
+
+	return inet_diag_get_exact(skb, nlh);
 }
 
 static void inet_diag_rcv(struct sock *sk, int len)
 {
-	struct sk_buff *skb;
-	unsigned int qlen = skb_queue_len(&sk->sk_receive_queue);
+	unsigned int qlen = 0;
 
-	while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) {
-		inet_diag_rcv_skb(skb);
-		kfree_skb(skb);
-	}
+	do {
+		netlink_run_queue(sk, &qlen, &inet_diag_rcv_msg);
+	} while (qlen);
 }
 
 static DEFINE_SPINLOCK(inet_diag_register_lock);
-- 
cgit v1.2.3


From c702e8047fe74648f7852a9c1de781b0d5a98402 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 Mar 2007 23:30:55 -0700
Subject: [NETLINK]: Directly return -EINTR from netlink_dump_start()

Now that all users of netlink_dump_start() use netlink_run_queue()
to process the receive queue, it is possible to return -EINTR from
netlink_dump_start() directly, therefore simplying the callers.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c                 |  6 +-----
 net/ipv4/inet_diag.c                 |  9 ++-------
 net/netfilter/nf_conntrack_netlink.c | 16 +++++-----------
 net/netlink/af_netlink.c             |  7 ++++++-
 net/netlink/genetlink.c              |  7 ++-----
 net/xfrm/xfrm_user.c                 |  7 ++-----
 6 files changed, 18 insertions(+), 34 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 14241ada41a..fa5f938b37e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -859,7 +859,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	int min_len;
 	int family;
 	int type;
-	int err;
 
 	type = nlh->nlmsg_type;
 	if (type > RTM_MAX)
@@ -888,10 +887,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (dumpit == NULL)
 			return -EINVAL;
 
-		err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
-		if (err == 0)
-			err = -EINTR;
-		return err;
+		return netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
 	}
 
 	memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 7dd97c5969b..0148f0e34ce 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -818,8 +818,6 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		return -ENOENT;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-		int err;
-
 		if (nlmsg_attrlen(nlh, hdrlen)) {
 			struct nlattr *attr;
 
@@ -831,11 +829,8 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 				return -EINVAL;
 		}
 
-		err = netlink_dump_start(idiagnl, skb, nlh,
-					 inet_diag_dump, NULL);
-		if (err == 0)
-			err = -EINTR;
-		return err;
+		return netlink_dump_start(idiagnl, skb, nlh,
+					  inet_diag_dump, NULL);
 	}
 
 	return inet_diag_get_exact(skb, nlh);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 443ba7753a3..e5267305bb4 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -724,11 +724,8 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
 		if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO)
 			return -ENOTSUPP;
 #endif
-		err = netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table,
-					 ctnetlink_done);
-		if (err == 0)
-			err = -EINTR;
-		return err;
+		return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table,
+					  ctnetlink_done);
 	}
 
 	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
@@ -1266,12 +1263,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 		return -EINVAL;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
-		err = netlink_dump_start(ctnl, skb, nlh,
-					 ctnetlink_exp_dump_table,
-					 ctnetlink_done);
-		if (err == 0)
-			err = -EINTR;
-		return err;
+		return netlink_dump_start(ctnl, skb, nlh,
+					  ctnetlink_exp_dump_table,
+					  ctnetlink_done);
 	}
 
 	if (cda[CTA_EXPECT_MASTER-1])
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1823b7c6315..2cbf1682f63 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1426,7 +1426,12 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 
 	netlink_dump(sk);
 	sock_put(sk);
-	return 0;
+
+	/* We successfully started a dump, by returning -EINTR we
+	 * signal the queue mangement to interrupt processing of
+	 * any netlink messages so userspace gets a chance to read
+	 * the results. */
+	return -EINTR;
 }
 
 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 1b897bc92e6..fac2e7a6dbe 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -323,11 +323,8 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (ops->dumpit == NULL)
 			return -EOPNOTSUPP;
 
-		err = netlink_dump_start(genl_sock, skb, nlh,
-					 ops->dumpit, ops->done);
-		if (err == 0)
-			err = -EINTR;
-		return err;
+		return netlink_dump_start(genl_sock, skb, nlh,
+					  ops->dumpit, ops->done);
 	}
 
 	if (ops->doit == NULL)
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 5e52d6275ba..2ff968373f1 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1856,7 +1856,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	struct rtattr *xfrma[XFRMA_MAX];
 	struct xfrm_link *link;
-	int type, min_len, err;
+	int type, min_len;
 
 	type = nlh->nlmsg_type;
 	if (type > XFRM_MSG_MAX)
@@ -1875,10 +1875,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (link->dump == NULL)
 			return -EINVAL;
 
-		err = netlink_dump_start(xfrm_nl, skb, nlh, link->dump, NULL);
-		if (err == 0)
-			err = -EINTR;
-		return err;
+		return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, NULL);
 	}
 
 	memset(xfrma, 0, sizeof(xfrma));
-- 
cgit v1.2.3


From bb2f8cc0ecf025d6d3947e0389434650023f432e Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Fri, 23 Mar 2007 00:12:09 -0700
Subject: [NETEM]: spelling errors

Get rid of some of my creative spelling.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index fb49e9e7ace..3e1b633e8b0 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -102,7 +102,7 @@ static u32 get_crandom(struct crndstate *state)
 	u64 value, rho;
 	unsigned long answer;
 
-	if (state->rho == 0)	/* no correllation */
+	if (state->rho == 0)	/* no correlation */
 		return net_random();
 
 	value = net_random();
@@ -432,8 +432,8 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
 	q->loss = qopt->loss;
 	q->duplicate = qopt->duplicate;
 
-	/* for compatiablity with earlier versions.
-	 * if gap is set, need to assume 100% probablity
+	/* for compatibility with earlier versions.
+	 * if gap is set, need to assume 100% probability
 	 */
 	if (q->gap)
 		q->reorder = ~0;
-- 
cgit v1.2.3


From 1b53d9042c04b8eb875d02e65792e9884efc3784 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 23 Mar 2007 11:10:13 -0700
Subject: [NETFILTER]: Remove changelogs and CVS IDs

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ip_queue.c                  | 12 ------------
 net/ipv4/netfilter/ip_tables.c                 |  6 ------
 net/ipv4/netfilter/ipt_ECN.c                   |  2 --
 net/ipv4/netfilter/ipt_REJECT.c                |  2 --
 net/ipv4/netfilter/ipt_SAME.c                  | 15 ---------------
 net/ipv4/netfilter/ipt_ULOG.c                  | 16 ----------------
 net/ipv4/netfilter/ipt_ecn.c                   |  2 --
 net/ipv4/netfilter/ipt_ttl.c                   |  2 --
 net/ipv4/netfilter/iptable_mangle.c            |  2 --
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |  8 --------
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   |  5 -----
 net/ipv4/netfilter/nf_nat_snmp_basic.c         |  4 ----
 net/ipv6/netfilter/ip6_queue.c                 | 12 ------------
 net/ipv6/netfilter/ip6_tables.c                |  9 ---------
 net/ipv6/netfilter/ip6table_mangle.c           |  2 --
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 11 -----------
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |  7 -------
 net/netfilter/core.c                           |  4 ----
 net/netfilter/nf_conntrack_core.c              | 18 ------------------
 net/netfilter/nf_conntrack_ftp.c               |  6 ------
 net/netfilter/nf_conntrack_netlink.c           |  5 -----
 net/netfilter/nf_conntrack_proto_generic.c     |  5 -----
 net/netfilter/nf_conntrack_proto_sctp.c        |  9 ---------
 net/netfilter/nf_conntrack_proto_tcp.c         | 18 ------------------
 net/netfilter/nf_conntrack_proto_udp.c         |  5 -----
 net/netfilter/nf_conntrack_standalone.c        | 11 -----------
 net/netfilter/nfnetlink_log.c                  |  5 -----
 net/netfilter/xt_DSCP.c                        |  2 --
 net/netfilter/xt_connbytes.c                   |  9 ---------
 net/netfilter/xt_dscp.c                        |  2 --
 net/netfilter/xt_helper.c                      |  3 ---
 net/netfilter/xt_limit.c                       |  7 -------
 net/netfilter/xt_realm.c                       |  2 --
 33 files changed, 228 deletions(-)

diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 17f7c988460..cd8fec05f9b 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -8,18 +8,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 2000-03-27: Simplified code (thanks to Andi Kleen for clues).
- * 2000-05-20: Fixed notifier problems (following Miguel Freitas' report).
- * 2000-06-19: Fixed so nfmark is copied to metadata (reported by Sebastian
- *             Zander).
- * 2000-08-01: Added Nick Williams' MAC support.
- * 2002-06-25: Code cleanup.
- * 2005-01-10: Added /proc counter for dropped packets; fixed so
- *             packets aren't delivered to user space if they're going
- *             to be dropped.
- * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte)
- *
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 39ab8ae282e..e3f83bf160d 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -7,12 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
- * 	- increase module usage count as soon as we have rules inside
- * 	  a table
- * 08 Oct 2005 Harald Welte <lafore@netfilter.org>
- * 	- Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
  */
 #include <linux/cache.h>
 #include <linux/capability.h>
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 97c0e53c8b2..918ca92e534 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -5,8 +5,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * ipt_ECN.c,v 1.5 2002/08/18 19:36:51 laforge Exp
 */
 
 #include <linux/in.h>
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 1399e7c183b..9041e0741f6 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -1,7 +1,5 @@
 /*
  * This is a module which is used for rejecting packets.
- * Added support for customized reject packets (Jozsef Kadlecsik).
- * Added support for ICMP type-3-code-13 (Maciej Soltysiak). [RFC 1812]
  */
 
 /* (C) 1999-2001 Paul `Rusty' Russell
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index fe76ffc0cae..511e5ff8493 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -7,21 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 010320 Martin Josefsson <gandalf@wlug.westbo.se>
- * 	* copied ipt_BALANCE.c to ipt_SAME.c and changed a few things.
- * 010728 Martin Josefsson <gandalf@wlug.westbo.se>
- * 	* added --nodst to not include destination-ip in new source
- * 	  calculations.
- *	* added some more sanity-checks.
- * 010729 Martin Josefsson <gandalf@wlug.westbo.se>
- * 	* fixed a buggy if-statement in same_check(), should have
- * 	  used ntohl() but didn't.
- * 	* added support for multiple ranges. IPT_SAME_MAX_RANGE is
- * 	  defined in linux/include/linux/netfilter_ipv4/ipt_SAME.h
- * 	  and is currently set to 10.
- * 	* added support for 1-address range, nice to have now that
- * 	  we have multiple ranges.
  */
 #include <linux/types.h>
 #include <linux/ip.h>
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index ace711e2b05..57f51af3572 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -2,20 +2,6 @@
  * netfilter module for userspace packet logging daemons
  *
  * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
- *
- * 2000/09/22 ulog-cprange feature added
- * 2001/01/04 in-kernel queue as proposed by Sebastian Zander
- * 						<zander@fokus.gmd.de>
- * 2001/01/30 per-rule nlgroup conflicts with global queue.
- *            nlgroup now global (sysctl)
- * 2001/04/19 ulog-queue reworked, now fixed buffer size specified at
- * 	      module loadtime -HW
- * 2002/07/07 remove broken nflog_rcv() function -HW
- * 2002/08/29 fix shifted/unshifted nlgroup bug -HW
- * 2002/10/30 fix uninitialized mac_len field - <Anders K. Pedersen>
- * 2004/10/25 fix erroneous calculation of 'len' parameter to NLMSG_PUT
- *	      resulting in bogus 'error during NLMSG_PUT' messages.
- *
  * (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
  *
@@ -42,8 +28,6 @@
  * flushtimeout:
  *   Specify, after how many hundredths of a second the queue should be
  *   flushed even if it is not full yet.
- *
- * ipt_ULOG.c,v 1.22 2002/10/30 09:07:31 laforge Exp
  */
 
 #include <linux/module.h>
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 3b4ca0c5c12..26218122f86 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -1,6 +1,4 @@
 /* IP tables module for matching the value of the IPv4 and TCP ECN bits
- *
- * ipt_ecn.c,v 1.3 2002/05/29 15:09:00 laforge Exp
  *
  * (C) 2002 by Harald Welte <laforge@gnumonks.org>
  *
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
index 9615c04a2fc..ab02d9e3139 100644
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ b/net/ipv4/netfilter/ipt_ttl.c
@@ -1,6 +1,4 @@
 /* IP tables module for matching the value of the TTL
- *
- * ipt_ttl.c,v 1.5 2000/11/13 11:16:08 laforge Exp
  *
  * (C) 2000,2001 by Harald Welte <laforge@netfilter.org>
  *
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 26e60fbe7ee..9278802f274 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -7,8 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * Extended to all five netfilter hooks by Brad Chapman & Harald Welte
  */
 #include <linux/module.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index d52ca0c1ce8..0654eaae70c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -4,14 +4,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- move L3 protocol dependent part to this file.
- * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- add get_features() to support various size of conntrack
- *	  structures.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
  */
 
 #include <linux/types.h>
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index e090e929e6e..f4fc657c198 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -4,11 +4,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- enable working with Layer 3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_icmp.c
  */
 
 #include <linux/types.h>
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 0cc0d97585d..6e88505d616 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -38,10 +38,6 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  *
  * Author: James Morris <jmorris@intercode.com.au>
- *
- * Updates:
- * 2000-08-06: Convert to new helper API (Harald Welte).
- *
  */
 #include <linux/module.h>
 #include <linux/moduleparam.h>
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 275e625e497..2f1ae422d87 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -11,18 +11,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 2001-11-06: First try. Working with ip_queue.c for IPv4 and trying
- *             to adapt it to IPv6
- *             HEAVILY based in ipqueue.c by James Morris. It's just
- *             a little modified version of it, so he's nearly the
- *             real coder of this.
- *             Few changes needed, mainly the hard_routing code and
- *             the netlink socket protocol (we're NETLINK_IP6_FW).
- * 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
- * 2005-02-04: Added /proc counter for dropped packets; fixed so
- *             packets aren't delivered to user space if they're going
- *             to be dropped.
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index b97aedce62a..9aa62402668 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -7,15 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
- * 	- increase module usage count as soon as we have rules inside
- * 	  a table
- * 06 Jun 2002 Andras Kis-Szabo <kisza@sch.bme.hu>
- *      - new extension header parser code
- * 15 Oct 2005 Harald Welte <laforge@netfilter.org>
- * 	- Unification of {ip,ip6}_tables into x_tables
- * 	- Removed tcp and udp code, since it's not ipv6 specific
  */
 
 #include <linux/capability.h>
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 85b1c272946..a9f10e32c16 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -7,8 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * Extended to all five netfilter hooks by Brad Chapman & Harald Welte
  */
 #include <linux/module.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index fe7f46c4dbf..6d2a0820511 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -7,17 +7,6 @@
  *
  * Author:
  *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- support Layer 3 protocol independent connection tracking.
- *	  Based on the original ip_conntrack code which	had the following
- *	  copyright information:
- *		(C) 1999-2001 Paul `Rusty' Russell
- *		(C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- add get_features() to support various size of conntrack
- *	  structures.
  */
 
 #include <linux/types.h>
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 075da4f287b..0be790d250f 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -7,13 +7,6 @@
  *
  * Author:
  *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- ICMPv6 tracking support. Derived from the original ip_conntrack code
- *	  net/ipv4/netfilter/ip_conntrack_proto_icmp.c which had the following
- *	  copyright information:
- *		(C) 1999-2001 Paul `Rusty' Russell
- *		(C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
  */
 
 #include <linux/types.h>
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index c3ebdbd917e..d802b342c61 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -5,10 +5,6 @@
  * way.
  *
  * Rusty Russell (C)2000 -- This code is GPL.
- *
- * February 2000: Modified by James Morris to have 1 queue per protocol.
- * 15-Mar-2000:   Added NF_REPEAT --RR.
- * 08-May-2003:	  Internal logging interface added by Jozsef Kadlecsik.
  */
 #include <linux/kernel.h>
 #include <linux/netfilter.h>
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2b0cc7a1771..a74b205b272 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -9,24 +9,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
- *	- new API and handling of conntrack/nat helpers
- *	- now capable of multiple expectations for one master
- * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
- *	- add usage/reference counts to ip_conntrack_expect
- *	- export ip_conntrack[_expect]_{find_get,put} functions
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- generalize L3 protocol denendent part.
- * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- add support various size of conntrack structures.
- * 26 Jan 2006: Harald Welte <laforge@netfilter.org>
- * 	- restructure nf_conn (introduce nf_conn_help)
- * 	- redesign 'features' how they were originally intended
- * 26 Feb 2006: Pablo Neira Ayuso <pablo@eurodev.net>
- * 	- add support for L3 protocol module load on demand.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_core.c
  */
 
 #include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 3089dfc40c8..a186799f654 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -7,12 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- enable working with Layer 3 protocol independent connection tracking.
- *	- track EPRT and EPSV commands with IPv6 address.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_ftp.c
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index e5267305bb4..aa1a97ee514 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -6,9 +6,6 @@
  * (C) 2003 by Patrick Mchardy <kaber@trash.net>
  * (C) 2005-2006 by Pablo Neira Ayuso <pablo@eurodev.net>
  *
- * I've reworked this stuff to use attributes instead of conntrack
- * structures. 5.44 am. I need more tea. --pablo 05/07/11.
- *
  * Initial connection tracking via netlink development funded and
  * generally made possible by Network Robots, Inc. (www.networkrobots.com)
  *
@@ -16,8 +13,6 @@
  *
  * This software may be used and distributed according to the terms
  * of the GNU General Public License, incorporated herein by reference.
- *
- * Derived from ip_conntrack_netlink.c: Port by Pablo Neira Ayuso (05/11/14)
  */
 
 #include <linux/init.h>
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 7c069939695..6faf1bed722 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -4,11 +4,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- enable working with L3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_generic.c
  */
 
 #include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 3c80558716a..0d3254b974c 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -7,15 +7,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 17 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- enable working with L3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/ip_conntrack_sctp.c
- */
-
-/*
- * Added support for proc manipulation of timeouts.
  */
 
 #include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 8439768f9d1..ccdd5d231e0 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -4,24 +4,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
- *	- Real stateful connection tracking
- *	- Modified state transitions table
- *	- Window scaling support added
- *	- SACK support added
- *
- * Willy Tarreau:
- *	- State table bugfixes
- *	- More robust state changes
- *	- Tuning timer parameters
- *
- * 27 Oct 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- genelized Layer 3 protocol part.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_tcp.c
- *
- * version 2.2
  */
 
 #include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index a5e5726ec0c..3620ecc095f 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -4,11 +4,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- enable working with Layer 3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_udp.c
  */
 
 #include <linux/types.h>
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b8586360e51..45baeb0e30f 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1,20 +1,9 @@
-/* This file contains all the functions required for the standalone
-   nf_conntrack module.
-
-   These are not required by the compatibility layer.
-*/
-
 /* (C) 1999-2001 Paul `Rusty' Russell
  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- generalize L3 protocol dependent part.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
  */
 
 #include <linux/types.h>
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index b174aadd73e..8ae83cb9a39 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -10,11 +10,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- * 2006-01-26 Harald Welte <laforge@netfilter.org>
- * 	- Add optional local and global sequence number to detect lost
- * 	  events from userspace
- *
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index a97704a3f95..9f2f2201f6a 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -8,8 +8,6 @@
  * published by the Free Software Foundation.
  *
  * See RFC2474 for a description of the DSCP field within the IP Header.
- *
- * xt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
 */
 
 #include <linux/module.h>
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index fec9316a1e1..804afe55e14 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -1,14 +1,5 @@
 /* Kernel module to match connection tracking byte counter.
  * GPL (C) 2002 Martin Devera (devik@cdi.cz).
- *
- * 2004-07-20 Harald Welte <laforge@netfilter.org>
- * 	- reimplemented to use per-connection accounting counters
- * 	- add functionality to match number of packets
- * 	- add functionality to match average packet size
- * 	- add support to match directions seperately
- * 2005-10-16 Harald Welte <laforge@netfilter.org>
- * 	- Port to x_tables
- *
  */
 #include <linux/module.h>
 #include <linux/skbuff.h>
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 000e9c2f8d1..56b247ecc28 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -1,6 +1,4 @@
 /* IP tables module for matching the value of the IPv4/IPv6 DSCP field
- *
- * xt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
  *
  * (C) 2002 by Harald Welte <laforge@netfilter.org>
  *
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index bc70b26ba5b..c139b2f43a1 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -5,9 +5,6 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
- *
- *   19 Mar 2002 Harald Welte <laforge@gnumonks.org>:
- *   		 - Port to newnat infrastructure
  */
 
 #include <linux/module.h>
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 6fd8347c005..571a72ab89a 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -1,10 +1,3 @@
-/* Kernel module to control the rate
- *
- * 2 September 1999: Changed from the target RATE to the match
- *                   `limit', removed logging.  Did I mention that
- *                   Alexey is a fucking genius?
- *                   Rusty Russell (rusty@rustcorp.com.au).  */
-
 /* (C) 1999 J�r�me de Vivie <devivie@info.enserb.u-bordeaux.fr>
  * (C) 1999 Herv� Eychenne <eychenne@info.enserb.u-bordeaux.fr>
  *
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 97ffc2fbc19..c2017f8af9c 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -1,6 +1,4 @@
 /* IP tables module for matching the routing realm
- *
- * $Id: ipt_realm.c,v 1.3 2004/03/05 13:25:40 laforge Exp $
  *
  * (C) 2003 by Sampsa Ranta <sampsa@netsonic.fi>
  *
-- 
cgit v1.2.3


From 09972d6f968d67dd82cbd403d5aa42c241a8d0cb Mon Sep 17 00:00:00 2001
From: Michal Miroslaw <mirq-linux@rere.qmqm.pl>
Date: Fri, 23 Mar 2007 11:10:47 -0700
Subject: [NETFILTER]: nfnetlink_log: don't count max(a,b) twice

We don't need local nlbufsiz (skb size) as nfulnl_alloc_skb() takes
the maximum anyway.

Signed-off-by: Michal Miroslaw <mirq-linux@rere.qmqm.pl>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_log.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 8ae83cb9a39..3b849a72d7c 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -590,7 +590,6 @@ nfulnl_log_packet(unsigned int pf,
 	struct nfulnl_instance *inst;
 	const struct nf_loginfo *li;
 	unsigned int qthreshold;
-	unsigned int nlbufsiz;
 	unsigned int plen;
 
 	if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
@@ -666,13 +665,8 @@ nfulnl_log_packet(unsigned int pf,
 		return;
 	}
 
-	if (size > inst->nlbufsiz)
-		nlbufsiz = size;
-	else
-		nlbufsiz = inst->nlbufsiz;
-
 	if (!inst->skb) {
-		if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) {
+		if (!(inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size))) {
 			UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
 				inst->nlbufsiz, size);
 			goto alloc_failure;
@@ -688,7 +682,7 @@ nfulnl_log_packet(unsigned int pf,
 			instance_put(inst);
 		__nfulnl_send(inst);
 
-		if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) {
+		if (!(inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size))) {
 			UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
 				inst->nlbufsiz, size);
 			goto alloc_failure;
-- 
cgit v1.2.3


From 55b5a91e1723280570957990a0c5ab8c3ec4090a Mon Sep 17 00:00:00 2001
From: Michal Miroslaw <mirq-linux@rere.qmqm.pl>
Date: Fri, 23 Mar 2007 11:11:05 -0700
Subject: [NETFILTER]: nfnetlink_log: kill duplicate code

Kill some duplicate code in nfulnl_log_packet().

Signed-off-by: Michal Miroslaw <mirq-linux@rere.qmqm.pl>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_log.c | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 3b849a72d7c..d14cf514160 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -660,19 +660,11 @@ nfulnl_log_packet(unsigned int pf,
 		break;
 
 	default:
-		spin_unlock_bh(&inst->lock);
-		instance_put(inst);
-		return;
+		goto unlock_and_release;
 	}
 
-	if (!inst->skb) {
-		if (!(inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size))) {
-			UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
-				inst->nlbufsiz, size);
-			goto alloc_failure;
-		}
-	} else if (inst->qlen >= qthreshold ||
-		   size > skb_tailroom(inst->skb)) {
+	if (inst->qlen >= qthreshold ||
+	    (inst->skb && size > skb_tailroom(inst->skb))) {
 		/* either the queue len is too high or we don't have
 		 * enough room in the skb left. flush to userspace. */
 		UDEBUG("flushing old skb\n");
@@ -681,12 +673,12 @@ nfulnl_log_packet(unsigned int pf,
 		if (del_timer(&inst->timer))
 			instance_put(inst);
 		__nfulnl_send(inst);
+	}
 
-		if (!(inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size))) {
-			UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n",
-				inst->nlbufsiz, size);
+	if (!inst->skb) {
+		inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size);
+		if (!inst->skb)
 			goto alloc_failure;
-		}
 	}
 
 	UDEBUG("qlen %d, qthreshold %d\n", inst->qlen, qthreshold);
-- 
cgit v1.2.3


From f414c16c04b1c998e90370791f9a728e292146ea Mon Sep 17 00:00:00 2001
From: Michal Miroslaw <mirq-linux@rere.qmqm.pl>
Date: Fri, 23 Mar 2007 11:11:31 -0700
Subject: [NETFILTER]: nfnetlink_log: micro-optimization for inst==NULL in
 nfulnl_recv_config()

Simple micro-optimization: don't call instance_put() on known NULL pointers.

Signed-off-by: Michal Miroslaw <mirq-linux@rere.qmqm.pl>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_log.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index d14cf514160..d4b45e28ebd 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -810,13 +810,13 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 					       NETLINK_CB(skb).pid);
 			if (!inst) {
 				ret = -EINVAL;
-				goto out_put;
+				goto out;
 			}
 			break;
 		case NFULNL_CFG_CMD_UNBIND:
 			if (!inst) {
 				ret = -ENODEV;
-				goto out_put;
+				goto out;
 			}
 
 			if (inst->peer_pid != NETLINK_CB(skb).pid) {
@@ -849,7 +849,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 				"group=%u pid=%u =>ENOENT\n",
 				group_num, NETLINK_CB(skb).pid);
 			ret = -ENOENT;
-			goto out_put;
+			goto out;
 		}
 
 		if (inst->peer_pid != NETLINK_CB(skb).pid) {
-- 
cgit v1.2.3


From 9a36e8c2b337c424ed77f5dea0a67dc8039d351b Mon Sep 17 00:00:00 2001
From: Michal Miroslaw <mirq-linux@rere.qmqm.pl>
Date: Fri, 23 Mar 2007 11:11:48 -0700
Subject: [NETFILTER]: nfnetlink_log: micro-optimization: don't modify
 destroyed instance

Simple micro-optimization: Don't change any options if the instance is
being destroyed.

Signed-off-by: Michal Miroslaw <mirq-linux@rere.qmqm.pl>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index d4b45e28ebd..441f0fbcfb2 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -825,7 +825,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 			}
 
 			instance_destroy(inst);
-			break;
+			goto out;
 		case NFULNL_CFG_CMD_PF_BIND:
 			UDEBUG("registering log handler for pf=%u\n", pf);
 			ret = nf_log_register(pf, &nfulnl_logger);
-- 
cgit v1.2.3


From f76cdcee5ba4a3fb41de93d5f1c17fb6ab4d0820 Mon Sep 17 00:00:00 2001
From: Michal Miroslaw <mirq-linux@rere.qmqm.pl>
Date: Fri, 23 Mar 2007 11:12:03 -0700
Subject: [NETFILTER]: nfnetlink_log: iterator functions need iter_state * only

get_*() don't need access to seq_file - iter_state is enough for them.

Signed-off-by: Michal Miroslaw <mirq-linux@rere.qmqm.pl>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_log.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 441f0fbcfb2..e934770339d 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -919,10 +919,8 @@ struct iter_state {
 	unsigned int bucket;
 };
 
-static struct hlist_node *get_first(struct seq_file *seq)
+static struct hlist_node *get_first(struct iter_state *st)
 {
-	struct iter_state *st = seq->private;
-
 	if (!st)
 		return NULL;
 
@@ -933,10 +931,8 @@ static struct hlist_node *get_first(struct seq_file *seq)
 	return NULL;
 }
 
-static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
+static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
 {
-	struct iter_state *st = seq->private;
-
 	h = h->next;
 	while (!h) {
 		if (++st->bucket >= INSTANCE_BUCKETS)
@@ -947,13 +943,13 @@ static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
 	return h;
 }
 
-static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
+static struct hlist_node *get_idx(struct iter_state *st, loff_t pos)
 {
 	struct hlist_node *head;
-	head = get_first(seq);
+	head = get_first(st);
 
 	if (head)
-		while (pos && (head = get_next(seq, head)))
+		while (pos && (head = get_next(st, head)))
 			pos--;
 	return pos ? NULL : head;
 }
@@ -961,13 +957,13 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
 static void *seq_start(struct seq_file *seq, loff_t *pos)
 {
 	read_lock_bh(&instances_lock);
-	return get_idx(seq, *pos);
+	return get_idx(seq->private, *pos);
 }
 
 static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
 {
 	(*pos)++;
-	return get_next(s, v);
+	return get_next(s->private, v);
 }
 
 static void seq_stop(struct seq_file *s, void *v)
-- 
cgit v1.2.3


From 370e6a878962cad614eb8c7c5a22240e5cd316bb Mon Sep 17 00:00:00 2001
From: Michal Miroslaw <mirq-linux@rere.qmqm.pl>
Date: Fri, 23 Mar 2007 11:12:21 -0700
Subject: [NETFILTER]: nfnetlink_log: micro-optimization: inst->skb != NULL in
 __nfulnl_send()

No other function calls __nfulnl_send() with inst->skb == NULL than
nfulnl_timer().

Signed-off-by: Michal Miroslaw <mirq-linux@rere.qmqm.pl>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_log.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index e934770339d..34de3d84e2f 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -360,9 +360,6 @@ __nfulnl_send(struct nfulnl_instance *inst)
 {
 	int status;
 
-	if (!inst->skb)
-		return 0;
-
 	if (inst->qlen > 1)
 		inst->lastnlh->nlmsg_type = NLMSG_DONE;
 
@@ -386,7 +383,8 @@ static void nfulnl_timer(unsigned long data)
 	UDEBUG("timer function called, flushing buffer\n");
 
 	spin_lock_bh(&inst->lock);
-	__nfulnl_send(inst);
+	if (inst->skb)
+		__nfulnl_send(inst);
 	spin_unlock_bh(&inst->lock);
 	instance_put(inst);
 }
-- 
cgit v1.2.3


From 9afdb00c80b0b9c20435ce690b5287fa2434ef44 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 23 Mar 2007 11:12:50 -0700
Subject: [NETFILTER]: nfnetlink_log: remove conditional locking

This is gross, have the wrapper function take the lock.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_log.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 34de3d84e2f..0ae06561ae9 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -195,20 +195,14 @@ out_unlock:
 static int __nfulnl_send(struct nfulnl_instance *inst);
 
 static void
-_instance_destroy2(struct nfulnl_instance *inst, int lock)
+__instance_destroy(struct nfulnl_instance *inst)
 {
 	/* first pull it out of the global list */
-	if (lock)
-		write_lock_bh(&instances_lock);
-
 	UDEBUG("removing instance %p (queuenum=%u) from hash\n",
 		inst, inst->group_num);
 
 	hlist_del(&inst->hlist);
 
-	if (lock)
-		write_unlock_bh(&instances_lock);
-
 	/* then flush all pending packets from skb */
 
 	spin_lock_bh(&inst->lock);
@@ -229,16 +223,12 @@ _instance_destroy2(struct nfulnl_instance *inst, int lock)
 	instance_put(inst);
 }
 
-static inline void
-__instance_destroy(struct nfulnl_instance *inst)
-{
-	_instance_destroy2(inst, 0);
-}
-
 static inline void
 instance_destroy(struct nfulnl_instance *inst)
 {
-	_instance_destroy2(inst, 1);
+	write_lock_bh(&instances_lock);
+	__instance_destroy(inst);
+	write_unlock_bh(&instances_lock);
 }
 
 static int
-- 
cgit v1.2.3


From e6f689db51a789807edede411b32eb7c9e457948 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 23 Mar 2007 11:16:30 -0700
Subject: [NETFILTER]: Use setup_timer

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/ebt_ulog.c         | 4 +---
 net/ipv4/netfilter/ipt_ULOG.c           | 7 ++-----
 net/ipv6/netfilter/nf_conntrack_reasm.c | 7 ++-----
 net/netfilter/nf_conntrack_core.c       | 5 ++---
 net/netfilter/nf_conntrack_expect.c     | 4 +---
 net/netfilter/nfnetlink_log.c           | 5 +----
 net/netfilter/xt_hashlimit.c            | 4 +---
 7 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 259f5c370f3..8b84cd40279 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -297,9 +297,7 @@ static int __init ebt_ulog_init(void)
 
 	/* initialize ulog_buffers */
 	for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
-		init_timer(&ulog_buffers[i].timer);
-		ulog_buffers[i].timer.function = ulog_timer;
-		ulog_buffers[i].timer.data = i;
+		setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
 		spin_lock_init(&ulog_buffers[i].lock);
 	}
 
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 57f51af3572..d26bbd2daaa 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -376,11 +376,8 @@ static int __init ipt_ulog_init(void)
 	}
 
 	/* initialize ulog_buffers */
-	for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
-		init_timer(&ulog_buffers[i].timer);
-		ulog_buffers[i].timer.function = ulog_timer;
-		ulog_buffers[i].timer.data = i;
-	}
+	for (i = 0; i < ULOG_MAXNLGROUPS; i++)
+		setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
 
 	nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
 					THIS_MODULE);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 721f02d7b7f..347ab760823 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -353,9 +353,7 @@ nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src,				   str
 	ipv6_addr_copy(&fq->saddr, src);
 	ipv6_addr_copy(&fq->daddr, dst);
 
-	init_timer(&fq->timer);
-	fq->timer.function = nf_ct_frag6_expire;
-	fq->timer.data = (long) fq;
+	setup_timer(&fq->timer, nf_ct_frag6_expire, (unsigned long)fq);
 	spin_lock_init(&fq->lock);
 	atomic_set(&fq->refcnt, 1);
 
@@ -870,8 +868,7 @@ int nf_ct_frag6_init(void)
 	nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
 				   (jiffies ^ (jiffies >> 6)));
 
-	init_timer(&nf_ct_frag6_secret_timer);
-	nf_ct_frag6_secret_timer.function = nf_ct_frag6_secret_rebuild;
+	setup_timer(&nf_ct_frag6_secret_timer, nf_ct_frag6_secret_rebuild, 0);
 	nf_ct_frag6_secret_timer.expires = jiffies
 					   + nf_ct_frag6_secret_interval;
 	add_timer(&nf_ct_frag6_secret_timer);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index a74b205b272..6f2aac1d01a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -620,9 +620,8 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
 	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
 	/* Don't set timer yet: wait for confirmation */
-	init_timer(&conntrack->timeout);
-	conntrack->timeout.data = (unsigned long)conntrack;
-	conntrack->timeout.function = death_by_timeout;
+	setup_timer(&conntrack->timeout, death_by_timeout,
+		    (unsigned long)conntrack);
 	read_unlock_bh(&nf_ct_cache_lock);
 
 	return conntrack;
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index ce70a6fc6bd..c31af29a443 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -290,9 +290,7 @@ static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
 	master_help->expecting++;
 	list_add(&exp->list, &nf_conntrack_expect_list);
 
-	init_timer(&exp->timeout);
-	exp->timeout.data = (unsigned long)exp;
-	exp->timeout.function = expectation_timed_out;
+	setup_timer(&exp->timeout, expectation_timed_out, (unsigned long)exp);
 	exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
 	add_timer(&exp->timeout);
 
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 0ae06561ae9..d2c6aab6bb7 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -158,10 +158,7 @@ instance_create(u_int16_t group_num, int pid)
 	/* needs to be two, since we _put() after creation */
 	atomic_set(&inst->use, 2);
 
-	init_timer(&inst->timer);
-	inst->timer.function = nfulnl_timer;
-	inst->timer.data = (unsigned long)inst;
-	/* don't start timer yet. (re)start it  with every packet */
+	setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
 
 	inst->peer_pid = pid;
 	inst->group_num = group_num;
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index eb932913693..d3043fa32eb 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -216,10 +216,8 @@ static int htable_create(struct xt_hashlimit_info *minfo, int family)
 	hinfo->pde->proc_fops = &dl_file_ops;
 	hinfo->pde->data = hinfo;
 
-	init_timer(&hinfo->timer);
+	setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo);
 	hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
-	hinfo->timer.data = (unsigned long )hinfo;
-	hinfo->timer.function = htable_gc;
 	add_timer(&hinfo->timer);
 
 	spin_lock_bh(&hashlimit_lock);
-- 
cgit v1.2.3


From 5f79e0f916a3bdeccc910fdf466bca582a9b2cca Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Fri, 23 Mar 2007 11:17:07 -0700
Subject: [NETFILTER]: nf_conntrack: don't use nfct in skb if conntrack is
 disabled

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 11 +++++++----
 include/linux/skbuff.h    | 24 +++++++-----------------
 net/core/skbuff.c         |  4 +---
 net/netfilter/core.c      |  2 ++
 4 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 70d3b4f1e48..4777f1b619c 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -281,9 +281,6 @@ extern void nf_reinject(struct sk_buff *skb,
 			struct nf_info *info,
 			unsigned int verdict);
 
-extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
-extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
-
 /* FIXME: Before cache is ever used, this must be implemented for real. */
 extern void nf_invalidate_cache(int pf);
 
@@ -388,11 +385,17 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
 {
 	return 1;
 }
-static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
 struct flowi;
 static inline void
 nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) {}
 #endif /*CONFIG_NETFILTER*/
 
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
+#else
+static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
+#endif
+
 #endif /*__KERNEL__*/
 #endif /*__LINUX_NETFILTER_H*/
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 81ac934d596..0bedf538485 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -87,11 +87,12 @@
 
 struct net_device;
 
-#ifdef CONFIG_NETFILTER
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 struct nf_conntrack {
 	atomic_t use;
 	void (*destroy)(struct nf_conntrack *);
 };
+#endif
 
 #ifdef CONFIG_BRIDGE_NETFILTER
 struct nf_bridge_info {
@@ -106,8 +107,6 @@ struct nf_bridge_info {
 };
 #endif
 
-#endif
-
 struct sk_buff_head {
 	/* These two members must be first. */
 	struct sk_buff	*next;
@@ -276,15 +275,13 @@ struct sk_buff {
 	__be16			protocol;
 
 	void			(*destructor)(struct sk_buff *skb);
-#ifdef CONFIG_NETFILTER
-	struct nf_conntrack	*nfct;
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	struct nf_conntrack	*nfct;
 	struct sk_buff		*nfct_reasm;
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	struct nf_bridge_info	*nf_bridge;
 #endif
-#endif /* CONFIG_NETFILTER */
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
 #ifdef CONFIG_NET_CLS_ACT
@@ -1558,7 +1555,7 @@ static inline unsigned int skb_checksum_complete(struct sk_buff *skb)
 		__skb_checksum_complete(skb);
 }
 
-#ifdef CONFIG_NETFILTER
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 static inline void nf_conntrack_put(struct nf_conntrack *nfct)
 {
 	if (nfct && atomic_dec_and_test(&nfct->use))
@@ -1569,7 +1566,6 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
 	if (nfct)
 		atomic_inc(&nfct->use);
 }
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
 {
 	if (skb)
@@ -1595,9 +1591,9 @@ static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
 #endif /* CONFIG_BRIDGE_NETFILTER */
 static inline void nf_reset(struct sk_buff *skb)
 {
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put(skb->nfct);
 	skb->nfct = NULL;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	nf_conntrack_put_reasm(skb->nfct_reasm);
 	skb->nfct_reasm = NULL;
 #endif
@@ -1610,10 +1606,10 @@ static inline void nf_reset(struct sk_buff *skb)
 /* Note: This doesn't put any conntrack and bridge info in dst. */
 static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 {
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	dst->nfct = src->nfct;
 	nf_conntrack_get(src->nfct);
 	dst->nfctinfo = src->nfctinfo;
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	dst->nfct_reasm = src->nfct_reasm;
 	nf_conntrack_get_reasm(src->nfct_reasm);
 #endif
@@ -1625,8 +1621,8 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 
 static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 {
-	nf_conntrack_put(dst->nfct);
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	nf_conntrack_put(dst->nfct);
 	nf_conntrack_put_reasm(dst->nfct_reasm);
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
@@ -1635,12 +1631,6 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 	__nf_copy(dst, src);
 }
 
-#else /* CONFIG_NETFILTER */
-static inline void nf_reset(struct sk_buff *skb) {}
-static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) {}
-static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) {}
-#endif /* CONFIG_NETFILTER */
-
 #ifdef CONFIG_NETWORK_SECMARK
 static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from)
 {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e28f119156f..f16c72204cf 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -325,15 +325,13 @@ void __kfree_skb(struct sk_buff *skb)
 		WARN_ON(in_irq());
 		skb->destructor(skb);
 	}
-#ifdef CONFIG_NETFILTER
-	nf_conntrack_put(skb->nfct);
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	nf_conntrack_put(skb->nfct);
 	nf_conntrack_put_reasm(skb->nfct_reasm);
 #endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	nf_bridge_put(skb->nf_bridge);
 #endif
-#endif
 /* XXX: IS this still necessary? - JHS */
 #ifdef CONFIG_NET_SCHED
 	skb->tc_index = 0;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index d802b342c61..fe5f22df620 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -240,6 +240,7 @@ void nf_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(nf_proto_csum_replace4);
 
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 /* This does not belong here, but locally generated errors need it if connection
    tracking in use: without this, connection may not be in hash table, and hence
    manufactured ICMP or RST packets will not be associated with it. */
@@ -259,6 +260,7 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
 	}
 }
 EXPORT_SYMBOL(nf_ct_attach);
+#endif
 
 #ifdef CONFIG_PROC_FS
 struct proc_dir_entry *proc_net_netfilter;
-- 
cgit v1.2.3


From de6e05c49f8b4ed63224c5d38891f531ecc4eabb Mon Sep 17 00:00:00 2001
From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Date: Fri, 23 Mar 2007 11:17:27 -0700
Subject: [NETFILTER]: nf_conntrack: kill destroy() in struct nf_conntrack for
 diet

The destructor per conntrack is unnecessary, then this replaces it with
system wide destructor.

Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h         |  1 +
 include/linux/skbuff.h            |  4 ++--
 net/netfilter/core.c              | 17 ++++++++++++++++-
 net/netfilter/nf_conntrack_core.c |  4 +++-
 4 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 4777f1b619c..10b5c627570 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -393,6 +393,7 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) {}
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
 extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
+extern void (*nf_ct_destroy)(struct nf_conntrack *);
 #else
 static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
 #endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0bedf538485..37247901ebd 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -90,7 +90,6 @@ struct net_device;
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 struct nf_conntrack {
 	atomic_t use;
-	void (*destroy)(struct nf_conntrack *);
 };
 #endif
 
@@ -1556,10 +1555,11 @@ static inline unsigned int skb_checksum_complete(struct sk_buff *skb)
 }
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+extern void nf_conntrack_destroy(struct nf_conntrack *nfct);
 static inline void nf_conntrack_put(struct nf_conntrack *nfct)
 {
 	if (nfct && atomic_dec_and_test(&nfct->use))
-		nfct->destroy(nfct);
+		nf_conntrack_destroy(nfct);
 }
 static inline void nf_conntrack_get(struct nf_conntrack *nfct)
 {
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index fe5f22df620..a84478ee2de 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -260,7 +260,22 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
 	}
 }
 EXPORT_SYMBOL(nf_ct_attach);
-#endif
+
+void (*nf_ct_destroy)(struct nf_conntrack *);
+EXPORT_SYMBOL(nf_ct_destroy);
+
+void nf_conntrack_destroy(struct nf_conntrack *nfct)
+{
+	void (*destroy)(struct nf_conntrack *);
+
+	rcu_read_lock();
+	destroy = rcu_dereference(nf_ct_destroy);
+	BUG_ON(destroy == NULL);
+	destroy(nfct);
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_conntrack_destroy);
+#endif /* CONFIG_NF_CONNTRACK */
 
 #ifdef CONFIG_PROC_FS
 struct proc_dir_entry *proc_net_netfilter;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 6f2aac1d01a..e132c8ae878 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -616,7 +616,6 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 	memset(conntrack, 0, nf_ct_cache[features].size);
 	conntrack->features = features;
 	atomic_set(&conntrack->ct_general.use, 1);
-	conntrack->ct_general.destroy = destroy_conntrack;
 	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
 	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
 	/* Don't set timer yet: wait for confirmation */
@@ -1122,6 +1121,8 @@ void nf_conntrack_cleanup(void)
 	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
 		schedule();
 
+	rcu_assign_pointer(nf_ct_destroy, NULL);
+
 	for (i = 0; i < NF_CT_F_NUM; i++) {
 		if (nf_ct_cache[i].use == 0)
 			continue;
@@ -1259,6 +1260,7 @@ int __init nf_conntrack_init(void)
 
 	/* For use by REJECT target */
 	rcu_assign_pointer(ip_ct_attach, __nf_conntrack_attach);
+	rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
 
 	/* Set up fake conntrack:
 	    - to never be deleted, not in any hashes */
-- 
cgit v1.2.3


From c7bf5f9dc2f78ae8ebbfffc5f17becd0d9e6ba9e Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 23 Mar 2007 11:17:57 -0700
Subject: [NETFILTER] nfnetlink: netlink_run_queue() already checks for
 NLM_F_REQUEST

Patrick has made use of netlink_run_queue() in nfnetlink while my patches
have been waiting for net-2.6.22 to open. So this check for NLM_F_REQUEST
can go as well.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index c37ed0156b0..b0da853eabe 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -204,10 +204,6 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	if (security_netlink_recv(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	/* Only requests are handled by kernel now. */
-	if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
-		return 0;
-
 	/* All the messages must at least contain nfgenmsg */
 	if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg)))
 		return 0;
-- 
cgit v1.2.3


From 76d643cd3bd2b4a1e27e3eafee8e37be9c681792 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 23 Mar 2007 11:27:04 -0700
Subject: [NET_SCHED]: sch_netem: fix off-by-one in send time comparison

netem checks PSCHED_TLESS(cb->time_to_send, now) to find out whether it is
allowed to send a packet, which is equivalent to cb->time_to_send < now.
Use !PSCHED_TLESS(now, cb->time_to_send) instead to properly handle
cb->time_to_send == now.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 3e1b633e8b0..bc4284396fc 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -286,7 +286,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 		/* if more time remaining? */
 		PSCHED_GET_TIME(now);
 
-		if (PSCHED_TLESS(cb->time_to_send, now)) {
+		if (!PSCHED_TLESS(now, cb->time_to_send)) {
 			pr_debug("netem_dequeue: return skb=%p\n", skb);
 			sch->q.qlen--;
 			return skb;
-- 
cgit v1.2.3


From 26e252df1e6e5b68eb790e4a4baf745aa3870038 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 23 Mar 2007 11:27:29 -0700
Subject: [NET_SCHED]: kill PSCHED_AUDIT_TDIFF

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 1 -
 net/sched/sch_cbq.c     | 2 --
 2 files changed, 3 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 6555e57ff6c..276d1ad2b70 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -59,7 +59,6 @@ typedef long	psched_tdiff_t;
 #define PSCHED_TADD(tv, delta)		((tv) += (delta))
 #define PSCHED_SET_PASTPERFECT(t)	((t) = 0)
 #define PSCHED_IS_PASTPERFECT(t)	((t) == 0)
-#define	PSCHED_AUDIT_TDIFF(t)
 
 struct qdisc_watchdog {
 	struct hrtimer	timer;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index dcd9c31dc39..57ac6c5cd27 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -820,8 +820,6 @@ cbq_update(struct cbq_sched_data *q)
 			idle -= L2T(&q->link, len);
 			idle += L2T(cl, len);
 
-			PSCHED_AUDIT_TDIFF(idle);
-
 			PSCHED_TADD2(q->now, idle, cl->undertime);
 		} else {
 			/* Underlimit */
-- 
cgit v1.2.3


From 7c59e25f3186f26e85b13a318dbc4482d1d363e9 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 23 Mar 2007 11:27:45 -0700
Subject: [NET_SCHED]: kill PSCHED_TADD/PSCHED_TADD2

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h |  2 --
 net/sched/sch_cbq.c     | 12 ++++++------
 net/sched/sch_netem.c   |  2 +-
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 276d1ad2b70..32cdf0137cb 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -55,8 +55,6 @@ typedef long	psched_tdiff_t;
 #define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
 					min_t(long long, (tv1) - (tv2), bound)
 #define PSCHED_TLESS(tv1, tv2)		((tv1) < (tv2))
-#define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta))
-#define PSCHED_TADD(tv, delta)		((tv) += (delta))
 #define PSCHED_SET_PASTPERFECT(t)	((t) = 0)
 #define PSCHED_IS_PASTPERFECT(t)	((t) == 0)
 
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 57ac6c5cd27..290b26bdc89 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -387,7 +387,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 
 		PSCHED_GET_TIME(now);
 		incr = PSCHED_TDIFF(now, q->now_rt);
-		PSCHED_TADD2(q->now, incr, now);
+		now = q->now + incr;
 
 		do {
 			if (PSCHED_TLESS(cl->undertime, now)) {
@@ -492,7 +492,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
 			cl->avgidle = cl->minidle;
 		if (delay <= 0)
 			delay = 1;
-		PSCHED_TADD2(q->now, delay, cl->undertime);
+		cl->undertime = q->now + delay;
 
 		cl->xstats.overactions++;
 		cl->delayed = 1;
@@ -558,7 +558,7 @@ static void cbq_ovl_delay(struct cbq_class *cl)
 			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 		if (cl->avgidle < cl->minidle)
 			cl->avgidle = cl->minidle;
-		PSCHED_TADD2(q->now, delay, cl->undertime);
+		cl->undertime = q->now + delay;
 
 		if (delay > 0) {
 			sched += delay + cl->penalty;
@@ -820,7 +820,7 @@ cbq_update(struct cbq_sched_data *q)
 			idle -= L2T(&q->link, len);
 			idle += L2T(cl, len);
 
-			PSCHED_TADD2(q->now, idle, cl->undertime);
+			cl->undertime = q->now + idle;
 		} else {
 			/* Underlimit */
 
@@ -1018,12 +1018,12 @@ cbq_dequeue(struct Qdisc *sch)
 		   cbq_time = max(real_time, work);
 		 */
 		incr2 = L2T(&q->link, q->tx_len);
-		PSCHED_TADD(q->now, incr2);
+		q->now += incr2;
 		cbq_update(q);
 		if ((incr -= incr2) < 0)
 			incr = 0;
 	}
-	PSCHED_TADD(q->now, incr);
+	q->now += incr;
 	q->now_rt = now;
 
 	for (;;) {
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index bc4284396fc..6044ae77d5d 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -218,7 +218,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 				  &q->delay_cor, q->delay_dist);
 
 		PSCHED_GET_TIME(now);
-		PSCHED_TADD2(now, delay, cb->time_to_send);
+		cb->time_to_send = now + delay;
 		++q->counter;
 		ret = q->qdisc->enqueue(skb, q->qdisc);
 	} else {
-- 
cgit v1.2.3


From 104e0878984bb467e3f54d61105d8903babb4ec1 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 23 Mar 2007 11:28:07 -0700
Subject: [NET_SCHED]: kill PSCHED_TLESS

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 1 -
 net/sched/sch_cbq.c     | 7 +++----
 net/sched/sch_netem.c   | 6 +++---
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 32cdf0137cb..49325ffb00b 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -54,7 +54,6 @@ typedef long	psched_tdiff_t;
 #define PSCHED_TDIFF(tv1, tv2)		(long)((tv1) - (tv2))
 #define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
 					min_t(long long, (tv1) - (tv2), bound)
-#define PSCHED_TLESS(tv1, tv2)		((tv1) < (tv2))
 #define PSCHED_SET_PASTPERFECT(t)	((t) = 0)
 #define PSCHED_IS_PASTPERFECT(t)	((t) == 0)
 
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 290b26bdc89..9e6cdab6af3 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -390,7 +390,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 		now = q->now + incr;
 
 		do {
-			if (PSCHED_TLESS(cl->undertime, now)) {
+			if (cl->undertime < now) {
 				q->toplevel = cl->level;
 				return;
 			}
@@ -845,8 +845,7 @@ cbq_under_limit(struct cbq_class *cl)
 	if (cl->tparent == NULL)
 		return cl;
 
-	if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
-	    !PSCHED_TLESS(q->now, cl->undertime)) {
+	if (PSCHED_IS_PASTPERFECT(cl->undertime) || q->now >= cl->undertime) {
 		cl->delayed = 0;
 		return cl;
 	}
@@ -870,7 +869,7 @@ cbq_under_limit(struct cbq_class *cl)
 		if (cl->level > q->toplevel)
 			return NULL;
 	} while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
-		 PSCHED_TLESS(q->now, cl->undertime));
+		 q->now < cl->undertime);
 
 	cl->delayed = 0;
 	return cl;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 6044ae77d5d..5d571aa04a7 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -286,7 +286,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 		/* if more time remaining? */
 		PSCHED_GET_TIME(now);
 
-		if (!PSCHED_TLESS(now, cb->time_to_send)) {
+		if (cb->time_to_send <= now) {
 			pr_debug("netem_dequeue: return skb=%p\n", skb);
 			sch->q.qlen--;
 			return skb;
@@ -494,7 +494,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 
 	if (likely(skb_queue_len(list) < q->limit)) {
 		/* Optimize for add at tail */
-		if (likely(skb_queue_empty(list) || !PSCHED_TLESS(tnext, q->oldest))) {
+		if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
 			q->oldest = tnext;
 			return qdisc_enqueue_tail(nskb, sch);
 		}
@@ -503,7 +503,7 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 			const struct netem_skb_cb *cb
 				= (const struct netem_skb_cb *)skb->cb;
 
-			if (!PSCHED_TLESS(tnext, cb->time_to_send))
+			if (tnext >= cb->time_to_send)
 				break;
 		}
 
-- 
cgit v1.2.3


From a084980dcbf56c896e4b6c19aff2b082d5db7006 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 23 Mar 2007 11:28:30 -0700
Subject: [NET_SCHED]: kill PSCHED_SET_PASTPERFECT/PSCHED_IS_PASTPERFECT

Use direct assignment and comparison instead.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h |  3 +--
 include/net/red.h       |  4 ++--
 net/sched/sch_cbq.c     | 17 ++++++++---------
 net/sched/sch_netem.c   |  2 +-
 4 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 49325ffb00b..c40147a6020 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -54,8 +54,7 @@ typedef long	psched_tdiff_t;
 #define PSCHED_TDIFF(tv1, tv2)		(long)((tv1) - (tv2))
 #define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
 					min_t(long long, (tv1) - (tv2), bound)
-#define PSCHED_SET_PASTPERFECT(t)	((t) = 0)
-#define PSCHED_IS_PASTPERFECT(t)	((t) == 0)
+#define PSCHED_PASTPERFECT		0
 
 struct qdisc_watchdog {
 	struct hrtimer	timer;
diff --git a/include/net/red.h b/include/net/red.h
index a4eb37946f2..d9e1149a2bc 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -151,7 +151,7 @@ static inline void red_set_parms(struct red_parms *p,
 
 static inline int red_is_idling(struct red_parms *p)
 {
-	return !PSCHED_IS_PASTPERFECT(p->qidlestart);
+	return p->qidlestart != PSCHED_PASTPERFECT;
 }
 
 static inline void red_start_of_idle_period(struct red_parms *p)
@@ -161,7 +161,7 @@ static inline void red_start_of_idle_period(struct red_parms *p)
 
 static inline void red_end_of_idle_period(struct red_parms *p)
 {
-	PSCHED_SET_PASTPERFECT(p->qidlestart);
+	p->qidlestart = PSCHED_PASTPERFECT;
 }
 
 static inline void red_restart(struct red_parms *p)
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 9e6cdab6af3..2bb271b0efc 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -738,7 +738,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 	if (cl && q->toplevel >= borrowed->level) {
 		if (cl->q->q.qlen > 1) {
 			do {
-				if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
+				if (borrowed->undertime == PSCHED_PASTPERFECT) {
 					q->toplevel = borrowed->level;
 					return;
 				}
@@ -824,7 +824,7 @@ cbq_update(struct cbq_sched_data *q)
 		} else {
 			/* Underlimit */
 
-			PSCHED_SET_PASTPERFECT(cl->undertime);
+			cl->undertime = PSCHED_PASTPERFECT;
 			if (avgidle > cl->maxidle)
 				cl->avgidle = cl->maxidle;
 			else
@@ -845,7 +845,7 @@ cbq_under_limit(struct cbq_class *cl)
 	if (cl->tparent == NULL)
 		return cl;
 
-	if (PSCHED_IS_PASTPERFECT(cl->undertime) || q->now >= cl->undertime) {
+	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
 		cl->delayed = 0;
 		return cl;
 	}
@@ -868,8 +868,7 @@ cbq_under_limit(struct cbq_class *cl)
 		}
 		if (cl->level > q->toplevel)
 			return NULL;
-	} while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
-		 q->now < cl->undertime);
+	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
 
 	cl->delayed = 0;
 	return cl;
@@ -1054,11 +1053,11 @@ cbq_dequeue(struct Qdisc *sch)
 		*/
 
 		if (q->toplevel == TC_CBQ_MAXLEVEL &&
-		    PSCHED_IS_PASTPERFECT(q->link.undertime))
+		    q->link.undertime == PSCHED_PASTPERFECT)
 			break;
 
 		q->toplevel = TC_CBQ_MAXLEVEL;
-		PSCHED_SET_PASTPERFECT(q->link.undertime);
+		q->link.undertime = PSCHED_PASTPERFECT;
 	}
 
 	/* No packets in scheduler or nobody wants to give them to us :-(
@@ -1289,7 +1288,7 @@ cbq_reset(struct Qdisc* sch)
 			qdisc_reset(cl->q);
 
 			cl->next_alive = NULL;
-			PSCHED_SET_PASTPERFECT(cl->undertime);
+			cl->undertime = PSCHED_PASTPERFECT;
 			cl->avgidle = cl->maxidle;
 			cl->deficit = cl->quantum;
 			cl->cpriority = cl->priority;
@@ -1650,7 +1649,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	cl->xstats.avgidle = cl->avgidle;
 	cl->xstats.undertime = 0;
 
-	if (!PSCHED_IS_PASTPERFECT(cl->undertime))
+	if (cl->undertime != PSCHED_PASTPERFECT)
 		cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
 
 	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 5d571aa04a7..1e88301f505 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -532,7 +532,7 @@ static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
 	} else
 		q->limit = max_t(u32, sch->dev->tx_queue_len, 1);
 
-	PSCHED_SET_PASTPERFECT(q->oldest);
+	q->oldest = PSCHED_PASTPERFECT;
 	return 0;
 }
 
-- 
cgit v1.2.3


From 8edc0c31d6b7849b0fb50db86824830769241939 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 23 Mar 2007 11:28:55 -0700
Subject: [NET_SCHED]: kill PSCHED_TDIFF

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h |  1 -
 net/sched/sch_cbq.c     | 14 +++++++-------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index c40147a6020..16397374020 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -51,7 +51,6 @@ typedef long	psched_tdiff_t;
 #define PSCHED_GET_TIME(stamp) \
 	((stamp) = PSCHED_NS2US(ktime_to_ns(ktime_get())))
 
-#define PSCHED_TDIFF(tv1, tv2)		(long)((tv1) - (tv2))
 #define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
 					min_t(long long, (tv1) - (tv2), bound)
 #define PSCHED_PASTPERFECT		0
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 2bb271b0efc..f9e8403c522 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -386,7 +386,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 		psched_tdiff_t incr;
 
 		PSCHED_GET_TIME(now);
-		incr = PSCHED_TDIFF(now, q->now_rt);
+		incr = now - q->now_rt;
 		now = q->now + incr;
 
 		do {
@@ -474,7 +474,7 @@ cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 static void cbq_ovl_classic(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
-	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
+	psched_tdiff_t delay = cl->undertime - q->now;
 
 	if (!cl->delayed) {
 		delay += cl->offtime;
@@ -509,7 +509,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
 		psched_tdiff_t base_delay = q->wd_expires;
 
 		for (b = cl->borrow; b; b = b->borrow) {
-			delay = PSCHED_TDIFF(b->undertime, q->now);
+			delay = b->undertime - q->now;
 			if (delay < base_delay) {
 				if (delay <= 0)
 					delay = 1;
@@ -547,7 +547,7 @@ static void cbq_ovl_rclassic(struct cbq_class *cl)
 static void cbq_ovl_delay(struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
-	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
+	psched_tdiff_t delay = cl->undertime - q->now;
 
 	if (!cl->delayed) {
 		psched_time_t sched = q->now;
@@ -776,7 +776,7 @@ cbq_update(struct cbq_sched_data *q)
 			 idle = (now - last) - last_pktlen/rate
 		 */
 
-		idle = PSCHED_TDIFF(q->now, cl->last);
+		idle = q->now - cl->last;
 		if ((unsigned long)idle > 128*1024*1024) {
 			avgidle = cl->maxidle;
 		} else {
@@ -1004,7 +1004,7 @@ cbq_dequeue(struct Qdisc *sch)
 	psched_tdiff_t incr;
 
 	PSCHED_GET_TIME(now);
-	incr = PSCHED_TDIFF(now, q->now_rt);
+	incr = now - q->now_rt;
 
 	if (q->tx_class) {
 		psched_tdiff_t incr2;
@@ -1650,7 +1650,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	cl->xstats.undertime = 0;
 
 	if (cl->undertime != PSCHED_PASTPERFECT)
-		cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
+		cl->xstats.undertime = cl->undertime - q->now;
 
 	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
 #ifdef CONFIG_NET_ESTIMATOR
-- 
cgit v1.2.3


From 03cc45c0a5b9b7f74768feb43b9a2525d203bbdb Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 23 Mar 2007 11:29:11 -0700
Subject: [NET_SCHED]: turn PSCHED_TDIFF_SAFE into inline function

Also rename to psched_tdiff_bounded.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h | 8 ++++++--
 include/net/red.h       | 2 +-
 net/sched/act_police.c  | 8 ++++----
 net/sched/sch_htb.c     | 4 ++--
 net/sched/sch_tbf.c     | 2 +-
 5 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 16397374020..e6b1da050d3 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -51,10 +51,14 @@ typedef long	psched_tdiff_t;
 #define PSCHED_GET_TIME(stamp) \
 	((stamp) = PSCHED_NS2US(ktime_to_ns(ktime_get())))
 
-#define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
-					min_t(long long, (tv1) - (tv2), bound)
 #define PSCHED_PASTPERFECT		0
 
+static inline psched_tdiff_t
+psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound)
+{
+	return min(tv1 - tv2, bound);
+}
+
 struct qdisc_watchdog {
 	struct hrtimer	timer;
 	struct Qdisc	*qdisc;
diff --git a/include/net/red.h b/include/net/red.h
index d9e1149a2bc..0bc16913fdd 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -178,7 +178,7 @@ static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
 	int  shift;
 
 	PSCHED_GET_TIME(now);
-	us_idle = PSCHED_TDIFF_SAFE(now, p->qidlestart, p->Scell_max);
+	us_idle = psched_tdiff_bounded(now, p->qidlestart, p->Scell_max);
 
 	/*
 	 * The problem: ideally, average length queue recalcultion should
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0a5679ea6c6..65d60a3f776 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -298,8 +298,8 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
 
 		PSCHED_GET_TIME(now);
 
-		toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
-					 police->tcfp_burst);
+		toks = psched_tdiff_bounded(now, police->tcfp_t_c,
+					    police->tcfp_burst);
 		if (police->tcfp_P_tab) {
 			ptoks = toks + police->tcfp_ptoks;
 			if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
@@ -544,8 +544,8 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *police)
 		}
 
 		PSCHED_GET_TIME(now);
-		toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c,
-					 police->tcfp_burst);
+		toks = psched_tdiff_bounded(now, police->tcfp_t_c,
+					    police->tcfp_burst);
 		if (police->tcfp_P_tab) {
 			ptoks = toks + police->tcfp_ptoks;
 			if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index b7abd0ae676..71e4c92b7e8 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -729,7 +729,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 	cl->T = toks
 
 	while (cl) {
-		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
+		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
 		if (cl->level >= level) {
 			if (cl->level == level)
 				cl->xstats.lends++;
@@ -789,7 +789,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level)
 			return cl->pq_key;
 
 		htb_safe_rb_erase(p, q->wait_pq + level);
-		diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer);
+		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
 		htb_change_class_mode(q, cl, &diff);
 		if (cl->cmode != HTB_CAN_SEND)
 			htb_add_to_wait_tree(q, cl, diff);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 626ce96800f..da9f40e5444 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -201,7 +201,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 
 		PSCHED_GET_TIME(now);
 
-		toks = PSCHED_TDIFF_SAFE(now, q->t_c, q->buffer);
+		toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
 
 		if (q->P_tab) {
 			ptoks = toks + q->ptokens;
-- 
cgit v1.2.3


From 3bebcda28077375470dd60545b71bba2f83335fd Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 23 Mar 2007 11:29:25 -0700
Subject: [NET_SCHED]: turn PSCHED_GET_TIME into inline function

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h |  8 +++++---
 include/net/red.h       |  4 ++--
 net/sched/act_police.c  |  9 ++++-----
 net/sched/sch_cbq.c     | 10 +++++-----
 net/sched/sch_hfsc.c    | 10 ++++------
 net/sched/sch_htb.c     |  6 +++---
 net/sched/sch_netem.c   |  8 +++-----
 net/sched/sch_tbf.c     |  7 +++----
 8 files changed, 29 insertions(+), 33 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index e6b1da050d3..b2cc9a8ed4e 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -48,11 +48,13 @@ typedef long	psched_tdiff_t;
 #define PSCHED_NS2US(x)			((x) >> 10)
 
 #define PSCHED_TICKS_PER_SEC		PSCHED_NS2US(NSEC_PER_SEC)
-#define PSCHED_GET_TIME(stamp) \
-	((stamp) = PSCHED_NS2US(ktime_to_ns(ktime_get())))
-
 #define PSCHED_PASTPERFECT		0
 
+static inline psched_time_t psched_get_time(void)
+{
+	return PSCHED_NS2US(ktime_to_ns(ktime_get()));
+}
+
 static inline psched_tdiff_t
 psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound)
 {
diff --git a/include/net/red.h b/include/net/red.h
index 0bc16913fdd..3cf31d466a8 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -156,7 +156,7 @@ static inline int red_is_idling(struct red_parms *p)
 
 static inline void red_start_of_idle_period(struct red_parms *p)
 {
-	PSCHED_GET_TIME(p->qidlestart);
+	p->qidlestart = psched_get_time();
 }
 
 static inline void red_end_of_idle_period(struct red_parms *p)
@@ -177,7 +177,7 @@ static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
 	long us_idle;
 	int  shift;
 
-	PSCHED_GET_TIME(now);
+	now = psched_get_time();
 	us_idle = psched_tdiff_bounded(now, p->qidlestart, p->Scell_max);
 
 	/*
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 65d60a3f776..616f465f407 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -241,7 +241,7 @@ override:
 	if (ret != ACT_P_CREATED)
 		return ret;
 
-	PSCHED_GET_TIME(police->tcfp_t_c);
+	police->tcfp_t_c = psched_get_time();
 	police->tcf_index = parm->index ? parm->index :
 		tcf_hash_new_index(&police_idx_gen, &police_hash_info);
 	h = tcf_hash(police->tcf_index, POL_TAB_MASK);
@@ -296,8 +296,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
 			return police->tcfp_result;
 		}
 
-		PSCHED_GET_TIME(now);
-
+		now = psched_get_time();
 		toks = psched_tdiff_bounded(now, police->tcfp_t_c,
 					    police->tcfp_burst);
 		if (police->tcfp_P_tab) {
@@ -495,7 +494,7 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est)
 	}
 	if (police->tcfp_P_tab)
 		police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
-	PSCHED_GET_TIME(police->tcfp_t_c);
+	police->tcfp_t_c = psched_get_time();
 	police->tcf_index = parm->index ? parm->index :
 		tcf_police_new_index();
 	police->tcf_action = parm->action;
@@ -543,7 +542,7 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *police)
 			return police->tcfp_result;
 		}
 
-		PSCHED_GET_TIME(now);
+		now = psched_get_time();
 		toks = psched_tdiff_bounded(now, police->tcfp_t_c,
 					    police->tcfp_burst);
 		if (police->tcfp_P_tab) {
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index f9e8403c522..414a97c962f 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -385,7 +385,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 		psched_time_t now;
 		psched_tdiff_t incr;
 
-		PSCHED_GET_TIME(now);
+		now = psched_get_time();
 		incr = now - q->now_rt;
 		now = q->now + incr;
 
@@ -654,7 +654,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 	psched_tdiff_t delay = 0;
 	unsigned pmask;
 
-	PSCHED_GET_TIME(now);
+	now = psched_get_time();
 
 	pmask = q->pmask;
 	q->pmask = 0;
@@ -1003,7 +1003,7 @@ cbq_dequeue(struct Qdisc *sch)
 	psched_time_t now;
 	psched_tdiff_t incr;
 
-	PSCHED_GET_TIME(now);
+	now = psched_get_time();
 	incr = now - q->now_rt;
 
 	if (q->tx_class) {
@@ -1277,7 +1277,7 @@ cbq_reset(struct Qdisc* sch)
 	qdisc_watchdog_cancel(&q->watchdog);
 	hrtimer_cancel(&q->delay_timer);
 	q->toplevel = TC_CBQ_MAXLEVEL;
-	PSCHED_GET_TIME(q->now);
+	q->now = psched_get_time();
 	q->now_rt = q->now;
 
 	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
@@ -1448,7 +1448,7 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
 	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	q->delay_timer.function = cbq_undelay;
 	q->toplevel = TC_CBQ_MAXLEVEL;
-	PSCHED_GET_TIME(q->now);
+	q->now = psched_get_time();
 	q->now_rt = q->now;
 
 	cbq_link_class(&q->link);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 6a762cf781d..7d51d0d6a70 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -629,9 +629,7 @@ rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
 static void
 init_ed(struct hfsc_class *cl, unsigned int next_len)
 {
-	u64 cur_time;
-
-	PSCHED_GET_TIME(cur_time);
+	u64 cur_time = psched_get_time();
 
 	/* update the deadline curve */
 	rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);
@@ -754,7 +752,7 @@ init_vf(struct hfsc_class *cl, unsigned int len)
 			if (cl->cl_flags & HFSC_USC) {
 				/* class has upper limit curve */
 				if (cur_time == 0)
-					PSCHED_GET_TIME(cur_time);
+					cur_time = psched_get_time();
 
 				/* update the ulimit curve */
 				rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time,
@@ -1038,7 +1036,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 			if (cl->cl_parent == NULL && parentid != TC_H_ROOT)
 				return -EINVAL;
 		}
-		PSCHED_GET_TIME(cur_time);
+		cur_time = psched_get_time();
 
 		sch_tree_lock(sch);
 		if (rsc != NULL)
@@ -1639,7 +1637,7 @@ hfsc_dequeue(struct Qdisc *sch)
 	if ((skb = __skb_dequeue(&q->requeue)))
 		goto out;
 
-	PSCHED_GET_TIME(cur_time);
+	cur_time = psched_get_time();
 
 	/*
 	 * if there are eligible classes, use real-time criteria.
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 71e4c92b7e8..3f528554b0d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -965,7 +965,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 
 	if (!sch->q.qlen)
 		goto fin;
-	PSCHED_GET_TIME(q->now);
+	q->now = psched_get_time();
 
 	next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
 	q->nwc_hit = 0;
@@ -1274,7 +1274,7 @@ static void htb_parent_to_leaf(struct htb_class *cl, struct Qdisc *new_q)
 	parent->un.leaf.prio = parent->prio;
 	parent->tokens = parent->buffer;
 	parent->ctokens = parent->cbuffer;
-	PSCHED_GET_TIME(parent->t_c);
+	parent->t_c = psched_get_time();
 	parent->cmode = HTB_CAN_SEND;
 }
 
@@ -1471,7 +1471,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		cl->tokens = hopt->buffer;
 		cl->ctokens = hopt->cbuffer;
 		cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC;	/* 1min */
-		PSCHED_GET_TIME(cl->t_c);
+		cl->t_c = psched_get_time();
 		cl->cmode = HTB_CAN_SEND;
 
 		/* attach to the hash list and parent's family */
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 1e88301f505..5d9d8bc9cc3 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -217,7 +217,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		delay = tabledist(q->latency, q->jitter,
 				  &q->delay_cor, q->delay_dist);
 
-		PSCHED_GET_TIME(now);
+		now = psched_get_time();
 		cb->time_to_send = now + delay;
 		++q->counter;
 		ret = q->qdisc->enqueue(skb, q->qdisc);
@@ -226,7 +226,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		 * Do re-ordering by putting one out of N packets at the front
 		 * of the queue.
 		 */
-		PSCHED_GET_TIME(cb->time_to_send);
+		cb->time_to_send = psched_get_time();
 		q->counter = 0;
 		ret = q->qdisc->ops->requeue(skb, q->qdisc);
 	}
@@ -281,11 +281,9 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 	if (skb) {
 		const struct netem_skb_cb *cb
 			= (const struct netem_skb_cb *)skb->cb;
-		psched_time_t now;
+		psched_time_t now = psched_get_time();
 
 		/* if more time remaining? */
-		PSCHED_GET_TIME(now);
-
 		if (cb->time_to_send <= now) {
 			pr_debug("netem_dequeue: return skb=%p\n", skb);
 			sch->q.qlen--;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index da9f40e5444..53862953baa 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -199,8 +199,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 		long ptoks = 0;
 		unsigned int len = skb->len;
 
-		PSCHED_GET_TIME(now);
-
+		now = psched_get_time();
 		toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
 
 		if (q->P_tab) {
@@ -254,7 +253,7 @@ static void tbf_reset(struct Qdisc* sch)
 
 	qdisc_reset(q->qdisc);
 	sch->q.qlen = 0;
-	PSCHED_GET_TIME(q->t_c);
+	q->t_c = psched_get_time();
 	q->tokens = q->buffer;
 	q->ptokens = q->mtu;
 	qdisc_watchdog_cancel(&q->watchdog);
@@ -364,7 +363,7 @@ static int tbf_init(struct Qdisc* sch, struct rtattr *opt)
 	if (opt == NULL)
 		return -EINVAL;
 
-	PSCHED_GET_TIME(q->t_c);
+	q->t_c = psched_get_time();
 	qdisc_watchdog_init(&q->watchdog, sch);
 	q->qdisc = &noop_qdisc;
 
-- 
cgit v1.2.3


From a48b5a61448899040dfbd2e0cd55b06a2bd2466c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 23 Mar 2007 11:29:43 -0700
Subject: [NET_SCHED]: Unline tcf_destroy

Uninline tcf_destroy and add a helper function to destroy an entire filter
chain.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 10 ++--------
 net/sched/sch_api.c       | 18 ++++++++++++++++++
 net/sched/sch_atm.c       | 17 ++---------------
 net/sched/sch_cbq.c       | 14 ++------------
 net/sched/sch_dsmark.c    |  8 +-------
 net/sched/sch_hfsc.c      | 13 +------------
 net/sched/sch_htb.c       | 14 ++------------
 net/sched/sch_ingress.c   |  7 +------
 net/sched/sch_prio.c      |  7 +------
 9 files changed, 30 insertions(+), 78 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a3f4ddd1d6a..1b8e35197eb 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -177,14 +177,8 @@ extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n);
 extern struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops);
 extern struct Qdisc *qdisc_create_dflt(struct net_device *dev,
 				       struct Qdisc_ops *ops, u32 parentid);
-
-static inline void
-tcf_destroy(struct tcf_proto *tp)
-{
-	tp->ops->destroy(tp);
-	module_put(tp->ops->owner);
-	kfree(tp);
-}
+extern void tcf_destroy(struct tcf_proto *tp);
+extern void tcf_destroy_chain(struct tcf_proto *fl);
 
 static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
 				       struct sk_buff_head *list)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 58732509160..5b5bce0694e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1220,6 +1220,24 @@ reclassify:
 	return -1;
 }
 
+void tcf_destroy(struct tcf_proto *tp)
+{
+	tp->ops->destroy(tp);
+	module_put(tp->ops->owner);
+	kfree(tp);
+}
+
+void tcf_destroy_chain(struct tcf_proto *fl)
+{
+	struct tcf_proto *tp;
+
+	while ((tp = fl) != NULL) {
+		fl = tp->next;
+		tcf_destroy(tp);
+	}
+}
+EXPORT_SYMBOL(tcf_destroy_chain);
+
 #ifdef CONFIG_PROC_FS
 static int psched_show(struct seq_file *seq, void *v)
 {
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 0cc3c9b7272..be7d299acd7 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -158,19 +158,6 @@ static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
 	return atm_tc_get(sch,classid);
 }
 
-
-static void destroy_filters(struct atm_flow_data *flow)
-{
-	struct tcf_proto *filter;
-
-	while ((filter = flow->filter_list)) {
-		DPRINTK("destroy_filters: destroying filter %p\n",filter);
-		flow->filter_list = filter->next;
-		tcf_destroy(filter);
-	}
-}
-
-
 /*
  * atm_tc_put handles all destructions, including the ones that are explicitly
  * requested (atm_tc_destroy, etc.). The assumption here is that we never drop
@@ -195,7 +182,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
 	*prev = flow->next;
 	DPRINTK("atm_tc_put: qdisc %p\n",flow->q);
 	qdisc_destroy(flow->q);
-	destroy_filters(flow);
+	tcf_destroy_chain(flow->filter_list);
 	if (flow->sock) {
 		DPRINTK("atm_tc_put: f_count %d\n",
 		    file_count(flow->sock->file));
@@ -611,7 +598,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
 	DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n",sch,p);
 	/* races ? */
 	while ((flow = p->flows)) {
-		destroy_filters(flow);
+		tcf_destroy_chain(flow->filter_list);
 		if (flow->ref > 1)
 			printk(KERN_ERR "atm_destroy: %p->ref = %d\n",flow,
 			    flow->ref);
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 414a97c962f..a294542cb8e 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1717,23 +1717,13 @@ static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
 	return 0;
 }
 
-static void cbq_destroy_filters(struct cbq_class *cl)
-{
-	struct tcf_proto *tp;
-
-	while ((tp = cl->filter_list) != NULL) {
-		cl->filter_list = tp->next;
-		tcf_destroy(tp);
-	}
-}
-
 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 
 	BUG_TRAP(!cl->filters);
 
-	cbq_destroy_filters(cl);
+	tcf_destroy_chain(cl->filter_list);
 	qdisc_destroy(cl->q);
 	qdisc_put_rtab(cl->R_tab);
 #ifdef CONFIG_NET_ESTIMATOR
@@ -1760,7 +1750,7 @@ cbq_destroy(struct Qdisc* sch)
 	 */
 	for (h = 0; h < 16; h++)
 		for (cl = q->classes[h]; cl; cl = cl->next)
-			cbq_destroy_filters(cl);
+			tcf_destroy_chain(cl->filter_list);
 
 	for (h = 0; h < 16; h++) {
 		struct cbq_class *next;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 2c857af79a1..e38e0d00d1e 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -412,16 +412,10 @@ static void dsmark_reset(struct Qdisc *sch)
 static void dsmark_destroy(struct Qdisc *sch)
 {
 	struct dsmark_qdisc_data *p = PRIV(sch);
-	struct tcf_proto *tp;
 
 	DPRINTK("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
 
-	while (p->filter_list) {
-		tp = p->filter_list;
-		p->filter_list = tp->next;
-		tcf_destroy(tp);
-	}
-
+	tcf_destroy_chain(p->filter_list);
 	qdisc_destroy(p->q);
 	kfree(p->mask);
 }
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 7d51d0d6a70..9d124c4ee3a 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1121,23 +1121,12 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	return 0;
 }
 
-static void
-hfsc_destroy_filters(struct tcf_proto **fl)
-{
-	struct tcf_proto *tp;
-
-	while ((tp = *fl) != NULL) {
-		*fl = tp->next;
-		tcf_destroy(tp);
-	}
-}
-
 static void
 hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
 
-	hfsc_destroy_filters(&cl->filter_list);
+	tcf_destroy_chain(cl->filter_list);
 	qdisc_destroy(cl->qdisc);
 #ifdef CONFIG_NET_ESTIMATOR
 	gen_kill_estimator(&cl->bstats, &cl->rate_est);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 3f528554b0d..99bcec8dd04 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1236,16 +1236,6 @@ static unsigned long htb_get(struct Qdisc *sch, u32 classid)
 	return (unsigned long)cl;
 }
 
-static void htb_destroy_filters(struct tcf_proto **fl)
-{
-	struct tcf_proto *tp;
-
-	while ((tp = *fl) != NULL) {
-		*fl = tp->next;
-		tcf_destroy(tp);
-	}
-}
-
 static inline int htb_parent_last_child(struct htb_class *cl)
 {
 	if (!cl->parent)
@@ -1289,7 +1279,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
 	qdisc_put_rtab(cl->rate);
 	qdisc_put_rtab(cl->ceil);
 
-	htb_destroy_filters(&cl->filter_list);
+	tcf_destroy_chain(cl->filter_list);
 
 	while (!list_empty(&cl->children))
 		htb_destroy_class(sch, list_entry(cl->children.next,
@@ -1321,7 +1311,7 @@ static void htb_destroy(struct Qdisc *sch)
 	   and surprisingly it worked in 2.4. But it must precede it
 	   because filter need its target class alive to be able to call
 	   unbind_filter on it (without Oops). */
-	htb_destroy_filters(&q->filter_list);
+	tcf_destroy_chain(q->filter_list);
 
 	while (!list_empty(&q->root))
 		htb_destroy_class(sch, list_entry(q->root.next,
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index f63d5c6eb30..1fb60aba1e6 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -346,14 +346,9 @@ static void ingress_reset(struct Qdisc *sch)
 static void ingress_destroy(struct Qdisc *sch)
 {
 	struct ingress_qdisc_data *p = PRIV(sch);
-	struct tcf_proto *tp;
 
 	DPRINTK("ingress_destroy(sch %p,[qdisc %p])\n", sch, p);
-	while (p->filter_list) {
-		tp = p->filter_list;
-		p->filter_list = tp->next;
-		tcf_destroy(tp);
-	}
+	tcf_destroy_chain(p->filter_list);
 #if 0
 /* for future use */
 	qdisc_destroy(p->q);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index f13996348dd..5cfe60bf6e2 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -189,13 +189,8 @@ prio_destroy(struct Qdisc* sch)
 {
 	int prio;
 	struct prio_sched_data *q = qdisc_priv(sch);
-	struct tcf_proto *tp;
-
-	while ((tp = q->filter_list) != NULL) {
-		q->filter_list = tp->next;
-		tcf_destroy(tp);
-	}
 
+	tcf_destroy_chain(q->filter_list);
 	for (prio=0; prio<q->bands; prio++)
 		qdisc_destroy(q->queues[prio]);
 }
-- 
cgit v1.2.3


From c95e939508e64863a1c5c73a9e1a908784e06820 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 23 Mar 2007 11:30:04 -0700
Subject: [NET_SCHED]: qdisc: remove unnecessary memory barriers

We're holding dev->queue_lock in qdisc_watchdog_schedule and
qdisc_watchdog_cancel, no need for the barriers.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 5b5bce0694e..2e863bdaa9a 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -322,7 +322,6 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
 	ktime_t time;
 
 	wd->qdisc->flags |= TCQ_F_THROTTLED;
-	smp_wmb();
 	time = ktime_set(0, 0);
 	time = ktime_add_ns(time, PSCHED_US2NS(expires));
 	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
@@ -333,7 +332,6 @@ void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
 {
 	hrtimer_cancel(&wd->timer);
 	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
-	smp_wmb();
 }
 EXPORT_SYMBOL(qdisc_watchdog_cancel);
 
-- 
cgit v1.2.3


From b6e1331f3ce25a56edb956054eaf8011654686cb Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 20 Apr 2007 12:23:15 -0700
Subject: [SCTP]: Implement SCTP_FRAGMENT_INTERLEAVE socket option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This option was introduced in draft-ietf-tsvwg-sctpsocket-13.  It
prevents head-of-line blocking in the case of one-to-many endpoint.
Applications enabling this option really must enable SCTP_SNDRCV event
so that they would know where the data belongs.  Based on an
earlier patch by Ivan Skytte Jørgensen.

Additionally, this functionality now permits multiple associations
on the same endpoint to enter Partial Delivery.  Applications should
be extra careful, when using this functionality, to track EOR indicators.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h  |   3 +-
 include/net/sctp/ulpqueue.h |   2 +-
 include/net/sctp/user.h     |   4 +-
 net/sctp/socket.c           |  84 +++++++++++++++++++++++++++++++++---
 net/sctp/ulpqueue.c         | 103 +++++++++++++++++++++++++++++++-------------
 5 files changed, 157 insertions(+), 39 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index f431acf3dce..fe7f5ae1c51 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -304,10 +304,11 @@ struct sctp_sock {
 	__u32 autoclose;
 	__u8 nodelay;
 	__u8 disable_fragments;
-	__u8 pd_mode;
 	__u8 v4mapped;
+	__u8 frag_interleave;
 	__u32 adaptation_ind;
 
+	atomic_t pd_mode;
 	/* Receive to here while partial delivery is in effect. */
 	struct sk_buff_head pd_lobby;
 };
diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h
index ab26ab3adae..39ea3f442b4 100644
--- a/include/net/sctp/ulpqueue.h
+++ b/include/net/sctp/ulpqueue.h
@@ -78,7 +78,7 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *, struct sctp_chunk *, gfp_t);
 void sctp_ulpq_abort_pd(struct sctp_ulpq *, gfp_t);
 
 /* Clear the partial data delivery condition on this socket. */
-int sctp_clear_pd(struct sock *sk);
+int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc);
 
 /* Skip over an SSN. */
 void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn);
diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index 67a30eb2b3a..e77316088dc 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -97,6 +97,8 @@ enum sctp_optname {
 #define SCTP_DELAYED_ACK_TIME SCTP_DELAYED_ACK_TIME
 	SCTP_CONTEXT,	/* Receive Context */
 #define SCTP_CONTEXT SCTP_CONTEXT
+	SCTP_FRAGMENT_INTERLEAVE,
+#define SCTP_FRAGMENT_INTERLEAVE SCTP_FRAGMENT_INTERLEAVE
 
 	/* Internal Socket Options. Some of the sctp library functions are 
 	 * implemented using these socket options.
@@ -530,7 +532,7 @@ struct sctp_paddrparams {
 	__u32			spp_flags;
 } __attribute__((packed, aligned(4)));
 
-/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
+/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
  *
  *   This options will get or set the delayed ack timer.  The time is set
  *   in milliseconds.  If the assoc_id is 0, then this sets or gets the
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a1d026f12b0..b4be473c68b 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2255,7 +2255,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk,
 	return 0;
 }
 
-/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
+/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
  *
  *   This options will get or set the delayed ack timer.  The time is set
  *   in milliseconds.  If the assoc_id is 0, then this sets or gets the
@@ -2792,6 +2792,46 @@ static int sctp_setsockopt_context(struct sock *sk, char __user *optval,
 	return 0;
 }
 
+/*
+ * 7.1.24.  Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE)
+ *
+ * This options will at a minimum specify if the implementation is doing
+ * fragmented interleave.  Fragmented interleave, for a one to many
+ * socket, is when subsequent calls to receive a message may return
+ * parts of messages from different associations.  Some implementations
+ * may allow you to turn this value on or off.  If so, when turned off,
+ * no fragment interleave will occur (which will cause a head of line
+ * blocking amongst multiple associations sharing the same one to many
+ * socket).  When this option is turned on, then each receive call may
+ * come from a different association (thus the user must receive data
+ * with the extended calls (e.g. sctp_recvmsg) to keep track of which
+ * association each receive belongs to.
+ *
+ * This option takes a boolean value.  A non-zero value indicates that
+ * fragmented interleave is on.  A value of zero indicates that
+ * fragmented interleave is off.
+ *
+ * Note that it is important that an implementation that allows this
+ * option to be turned on, have it off by default.  Otherwise an unaware
+ * application using the one to many model may become confused and act
+ * incorrectly.
+ */
+static int sctp_setsockopt_fragment_interleave(struct sock *sk,
+					       char __user *optval,
+					       int optlen)
+{
+	int val;
+
+	if (optlen != sizeof(int))
+		return -EINVAL;
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	sctp_sk(sk)->frag_interleave = (val == 0) ? 0 : 1;
+
+	return 0;
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -2906,7 +2946,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
 	case SCTP_CONTEXT:
 		retval = sctp_setsockopt_context(sk, optval, optlen);
 		break;
-
+	case SCTP_FRAGMENT_INTERLEAVE:
+		retval = sctp_setsockopt_fragment_interleave(sk, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -3134,8 +3176,9 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	sp->pf = sctp_get_pf_specific(sk->sk_family);
 
 	/* Control variables for partial data delivery. */
-	sp->pd_mode           = 0;
+	atomic_set(&sp->pd_mode, 0);
 	skb_queue_head_init(&sp->pd_lobby);
+	sp->frag_interleave = 0;
 
 	/* Create a per socket endpoint structure.  Even if we
 	 * change the data structure relationships, this may still
@@ -3642,7 +3685,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len,
 	return 0;
 }
 
-/* 7.1.24. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
+/* 7.1.23. Delayed Ack Timer (SCTP_DELAYED_ACK_TIME)
  *
  *   This options will get or set the delayed ack timer.  The time is set
  *   in milliseconds.  If the assoc_id is 0, then this sets or gets the
@@ -4536,6 +4579,29 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
 	return 0;
 }
 
+/*
+ * 7.1.24.  Get or set fragmented interleave (SCTP_FRAGMENT_INTERLEAVE)
+ * (chapter and verse is quoted at sctp_setsockopt_fragment_interleave())
+ */
+static int sctp_getsockopt_fragment_interleave(struct sock *sk, int len,
+					       char __user *optval, int __user *optlen)
+{
+	int val;
+
+	if (len < sizeof(int))
+		return -EINVAL;
+
+	len = sizeof(int);
+
+	val = sctp_sk(sk)->frag_interleave;
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	return 0;
+}
+
 SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 				char __user *optval, int __user *optlen)
 {
@@ -4648,6 +4714,10 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 	case SCTP_CONTEXT:
 		retval = sctp_getsockopt_context(sk, len, optval, optlen);
 		break;
+	case SCTP_FRAGMENT_INTERLEAVE:
+		retval = sctp_getsockopt_fragment_interleave(sk, len, optval,
+							     optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -5742,9 +5812,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 	 * 3) Peeling off non-partial delivery; move pd_lobby to receive_queue.
 	 */
 	skb_queue_head_init(&newsp->pd_lobby);
-	sctp_sk(newsk)->pd_mode = assoc->ulpq.pd_mode;
+	atomic_set(&sctp_sk(newsk)->pd_mode, assoc->ulpq.pd_mode);
 
-	if (sctp_sk(oldsk)->pd_mode) {
+	if (atomic_read(&sctp_sk(oldsk)->pd_mode)) {
 		struct sk_buff_head *queue;
 
 		/* Decide which queue to move pd_lobby skbs to. */
@@ -5770,7 +5840,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
 		 * delivery to finish.
 		 */
 		if (assoc->ulpq.pd_mode)
-			sctp_clear_pd(oldsk);
+			sctp_clear_pd(oldsk, NULL);
 
 	}
 
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index b29e3e4b72c..ac80c34f6c2 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -138,18 +138,42 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 /* Clear the partial delivery mode for this socket.   Note: This
  * assumes that no association is currently in partial delivery mode.
  */
-int sctp_clear_pd(struct sock *sk)
+int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc)
 {
 	struct sctp_sock *sp = sctp_sk(sk);
 
-	sp->pd_mode = 0;
-	if (!skb_queue_empty(&sp->pd_lobby)) {
-		struct list_head *list;
-		sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue);
-		list = (struct list_head *)&sctp_sk(sk)->pd_lobby;
-		INIT_LIST_HEAD(list);
-		return 1;
+	if (atomic_dec_and_test(&sp->pd_mode)) {
+		/* This means there are no other associations in PD, so
+		 * we can go ahead and clear out the lobby in one shot
+		 */
+		if (!skb_queue_empty(&sp->pd_lobby)) {
+			struct list_head *list;
+			sctp_skb_list_tail(&sp->pd_lobby, &sk->sk_receive_queue);
+			list = (struct list_head *)&sctp_sk(sk)->pd_lobby;
+			INIT_LIST_HEAD(list);
+			return 1;
+		}
+	} else {
+		/* There are other associations in PD, so we only need to
+		 * pull stuff out of the lobby that belongs to the
+		 * associations that is exiting PD (all of its notifications
+		 * are posted here).
+		 */
+		if (!skb_queue_empty(&sp->pd_lobby) && asoc) {
+			struct sk_buff *skb, *tmp;
+			struct sctp_ulpevent *event;
+
+			sctp_skb_for_each(skb, &sp->pd_lobby, tmp) {
+				event = sctp_skb2event(skb);
+				if (event->asoc == asoc) {
+					__skb_unlink(skb, &sp->pd_lobby);
+					__skb_queue_tail(&sk->sk_receive_queue,
+							 skb);
+				}
+			}
+		}
 	}
+
 	return 0;
 }
 
@@ -157,7 +181,7 @@ int sctp_clear_pd(struct sock *sk)
 static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq)
 {
 	ulpq->pd_mode = 0;
-	return sctp_clear_pd(ulpq->asoc->base.sk);
+	return sctp_clear_pd(ulpq->asoc->base.sk, ulpq->asoc);
 }
 
 /* If the SKB of 'event' is on a list, it is the first such member
@@ -187,25 +211,35 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
 	 * the association the cause of the partial delivery.
 	 */
 
-	if (!sctp_sk(sk)->pd_mode) {
+	if (atomic_read(&sctp_sk(sk)->pd_mode) == 0) {
 		queue = &sk->sk_receive_queue;
-	} else if (ulpq->pd_mode) {
-		/* If the association is in partial delivery, we
-		 * need to finish delivering the partially processed
-		 * packet before passing any other data.  This is
-		 * because we don't truly support stream interleaving.
-		 */
-		if ((event->msg_flags & MSG_NOTIFICATION) ||
-		    (SCTP_DATA_NOT_FRAG ==
-			    (event->msg_flags & SCTP_DATA_FRAG_MASK)))
-			queue = &sctp_sk(sk)->pd_lobby;
-		else {
-			clear_pd = event->msg_flags & MSG_EOR;
-			queue = &sk->sk_receive_queue;
+	} else {
+		if (ulpq->pd_mode) {
+			/* If the association is in partial delivery, we
+			 * need to finish delivering the partially processed
+			 * packet before passing any other data.  This is
+			 * because we don't truly support stream interleaving.
+			 */
+			if ((event->msg_flags & MSG_NOTIFICATION) ||
+			    (SCTP_DATA_NOT_FRAG ==
+				    (event->msg_flags & SCTP_DATA_FRAG_MASK)))
+				queue = &sctp_sk(sk)->pd_lobby;
+			else {
+				clear_pd = event->msg_flags & MSG_EOR;
+				queue = &sk->sk_receive_queue;
+			}
+		} else {
+			/*
+			 * If fragment interleave is enabled, we
+			 * can queue this to the recieve queue instead
+			 * of the lobby.
+			 */
+			if (sctp_sk(sk)->frag_interleave)
+				queue = &sk->sk_receive_queue;
+			else
+				queue = &sctp_sk(sk)->pd_lobby;
 		}
-	} else
-		queue = &sctp_sk(sk)->pd_lobby;
-
+	}
 
 	/* If we are harvesting multiple skbs they will be
 	 * collected on a list.
@@ -826,18 +860,29 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
 {
 	struct sctp_ulpevent *event;
 	struct sctp_association *asoc;
+	struct sctp_sock *sp;
 
 	asoc = ulpq->asoc;
+	sp = sctp_sk(asoc->base.sk);
 
-	/* Are we already in partial delivery mode?  */
-	if (!sctp_sk(asoc->base.sk)->pd_mode) {
+	/* If the association is already in Partial Delivery mode
+	 * we have noting to do.
+	 */
+	if (ulpq->pd_mode)
+		return;
 
+	/* If the user enabled fragment interleave socket option,
+	 * multiple associations can enter partial delivery.
+	 * Otherwise, we can only enter partial delivery if the
+	 * socket is not in partial deliver mode.
+	 */
+	if (sp->frag_interleave || atomic_read(&sp->pd_mode) == 0) {
 		/* Is partial delivery possible?  */
 		event = sctp_ulpq_retrieve_first(ulpq);
 		/* Send event to the ULP.   */
 		if (event) {
 			sctp_ulpq_tail_event(ulpq, event);
-			sctp_sk(asoc->base.sk)->pd_mode = 1;
+			atomic_inc(&sp->pd_mode);
 			ulpq->pd_mode = 1;
 			return;
 		}
-- 
cgit v1.2.3


From d49d91d79a8dc5e85108a5ae1c8eef23dec135c1 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 23 Mar 2007 11:32:00 -0700
Subject: [SCTP]: Implement SCTP_PARTIAL_DELIVERY_POINT option.

This option induces partial delivery to run as soon
as the specified amount of data has been accumulated on
the association.  However, we give preference to fully
reassembled messages over PD messages.  In any case,
window and buffer is freed up.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@.hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  1 +
 include/net/sctp/user.h    |  2 ++
 net/sctp/socket.c          | 57 +++++++++++++++++++++++++++++++++++++++++
 net/sctp/ulpqueue.c        | 64 +++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 120 insertions(+), 4 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index fe7f5ae1c51..37b4a24e589 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -307,6 +307,7 @@ struct sctp_sock {
 	__u8 v4mapped;
 	__u8 frag_interleave;
 	__u32 adaptation_ind;
+	__u32 pd_point;
 
 	atomic_t pd_mode;
 	/* Receive to here while partial delivery is in effect. */
diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index e77316088dc..9a835271063 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -99,6 +99,8 @@ enum sctp_optname {
 #define SCTP_CONTEXT SCTP_CONTEXT
 	SCTP_FRAGMENT_INTERLEAVE,
 #define SCTP_FRAGMENT_INTERLEAVE SCTP_FRAGMENT_INTERLEAVE
+	SCTP_PARTIAL_DELIVERY_POINT,	/* Set/Get partial delivery point */
+#define SCTP_PARTIAL_DELIVERY_POINT SCTP_PARTIAL_DELIVERY_POINT
 
 	/* Internal Socket Options. Some of the sctp library functions are 
 	 * implemented using these socket options.
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b4be473c68b..1e787a2d0b5 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2832,6 +2832,32 @@ static int sctp_setsockopt_fragment_interleave(struct sock *sk,
 	return 0;
 }
 
+/*
+ * 7.1.25.  Set or Get the sctp partial delivery point
+ *       (SCTP_PARTIAL_DELIVERY_POINT)
+ * This option will set or get the SCTP partial delivery point.  This
+ * point is the size of a message where the partial delivery API will be
+ * invoked to help free up rwnd space for the peer.  Setting this to a
+ * lower value will cause partial delivery's to happen more often.  The
+ * calls argument is an integer that sets or gets the partial delivery
+ * point.
+ */
+static int sctp_setsockopt_partial_delivery_point(struct sock *sk,
+						  char __user *optval,
+						  int optlen)
+{
+	u32 val;
+
+	if (optlen != sizeof(u32))
+		return -EINVAL;
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	sctp_sk(sk)->pd_point = val;
+
+	return 0; /* is this the right error code? */
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -2911,6 +2937,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
 	case SCTP_DELAYED_ACK_TIME:
 		retval = sctp_setsockopt_delayed_ack_time(sk, optval, optlen);
 		break;
+	case SCTP_PARTIAL_DELIVERY_POINT:
+		retval = sctp_setsockopt_partial_delivery_point(sk, optval, optlen);
+		break;
 
 	case SCTP_INITMSG:
 		retval = sctp_setsockopt_initmsg(sk, optval, optlen);
@@ -4602,6 +4631,30 @@ static int sctp_getsockopt_fragment_interleave(struct sock *sk, int len,
 	return 0;
 }
 
+/*
+ * 7.1.25.  Set or Get the sctp partial delivery point
+ * (chapter and verse is quoted at sctp_setsockopt_partial_delivery_point())
+ */
+static int sctp_getsockopt_partial_delivery_point(struct sock *sk, int len,
+						  char __user *optval,
+						  int __user *optlen)
+{
+        u32 val;
+
+	if (len < sizeof(u32))
+		return -EINVAL;
+
+	len = sizeof(u32);
+
+	val = sctp_sk(sk)->pd_point;
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	return -ENOTSUPP;
+}
+
 SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 				char __user *optval, int __user *optlen)
 {
@@ -4718,6 +4771,10 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_getsockopt_fragment_interleave(sk, len, optval,
 							     optlen);
 		break;
+	case SCTP_PARTIAL_DELIVERY_POINT:
+		retval = sctp_getsockopt_partial_delivery_point(sk, len, optval,
+								optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index ac80c34f6c2..0fa4d4d4df1 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -177,6 +177,15 @@ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc)
 	return 0;
 }
 
+/* Set the pd_mode on the socket and ulpq */
+static void sctp_ulpq_set_pd(struct sctp_ulpq *ulpq)
+{
+	struct sctp_sock *sp = sctp_sk(ulpq->asoc->base.sk);
+
+	atomic_inc(&sp->pd_mode);
+	ulpq->pd_mode = 1;
+}
+
 /* Clear the pd_mode and restart any pending messages waiting for delivery. */
 static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq)
 {
@@ -401,6 +410,11 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
 	struct sk_buff *first_frag = NULL;
 	__u32 ctsn, next_tsn;
 	struct sctp_ulpevent *retval = NULL;
+	struct sk_buff *pd_first = NULL;
+	struct sk_buff *pd_last = NULL;
+	size_t pd_len = 0;
+	struct sctp_association *asoc;
+	u32 pd_point;
 
 	/* Initialized to 0 just to avoid compiler warning message.  Will
 	 * never be used with this value. It is referenced only after it
@@ -416,6 +430,10 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
 	 * we expect to find the remaining middle fragments and the last
 	 * fragment in order. If not, first_frag is reset to NULL and we
 	 * start the next pass when we find another first fragment.
+	 *
+	 * There is a potential to do partial delivery if user sets
+	 * SCTP_PARTIAL_DELIVERY_POINT option. Lets count some things here
+	 * to see if can do PD.
 	 */
 	skb_queue_walk(&ulpq->reasm, pos) {
 		cevent = sctp_skb2event(pos);
@@ -423,14 +441,32 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
 
 		switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) {
 		case SCTP_DATA_FIRST_FRAG:
+			/* If this "FIRST_FRAG" is the first
+			 * element in the queue, then count it towards
+			 * possible PD.
+			 */
+			if (pos == ulpq->reasm.next) {
+			    pd_first = pos;
+			    pd_last = pos;
+			    pd_len = pos->len;
+			} else {
+			    pd_first = NULL;
+			    pd_last = NULL;
+			    pd_len = 0;
+			}
+
 			first_frag = pos;
 			next_tsn = ctsn + 1;
 			break;
 
 		case SCTP_DATA_MIDDLE_FRAG:
-			if ((first_frag) && (ctsn == next_tsn))
+			if ((first_frag) && (ctsn == next_tsn)) {
 				next_tsn++;
-			else
+				if (pd_first) {
+				    pd_last = pos;
+				    pd_len += pos->len;
+				}
+			} else
 				first_frag = NULL;
 			break;
 
@@ -441,7 +477,28 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
 				first_frag = NULL;
 			break;
 		};
+	}
+
+	asoc = ulpq->asoc;
+	if (pd_first) {
+		/* Make sure we can enter partial deliver.
+		 * We can trigger partial delivery only if framgent
+		 * interleave is set, or the socket is not already
+		 * in  partial delivery.
+		 */
+		if (!sctp_sk(asoc->base.sk)->frag_interleave &&
+		    atomic_read(&sctp_sk(asoc->base.sk)->pd_mode))
+			goto done;
 
+		cevent = sctp_skb2event(pd_first);
+		pd_point = sctp_sk(asoc->base.sk)->pd_point;
+		if (pd_point && pd_point <= pd_len) {
+			retval = sctp_make_reassembled_event(&ulpq->reasm,
+							     pd_first,
+							     pd_last);
+			if (retval)
+				sctp_ulpq_set_pd(ulpq);
+		}
 	}
 done:
 	return retval;
@@ -882,8 +939,7 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
 		/* Send event to the ULP.   */
 		if (event) {
 			sctp_ulpq_tail_event(ulpq, event);
-			atomic_inc(&sp->pd_mode);
-			ulpq->pd_mode = 1;
+			sctp_ulpq_set_pd(ulpq);
 			return;
 		}
 	}
-- 
cgit v1.2.3


From 1ae4114dce35dd1d32ed847f60b599dbbdfd5829 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 23 Mar 2007 11:32:26 -0700
Subject: [SCTP]: Implement SCTP_ADDR_CONFIRMED state for ADDR_CHNAGE event

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/user.h |  1 +
 net/sctp/associola.c    | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index 9a835271063..4ed752119bb 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -265,6 +265,7 @@ enum sctp_spc_state {
 	SCTP_ADDR_REMOVED,
 	SCTP_ADDR_ADDED,
 	SCTP_ADDR_MADE_PRIM,
+	SCTP_ADDR_CONFIRMED,
 };
 
 
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 78d2ddb5ca1..85af1cb70fe 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -714,8 +714,16 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
 	/* Record the transition on the transport.  */
 	switch (command) {
 	case SCTP_TRANSPORT_UP:
+		/* If we are moving from UNCONFIRMED state due
+		 * to heartbeat success, report the SCTP_ADDR_CONFIRMED
+		 * state to the user, otherwise report SCTP_ADDR_AVAILABLE.
+		 */
+		if (SCTP_UNCONFIRMED == transport->state &&
+		    SCTP_HEARTBEAT_SUCCESS == error)
+			spc_state = SCTP_ADDR_CONFIRMED;
+		else
+			spc_state = SCTP_ADDR_AVAILABLE;
 		transport->state = SCTP_ACTIVE;
-		spc_state = SCTP_ADDR_AVAILABLE;
 		break;
 
 	case SCTP_TRANSPORT_DOWN:
-- 
cgit v1.2.3


From bdf3092af601ccad765974652ab103162fbe14f4 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 23 Mar 2007 11:33:12 -0700
Subject: [SCTP]: Honor flags when setting peer address parameters

Parameters only take effect when a corresponding flag bit is set
and a value is specified. This means we need to check the flags
in addition to checking for non-zero value.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/user.h | 15 +++++++-------
 net/sctp/socket.c       | 54 ++++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 52 insertions(+), 17 deletions(-)

diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index 4ed752119bb..80b7afea17f 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -513,16 +513,17 @@ struct sctp_setadaptation {
  *   address's parameters:
  */
 enum  sctp_spp_flags {
-	SPP_HB_ENABLE = 1,		/*Enable heartbeats*/
-	SPP_HB_DISABLE = 2,		/*Disable heartbeats*/
+	SPP_HB_ENABLE = 1<<0,		/*Enable heartbeats*/
+	SPP_HB_DISABLE = 1<<1,		/*Disable heartbeats*/
 	SPP_HB = SPP_HB_ENABLE | SPP_HB_DISABLE,
-	SPP_HB_DEMAND = 4,		/*Send heartbeat immediately*/
-	SPP_PMTUD_ENABLE = 8,		/*Enable PMTU discovery*/
-	SPP_PMTUD_DISABLE = 16,		/*Disable PMTU discovery*/
+	SPP_HB_DEMAND = 1<<2,		/*Send heartbeat immediately*/
+	SPP_PMTUD_ENABLE = 1<<3,	/*Enable PMTU discovery*/
+	SPP_PMTUD_DISABLE = 1<<4,	/*Disable PMTU discovery*/
 	SPP_PMTUD = SPP_PMTUD_ENABLE | SPP_PMTUD_DISABLE,
-	SPP_SACKDELAY_ENABLE = 32,	/*Enable SACK*/
-	SPP_SACKDELAY_DISABLE = 64,	/*Disable SACK*/
+	SPP_SACKDELAY_ENABLE = 1<<5,	/*Enable SACK*/
+	SPP_SACKDELAY_DISABLE = 1<<6,	/*Disable SACK*/
 	SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE,
+	SPP_HB_TIME_IS_ZERO = 1<<7,	/* Set HB delay to 0 */
 };
 
 struct sctp_paddrparams {
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 1e787a2d0b5..dda2f6700f5 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2039,6 +2039,10 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
  *                     SPP_HB_DEMAND - Request a user initiated heartbeat
  *                     to be made immediately.
  *
+ *                     SPP_HB_TIME_IS_ZERO - Specify's that the time for
+ *                     heartbeat delayis to be set to the value of 0
+ *                     milliseconds.
+ *
  *                     SPP_PMTUD_ENABLE - This field will enable PMTU
  *                     discovery upon the specified address. Note that
  *                     if the address feild is empty then all addresses
@@ -2081,13 +2085,30 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 			return error;
 	}
 
-	if (params->spp_hbinterval) {
-		if (trans) {
-			trans->hbinterval = msecs_to_jiffies(params->spp_hbinterval);
-		} else if (asoc) {
-			asoc->hbinterval = msecs_to_jiffies(params->spp_hbinterval);
-		} else {
-			sp->hbinterval = params->spp_hbinterval;
+	/* Note that unless the spp_flag is set to SPP_HB_ENABLE the value of
+	 * this field is ignored.  Note also that a value of zero indicates
+	 * the current setting should be left unchanged.
+	 */
+	if (params->spp_flags & SPP_HB_ENABLE) {
+
+		/* Re-zero the interval if the SPP_HB_TIME_IS_ZERO is
+		 * set.  This lets us use 0 value when this flag
+		 * is set.
+		 */
+		if (params->spp_flags & SPP_HB_TIME_IS_ZERO)
+			params->spp_hbinterval = 0;
+
+		if (params->spp_hbinterval ||
+		    (params->spp_flags & SPP_HB_TIME_IS_ZERO)) {
+			if (trans) {
+				trans->hbinterval =
+				    msecs_to_jiffies(params->spp_hbinterval);
+			} else if (asoc) {
+				asoc->hbinterval =
+				    msecs_to_jiffies(params->spp_hbinterval);
+			} else {
+				sp->hbinterval = params->spp_hbinterval;
+			}
 		}
 	}
 
@@ -2104,7 +2125,12 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 		}
 	}
 
-	if (params->spp_pathmtu) {
+	/* When Path MTU discovery is disabled the value specified here will
+	 * be the "fixed" path mtu (i.e. the value of the spp_flags field must
+	 * include the flag SPP_PMTUD_DISABLE for this field to have any
+	 * effect).
+	 */
+	if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) {
 		if (trans) {
 			trans->pathmtu = params->spp_pathmtu;
 			sctp_assoc_sync_pmtu(asoc);
@@ -2135,7 +2161,11 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 		}
 	}
 
-	if (params->spp_sackdelay) {
+	/* Note that unless the spp_flag is set to SPP_SACKDELAY_ENABLE the
+	 * value of this field is ignored.  Note also that a value of zero
+	 * indicates the current setting should be left unchanged.
+	 */
+	if ((params->spp_flags & SPP_SACKDELAY_ENABLE) && params->spp_sackdelay) {
 		if (trans) {
 			trans->sackdelay =
 				msecs_to_jiffies(params->spp_sackdelay);
@@ -2163,7 +2193,11 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 		}
 	}
 
-	if (params->spp_pathmaxrxt) {
+	/* Note that unless the spp_flag is set to SPP_PMTUD_ENABLE the value
+	 * of this field is ignored.  Note also that a value of zero
+	 * indicates the current setting should be left unchanged.
+	 */
+	if ((params->spp_flags & SPP_PMTUD_ENABLE) && params->spp_pathmaxrxt) {
 		if (trans) {
 			trans->pathmaxrxt = params->spp_pathmaxrxt;
 		} else if (asoc) {
-- 
cgit v1.2.3


From a5a35e76753d27e782028843a5186f176b50dd16 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 23 Mar 2007 11:34:08 -0700
Subject: [SCTP]: Implement sac_info field in SCTP_ASSOC_CHANGE notification.

As stated in the sctp socket api draft:

   sac_info: variable

   If the sac_state is SCTP_COMM_LOST and an ABORT chunk was received
   for this association, sac_info[] contains the complete ABORT chunk as
   defined in the SCTP specification RFC2960 [RFC2960] section 3.3.7.

We now save received ABORT chunks into the sac_info field and pass that
to the user.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/ulpevent.h |  1 +
 include/net/sctp/user.h     |  1 +
 net/sctp/sm_sideeffect.c    | 11 +++++++---
 net/sctp/sm_statefuns.c     | 14 ++++++-------
 net/sctp/ulpevent.c         | 49 ++++++++++++++++++++++++++++++++++++++-------
 5 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h
index 2923e3d31a0..de88ed5b0ba 100644
--- a/include/net/sctp/ulpevent.h
+++ b/include/net/sctp/ulpevent.h
@@ -89,6 +89,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change(
 	__u16 error,
 	__u16 outbound,
 	__u16 inbound,
+	struct sctp_chunk *chunk,
 	gfp_t gfp);
 
 struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change(
diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index 80b7afea17f..1b3153c2cdf 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -217,6 +217,7 @@ struct sctp_assoc_change {
 	__u16 sac_outbound_streams;
 	__u16 sac_inbound_streams;
 	sctp_assoc_t sac_assoc_id;
+	__u8 sac_info[0];
 };
 
 /*
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 13556749311..0a1a197193a 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -464,7 +464,7 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands,
 	struct sctp_ulpevent *event;
 
 	event = sctp_ulpevent_make_assoc_change(asoc,0, SCTP_CANT_STR_ASSOC,
-						(__u16)error, 0, 0,
+						(__u16)error, 0, 0, NULL,
 						GFP_ATOMIC);
 
 	if (event)
@@ -492,8 +492,13 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
 	/* Cancel any partial delivery in progress. */
 	sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
 
-	event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
-						(__u16)error, 0, 0,
+	if (event_type == SCTP_EVENT_T_CHUNK && subtype.chunk == SCTP_CID_ABORT)
+		event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
+						(__u16)error, 0, 0, chunk,
+						GFP_ATOMIC);
+	else
+		event = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_LOST,
+						(__u16)error, 0, 0, NULL,
 						GFP_ATOMIC);
 	if (event)
 		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 438e5dc5c71..e0ec16dd678 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -186,7 +186,7 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep,
 	 * notification is passed to the upper layer.
 	 */
 	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP,
-					     0, 0, 0, GFP_ATOMIC);
+					     0, 0, 0, NULL, GFP_ATOMIC);
 	if (ev)
 		sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
 				SCTP_ULPEVENT(ev));
@@ -661,7 +661,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 	ev = sctp_ulpevent_make_assoc_change(new_asoc, 0, SCTP_COMM_UP, 0,
 					     new_asoc->c.sinit_num_ostreams,
 					     new_asoc->c.sinit_max_instreams,
-					     GFP_ATOMIC);
+					     NULL, GFP_ATOMIC);
 	if (!ev)
 		goto nomem_ev;
 
@@ -790,7 +790,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(const struct sctp_endpoint *ep,
 	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP,
 					     0, asoc->c.sinit_num_ostreams,
 					     asoc->c.sinit_max_instreams,
-					     GFP_ATOMIC);
+					     NULL, GFP_ATOMIC);
 
 	if (!ev)
 		goto nomem;
@@ -1625,7 +1625,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
 	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0,
 					     new_asoc->c.sinit_num_ostreams,
 					     new_asoc->c.sinit_max_instreams,
-					     GFP_ATOMIC);
+					     NULL, GFP_ATOMIC);
 	if (!ev)
 		goto nomem_ev;
 
@@ -1691,7 +1691,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_b(const struct sctp_endpoint *ep,
 	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_COMM_UP, 0,
 					     new_asoc->c.sinit_num_ostreams,
 					     new_asoc->c.sinit_max_instreams,
-					     GFP_ATOMIC);
+					     NULL, GFP_ATOMIC);
 	if (!ev)
 		goto nomem_ev;
 
@@ -1786,7 +1786,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep,
 					     SCTP_COMM_UP, 0,
 					     asoc->c.sinit_num_ostreams,
 					     asoc->c.sinit_max_instreams,
-					     GFP_ATOMIC);
+                                             NULL, GFP_ATOMIC);
 		if (!ev)
 			goto nomem;
 
@@ -3035,7 +3035,7 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep,
 	 * notification is passed to the upper layer.
 	 */
 	ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP,
-					     0, 0, 0, GFP_ATOMIC);
+					     0, 0, 0, NULL, GFP_ATOMIC);
 	if (!ev)
 		goto nomem;
 
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 2e11bc8d5d3..661ea2dd78b 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -131,19 +131,54 @@ static inline void sctp_ulpevent_release_owner(struct sctp_ulpevent *event)
 struct sctp_ulpevent  *sctp_ulpevent_make_assoc_change(
 	const struct sctp_association *asoc,
 	__u16 flags, __u16 state, __u16 error, __u16 outbound,
-	__u16 inbound, gfp_t gfp)
+	__u16 inbound, struct sctp_chunk *chunk, gfp_t gfp)
 {
 	struct sctp_ulpevent *event;
 	struct sctp_assoc_change *sac;
 	struct sk_buff *skb;
 
-	event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change),
+	/* If the lower layer passed in the chunk, it will be
+	 * an ABORT, so we need to include it in the sac_info.
+	 */
+	if (chunk) {
+		/* sctp_inqu_pop() has allready pulled off the chunk
+		 * header.  We need to put it back temporarily
+		 */
+		skb_push(chunk->skb, sizeof(sctp_chunkhdr_t));
+
+		/* Copy the chunk data to a new skb and reserve enough
+		 * head room to use as notification.
+		 */
+		skb = skb_copy_expand(chunk->skb,
+				      sizeof(struct sctp_assoc_change), 0, gfp);
+
+		if (!skb)
+			goto fail;
+
+		/* put back the chunk header now that we have a copy */
+		skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
+
+		/* Embed the event fields inside the cloned skb.  */
+		event = sctp_skb2event(skb);
+		sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize);
+
+		/* Include the notification structure */
+		sac = (struct sctp_assoc_change *)
+			skb_push(skb, sizeof(struct sctp_assoc_change));
+
+		/* Trim the buffer to the right length.  */
+		skb_trim(skb, sizeof(struct sctp_assoc_change) +
+			 ntohs(chunk->chunk_hdr->length));
+	} else {
+		event = sctp_ulpevent_new(sizeof(struct sctp_assoc_change),
 				  MSG_NOTIFICATION, gfp);
-	if (!event)
-		goto fail;
-	skb = sctp_event2skb(event);
-	sac = (struct sctp_assoc_change *)
-		skb_put(skb, sizeof(struct sctp_assoc_change));
+		if (!event)
+			goto fail;
+
+		skb = sctp_event2skb(event);
+		sac = (struct sctp_assoc_change *) skb_put(skb,
+					sizeof(struct sctp_assoc_change));
+	}
 
 	/* Socket Extensions for SCTP
 	 * 5.3.1.1 SCTP_ASSOC_CHANGE
-- 
cgit v1.2.3


From 703315712cfccfe0b45ef4aa6994527d8ee95e33 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 23 Mar 2007 11:34:36 -0700
Subject: [SCTP]: Implement SCTP_MAX_BURST socket option.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/constants.h |  2 +-
 include/net/sctp/structs.h   |  1 +
 include/net/sctp/user.h      |  2 ++
 net/sctp/associola.c         |  2 +-
 net/sctp/protocol.c          |  2 +-
 net/sctp/socket.c            | 61 ++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 5ddb8559986..bb37724495a 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -283,7 +283,7 @@ enum { SCTP_MAX_GABS = 16 };
 #define SCTP_RTO_BETA           2   /* 1/4 when converted to right shifts. */
 
 /* Maximum number of new data packets that can be sent in a burst.  */
-#define SCTP_MAX_BURST		4
+#define SCTP_DEFAULT_MAX_BURST		4
 
 #define SCTP_CLOCK_GRANULARITY	1	/* 1 jiffy */
 
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 37b4a24e589..7b4fff93ba7 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -276,6 +276,7 @@ struct sctp_sock {
 	__u32 default_context;
 	__u32 default_timetolive;
 	__u32 default_rcv_context;
+	int max_burst;
 
 	/* Heartbeat interval: The endpoint sends out a Heartbeat chunk to
 	 * the destination address every heartbeat interval. This value
diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index 1b3153c2cdf..6d2b57758cc 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -101,6 +101,8 @@ enum sctp_optname {
 #define SCTP_FRAGMENT_INTERLEAVE SCTP_FRAGMENT_INTERLEAVE
 	SCTP_PARTIAL_DELIVERY_POINT,	/* Set/Get partial delivery point */
 #define SCTP_PARTIAL_DELIVERY_POINT SCTP_PARTIAL_DELIVERY_POINT
+	SCTP_MAX_BURST,		/* Set/Get max burst */
+#define SCTP_MAX_BURST SCTP_MAX_BURST
 
 	/* Internal Socket Options. Some of the sctp library functions are 
 	 * implemented using these socket options.
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 85af1cb70fe..37a343e1ebb 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -143,7 +143,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	/* Initialize the maximum mumber of new data packets that can be sent
 	 * in a burst.
 	 */
-	asoc->max_burst = sctp_max_burst;
+	asoc->max_burst = sp->max_burst;
 
 	/* initialize association timers */
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_NONE] = 0;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 7c28c9b959e..c361deb6cea 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1042,7 +1042,7 @@ SCTP_STATIC __init int sctp_init(void)
 	sctp_cookie_preserve_enable 	= 1;
 
 	/* Max.Burst		    - 4 */
-	sctp_max_burst 			= SCTP_MAX_BURST;
+	sctp_max_burst 			= SCTP_DEFAULT_MAX_BURST;
 
 	/* Association.Max.Retrans  - 10 attempts
 	 * Path.Max.Retrans         - 5  attempts (per destination address)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index dda2f6700f5..f904f2bc0f2 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2892,6 +2892,36 @@ static int sctp_setsockopt_partial_delivery_point(struct sock *sk,
 	return 0; /* is this the right error code? */
 }
 
+/*
+ * 7.1.28.  Set or Get the maximum burst (SCTP_MAX_BURST)
+ *
+ * This option will allow a user to change the maximum burst of packets
+ * that can be emitted by this association.  Note that the default value
+ * is 4, and some implementations may restrict this setting so that it
+ * can only be lowered.
+ *
+ * NOTE: This text doesn't seem right.  Do this on a socket basis with
+ * future associations inheriting the socket value.
+ */
+static int sctp_setsockopt_maxburst(struct sock *sk,
+				    char __user *optval,
+				    int optlen)
+{
+	int val;
+
+	if (optlen != sizeof(int))
+		return -EINVAL;
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	if (val < 0)
+		return -EINVAL;
+
+	sctp_sk(sk)->max_burst = val;
+
+	return 0;
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -3012,6 +3042,9 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
 	case SCTP_FRAGMENT_INTERLEAVE:
 		retval = sctp_setsockopt_fragment_interleave(sk, optval, optlen);
 		break;
+	case SCTP_MAX_BURST:
+		retval = sctp_setsockopt_maxburst(sk, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -3171,6 +3204,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	sp->default_timetolive = 0;
 
 	sp->default_rcv_context = 0;
+	sp->max_burst = sctp_max_burst;
 
 	/* Initialize default setup parameters. These parameters
 	 * can be modified with the SCTP_INITMSG socket option or
@@ -4689,6 +4723,30 @@ static int sctp_getsockopt_partial_delivery_point(struct sock *sk, int len,
 	return -ENOTSUPP;
 }
 
+/*
+ * 7.1.28.  Set or Get the maximum burst (SCTP_MAX_BURST)
+ * (chapter and verse is quoted at sctp_setsockopt_maxburst())
+ */
+static int sctp_getsockopt_maxburst(struct sock *sk, int len,
+				    char __user *optval,
+				    int __user *optlen)
+{
+        int val;
+
+	if (len < sizeof(int))
+		return -EINVAL;
+
+	len = sizeof(int);
+
+	val = sctp_sk(sk)->max_burst;
+	if (put_user(len, optlen))
+		return -EFAULT;
+	if (copy_to_user(optval, &val, len))
+		return -EFAULT;
+
+	return -ENOTSUPP;
+}
+
 SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 				char __user *optval, int __user *optlen)
 {
@@ -4809,6 +4867,9 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_getsockopt_partial_delivery_point(sk, len, optval,
 								optlen);
 		break;
+	case SCTP_MAX_BURST:
+		retval = sctp_getsockopt_maxburst(sk, len, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
-- 
cgit v1.2.3


From d30045a0bcf144753869175dd9d840f7ceaf4aba Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 23 Mar 2007 11:37:48 -0700
Subject: [NETLINK]: introduce NLA_BINARY type

This patch introduces a new NLA_BINARY attribute policy type with the
verification of simply checking the maximum length of the payload.

It also fixes a small typo in the example.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 4 +++-
 net/netlink/attr.c    | 5 +++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 1c11518fc82..2e4c90a98a7 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -171,6 +171,7 @@ enum {
 	NLA_MSECS,
 	NLA_NESTED,
 	NLA_NUL_STRING,
+	NLA_BINARY,
 	__NLA_TYPE_MAX,
 };
 
@@ -188,12 +189,13 @@ enum {
  *    NLA_STRING           Maximum length of string
  *    NLA_NUL_STRING       Maximum length of string (excluding NUL)
  *    NLA_FLAG             Unused
+ *    NLA_BINARY           Maximum length of attribute payload
  *    All other            Exact length of attribute payload
  *
  * Example:
  * static struct nla_policy my_policy[ATTR_MAX+1] __read_mostly = {
  * 	[ATTR_FOO] = { .type = NLA_U16 },
- *	[ATTR_BAR] = { .type = NLA_STRING, len = BARSIZ },
+ *	[ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ },
  *	[ATTR_BAZ] = { .len = sizeof(struct mystruct) },
  * };
  */
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index 004139557e0..df5f820a4c3 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -67,6 +67,11 @@ static int validate_nla(struct nlattr *nla, int maxtype,
 		}
 		break;
 
+	case NLA_BINARY:
+		if (pt->len && attrlen > pt->len)
+			return -ERANGE;
+		break;
+
 	default:
 		if (pt->len)
 			minlen = pt->len;
-- 
cgit v1.2.3


From b3da2cf37c5c6e47698957a25ab43a7223dbb90f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 23 Mar 2007 11:40:27 -0700
Subject: [INET]: Use jhash + random secret for ehash.

The days are gone when this was not an issue, there are folks out
there with huge bot networks that can be used to attack the
established hash tables on remote systems.

So just like the routing cache and connection tracking
hash, use Jenkins hash with random secret input.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet6_hashtables.h | 12 +++++++-----
 include/net/inet_sock.h        | 11 +++++++----
 net/ipv4/af_inet.c             | 16 ++++++++++++++++
 net/ipv6/af_inet6.c            |  5 +++++
 4 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index c28e424f53d..668056b4bb0 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -19,6 +19,9 @@
 #include <linux/in6.h>
 #include <linux/ipv6.h>
 #include <linux/types.h>
+#include <linux/jhash.h>
+
+#include <net/inet_sock.h>
 
 #include <net/ipv6.h>
 
@@ -28,12 +31,11 @@ struct inet_hashinfo;
 static inline unsigned int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport,
 				const struct in6_addr *faddr, const __be16 fport)
 {
-	unsigned int hashent = (lport ^ (__force u16)fport);
+	u32 ports = (lport ^ (__force u16)fport);
 
-	hashent ^= (__force u32)(laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
-	hashent ^= hashent >> 16;
-	hashent ^= hashent >> 8;
-	return hashent;
+	return jhash_3words((__force u32)laddr->s6_addr32[3],
+			    (__force u32)faddr->s6_addr32[3],
+			    ports, inet_ehash_secret);
 }
 
 static inline int inet6_sk_ehashfn(const struct sock *sk)
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index ce6da97bc84..62daf214931 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -19,6 +19,7 @@
 
 #include <linux/string.h>
 #include <linux/types.h>
+#include <linux/jhash.h>
 
 #include <net/flow.h>
 #include <net/sock.h>
@@ -167,13 +168,15 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
 
 extern int inet_sk_rebuild_header(struct sock *sk);
 
+extern u32 inet_ehash_secret;
+extern void build_ehash_secret(void);
+
 static inline unsigned int inet_ehashfn(const __be32 laddr, const __u16 lport,
 					const __be32 faddr, const __be16 fport)
 {
-	unsigned int h = ((__force __u32)laddr ^ lport) ^ ((__force __u32)faddr ^ (__force __u32)fport);
-	h ^= h >> 16;
-	h ^= h >> 8;
-	return h;
+	return jhash_2words((__force __u32) laddr ^ (__force __u32) faddr,
+			    ((__u32) lport) << 16 | (__force __u32)fport,
+			    inet_ehash_secret);
 }
 
 static inline int inet_sk_ehashfn(const struct sock *sk)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f011390f19c..b7b7278d801 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -87,6 +87,7 @@
 #include <linux/init.h>
 #include <linux/poll.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/random.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -217,6 +218,16 @@ out:
 	return err;
 }
 
+u32 inet_ehash_secret;
+EXPORT_SYMBOL(inet_ehash_secret);
+
+void build_ehash_secret(void)
+{
+	while (!inet_ehash_secret)
+		get_random_bytes(&inet_ehash_secret, 4);
+}
+EXPORT_SYMBOL(build_ehash_secret);
+
 /*
  *	Create an inet socket.
  */
@@ -233,6 +244,11 @@ static int inet_create(struct socket *sock, int protocol)
 	int try_loading_module = 0;
 	int err;
 
+	if (sock->type != SOCK_RAW &&
+	    sock->type != SOCK_DGRAM &&
+	    !inet_ehash_secret)
+		build_ehash_secret();
+
 	sock->state = SS_UNCONNECTED;
 
 	/* Look for the requested type/protocol pair. */
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 82572b50754..df31cdd33cd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -98,6 +98,11 @@ static int inet6_create(struct socket *sock, int protocol)
 	int try_loading_module = 0;
 	int err;
 
+	if (sock->type != SOCK_RAW &&
+	    sock->type != SOCK_DGRAM &&
+	    !inet_ehash_secret)
+		build_ehash_secret();
+
 	/* Look for the requested type/protocol pair. */
 	answer = NULL;
 lookup_protocol:
-- 
cgit v1.2.3


From 9af3912ec9e30509b76cb376abb65a4d8af27df3 Mon Sep 17 00:00:00 2001
From: John Heffner <jheffner@psc.edu>
Date: Sun, 25 Mar 2007 23:32:29 -0700
Subject: [NET] Move DF check to ip_forward

Do fragmentation check in ip_forward, similar to ipv6 forwarding.

Signed-off-by: John Heffner <jheffner@psc.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_forward.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 467ebedb99b..61b30d10067 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -85,6 +85,14 @@ int ip_forward(struct sk_buff *skb)
 	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
 		goto sr_failed;
 
+	if (unlikely(skb->len > dst_mtu(&rt->u.dst) &&
+	             (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
+		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+			  htonl(dst_mtu(&rt->u.dst)));
+		goto drop;
+	}
+
 	/* We are about to mangle packet. Copy it! */
 	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
 		goto drop;
-- 
cgit v1.2.3


From e1c3e7ab6de9711d2e0e9daf369c6638582eb7e7 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Sat, 24 Mar 2007 21:34:38 -0700
Subject: [TCP]: cubic update for net-2.6.22

The following update received from Injong updates TCP cubic to the latest
version. I am running more complete tests and will have results after 4/1.

According to Injong: the new version improves on its scalability,
fairness and stability.  So in all properties, we confirmed it shows better
performance.

NCSU results (for 2.6.18 and 2.6.20) available:
	http://netsrv.csc.ncsu.edu/wiki/index.php/TCP_Testing

This version is described in a new Internet draft for CUBIC.
	http://www.ietf.org/internet-drafts/draft-rhee-tcp-cubic-00.txt

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_cubic.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 15c58037500..296845be912 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -1,5 +1,5 @@
 /*
- * TCP CUBIC: Binary Increase Congestion control for TCP v2.0
+ * TCP CUBIC: Binary Increase Congestion control for TCP v2.1
  *
  * This is from the implementation of CUBIC TCP in
  * Injong Rhee, Lisong Xu.
@@ -214,7 +214,9 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 	if (ca->delay_min > 0) {
 		/* max increment = Smax * rtt / 0.1  */
 		min_cnt = (cwnd * HZ * 8)/(10 * max_increment * ca->delay_min);
-		if (ca->cnt < min_cnt)
+
+		/* use concave growth when the target is above the origin */
+		if (ca->cnt < min_cnt && t >= ca->bic_K)
 			ca->cnt = min_cnt;
 	}
 
@@ -400,4 +402,4 @@ module_exit(cubictcp_unregister);
 MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("CUBIC TCP");
-MODULE_VERSION("2.0");
+MODULE_VERSION("2.1");
-- 
cgit v1.2.3


From 85795d64eddd4546375f5ee37bedd88cb5bc4ece Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Sat, 24 Mar 2007 21:35:33 -0700
Subject: [TCP] tcp_probe: improvements for net-2.6.22

Change tcp_probe to use ktime (needed to add one export).
Add option to only get events when cwnd changes - from Doug Leith

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/time.c        |  1 +
 net/ipv4/tcp_probe.c | 68 ++++++++++++++++++++++++++++++----------------------
 2 files changed, 40 insertions(+), 29 deletions(-)

diff --git a/kernel/time.c b/kernel/time.c
index a1439f421d0..ba18ec4899b 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -452,6 +452,7 @@ struct timespec ns_to_timespec(const s64 nsec)
 
 	return ts;
 }
+EXPORT_SYMBOL(ns_to_timespec);
 
 /**
  * ns_to_timeval - Convert nanoseconds to timeval
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 61f406f2729..3938d5dbdf2 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -26,6 +26,8 @@
 #include <linux/proc_fs.h>
 #include <linux/module.h>
 #include <linux/kfifo.h>
+#include <linux/ktime.h>
+#include <linux/time.h>
 #include <linux/vmalloc.h>
 
 #include <net/tcp.h>
@@ -34,43 +36,45 @@ MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>");
 MODULE_DESCRIPTION("TCP cwnd snooper");
 MODULE_LICENSE("GPL");
 
-static int port = 0;
+static int port __read_mostly = 0;
 MODULE_PARM_DESC(port, "Port to match (0=all)");
 module_param(port, int, 0);
 
-static int bufsize = 64*1024;
+static int bufsize __read_mostly = 64*1024;
 MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
 module_param(bufsize, int, 0);
 
+static int full __read_mostly;
+MODULE_PARM_DESC(full, "Full log (1=every ack packet received,  0=only cwnd changes)");
+module_param(full, int, 0);
+
 static const char procname[] = "tcpprobe";
 
 struct {
-	struct kfifo  *fifo;
-	spinlock_t    lock;
+	struct kfifo	*fifo;
+	spinlock_t	lock;
 	wait_queue_head_t wait;
-	struct timeval tstart;
+	ktime_t		start;
+	u32		lastcwnd;
 } tcpw;
 
+/*
+ * Print to log with timestamps.
+ * FIXME: causes an extra copy
+ */
 static void printl(const char *fmt, ...)
 {
 	va_list args;
 	int len;
-	struct timeval now;
+	struct timespec tv;
 	char tbuf[256];
 
 	va_start(args, fmt);
-	do_gettimeofday(&now);
+	/* want monotonic time since start of tcp_probe */
+	tv = ktime_to_timespec(ktime_sub(ktime_get(), tcpw.start));
 
-	now.tv_sec -= tcpw.tstart.tv_sec;
-	now.tv_usec -= tcpw.tstart.tv_usec;
-	if (now.tv_usec < 0) {
-		--now.tv_sec;
-		now.tv_usec += 1000000;
-	}
-
-	len = sprintf(tbuf, "%lu.%06lu ",
-		      (unsigned long) now.tv_sec,
-		      (unsigned long) now.tv_usec);
+	len = sprintf(tbuf, "%lu.%09lu ",
+		      (unsigned long) tv.tv_sec, (unsigned long) tv.tv_nsec);
 	len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
 	va_end(args);
 
@@ -78,38 +82,44 @@ static void printl(const char *fmt, ...)
 	wake_up(&tcpw.wait);
 }
 
-static int jtcp_sendmsg(struct kiocb *iocb, struct sock *sk,
-			struct msghdr *msg, size_t size)
+/*
+ * Hook inserted to be called before each receive packet.
+ * Note: arguments must match tcp_rcv_established()!
+ */
+static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
+			       struct tcphdr *th, unsigned len)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_sock *inet = inet_sk(sk);
 
-	if (port == 0 || ntohs(inet->dport) == port ||
-	    ntohs(inet->sport) == port) {
+	/* Only update if port matches */
+	if ((port == 0 || ntohs(inet->dport) == port || ntohs(inet->sport) == port)
+	    && (full || tp->snd_cwnd != tcpw.lastcwnd)) {
 		printl("%d.%d.%d.%d:%u %d.%d.%d.%d:%u %d %#x %#x %u %u %u\n",
 		       NIPQUAD(inet->saddr), ntohs(inet->sport),
 		       NIPQUAD(inet->daddr), ntohs(inet->dport),
-		       size, tp->snd_nxt, tp->snd_una,
+		       skb->len, tp->snd_nxt, tp->snd_una,
 		       tp->snd_cwnd, tcp_current_ssthresh(sk),
-		       tp->snd_wnd);
+		       tp->snd_wnd, tp->srtt >> 3);
+		tcpw.lastcwnd = tp->snd_cwnd;
 	}
 
 	jprobe_return();
 	return 0;
 }
 
-static struct jprobe tcp_send_probe = {
+static struct jprobe tcp_probe = {
 	.kp = {
-		.symbol_name	= "tcp_sendmsg",
+		.symbol_name	= "tcp_rcv_established",
 	},
-	.entry	= JPROBE_ENTRY(jtcp_sendmsg),
+	.entry	= JPROBE_ENTRY(jtcp_rcv_established),
 };
 
 
 static int tcpprobe_open(struct inode * inode, struct file * file)
 {
 	kfifo_reset(tcpw.fifo);
-	do_gettimeofday(&tcpw.tstart);
+	tcpw.start = ktime_get();
 	return 0;
 }
 
@@ -162,7 +172,7 @@ static __init int tcpprobe_init(void)
 	if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops))
 		goto err0;
 
-	ret = register_jprobe(&tcp_send_probe);
+	ret = register_jprobe(&tcp_probe);
 	if (ret)
 		goto err1;
 
@@ -180,7 +190,7 @@ static __exit void tcpprobe_exit(void)
 {
 	kfifo_free(tcpw.fifo);
 	proc_net_remove(procname);
-	unregister_jprobe(&tcp_send_probe);
+	unregister_jprobe(&tcp_probe);
 
 }
 module_exit(tcpprobe_exit);
-- 
cgit v1.2.3


From 2f7826c02447480c7c1b5500b34fc783f1ed8145 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Mon, 26 Mar 2007 02:00:58 -0700
Subject: [WAN] cosa.c: Build fix.

Caused by skb_reset_mac_header() changes, missing semicolon.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/cosa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index c198511ec3f..23464735fa8 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -773,7 +773,7 @@ static int sppp_rx_done(struct channel_data *chan)
 	}
 	chan->rx_skb->protocol = htons(ETH_P_WAN_PPP);
 	chan->rx_skb->dev = chan->pppdev.dev;
-	skb_reset_mac_header(chan->rx_skb)
+	skb_reset_mac_header(chan->rx_skb);
 	chan->stats.rx_packets++;
 	chan->stats.rx_bytes += chan->cosa->rxsize;
 	netif_rx(chan->rx_skb);
-- 
cgit v1.2.3


From 0947c9fe56d9cf7ad0bc3a03ccd30446cde698e4 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 26 Mar 2007 17:14:15 -0700
Subject: [NET] fib_rules: goto rule action

This patch adds a new rule action FR_ACT_GOTO which allows
to skip a set of rules by jumping to another rule. The rule
to jump to is specified via the FRA_GOTO attribute which
carries a rule preference.

Referring to a rule which doesn't exists is explicitely allowed.
Such goto rules are marked with the flag FIB_RULE_UNRESOLVED
and will act like a rule with a non-matching selector. The rule
will become functional as soon as its target is present.

The goto action enables performance optimizations by reducing
the average number of rules that have to be passed per lookup.

Example:
0:      from all lookup local
40:     not from all to 192.168.23.128 goto 32766
41:     from all fwmark 0xa blackhole
42:     from all fwmark 0xff blackhole
32766:  from all lookup main

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fib_rules.h |  5 +--
 include/net/fib_rules.h   |  7 +++-
 net/core/fib_rules.c      | 88 +++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 94 insertions(+), 6 deletions(-)

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index 8270aac2aa5..ec9c7b1d3e9 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -7,6 +7,7 @@
 /* rule is permanent, and cannot be deleted */
 #define FIB_RULE_PERMANENT	1
 #define FIB_RULE_INVERT		2
+#define FIB_RULE_UNRESOLVED	4
 
 struct fib_rule_hdr
 {
@@ -29,7 +30,7 @@ enum
 	FRA_DST,	/* destination address */
 	FRA_SRC,	/* source address */
 	FRA_IFNAME,	/* interface name */
-	FRA_UNUSED1,
+	FRA_GOTO,	/* target to jump to (FR_ACT_GOTO) */
 	FRA_UNUSED2,
 	FRA_PRIORITY,	/* priority/preference */
 	FRA_UNUSED3,
@@ -51,7 +52,7 @@ enum
 {
 	FR_ACT_UNSPEC,
 	FR_ACT_TO_TBL,		/* Pass to fixed table */
-	FR_ACT_RES1,
+	FR_ACT_GOTO,		/* Jump to another rule */
 	FR_ACT_RES2,
 	FR_ACT_RES3,
 	FR_ACT_RES4,
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index ff3029fe965..08bab8b6e57 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -19,6 +19,8 @@ struct fib_rule
 	u32			flags;
 	u32			table;
 	u8			action;
+	u32			target;
+	struct fib_rule *	ctarget;
 	struct rcu_head		rcu;
 };
 
@@ -35,6 +37,8 @@ struct fib_rules_ops
 	struct list_head	list;
 	int			rule_size;
 	int			addr_size;
+	int			unresolved_rules;
+	int			nr_goto_rules;
 
 	int			(*action)(struct fib_rule *,
 					  struct flowi *, int,
@@ -66,7 +70,8 @@ struct fib_rules_ops
 	[FRA_PRIORITY]	= { .type = NLA_U32 }, \
 	[FRA_FWMARK]	= { .type = NLA_U32 }, \
 	[FRA_FWMASK]	= { .type = NLA_U32 }, \
-	[FRA_TABLE]     = { .type = NLA_U32 }
+	[FRA_TABLE]     = { .type = NLA_U32 }, \
+	[FRA_GOTO]	= { .type = NLA_U32 }
 
 static inline void fib_rule_get(struct fib_rule *rule)
 {
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index fdf05af16ba..0d8bb2efb0c 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -132,10 +132,23 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl,
 	rcu_read_lock();
 
 	list_for_each_entry_rcu(rule, ops->rules_list, list) {
+jumped:
 		if (!fib_rule_match(rule, ops, fl, flags))
 			continue;
 
-		err = ops->action(rule, fl, flags, arg);
+		if (rule->action == FR_ACT_GOTO) {
+			struct fib_rule *target;
+
+			target = rcu_dereference(rule->ctarget);
+			if (target == NULL) {
+				continue;
+			} else {
+				rule = target;
+				goto jumped;
+			}
+		} else
+			err = ops->action(rule, fl, flags, arg);
+
 		if (err != -EAGAIN) {
 			fib_rule_get(rule);
 			arg->rule = rule;
@@ -180,7 +193,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	struct fib_rules_ops *ops = NULL;
 	struct fib_rule *rule, *r, *last = NULL;
 	struct nlattr *tb[FRA_MAX+1];
-	int err = -EINVAL;
+	int err = -EINVAL, unresolved = 0;
 
 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
 		goto errout;
@@ -237,6 +250,28 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	if (!rule->pref && ops->default_pref)
 		rule->pref = ops->default_pref();
 
+	err = -EINVAL;
+	if (tb[FRA_GOTO]) {
+		if (rule->action != FR_ACT_GOTO)
+			goto errout_free;
+
+		rule->target = nla_get_u32(tb[FRA_GOTO]);
+		/* Backward jumps are prohibited to avoid endless loops */
+		if (rule->target <= rule->pref)
+			goto errout_free;
+
+		list_for_each_entry(r, ops->rules_list, list) {
+			if (r->pref == rule->target) {
+				rule->ctarget = r;
+				break;
+			}
+		}
+
+		if (rule->ctarget == NULL)
+			unresolved = 1;
+	} else if (rule->action == FR_ACT_GOTO)
+		goto errout_free;
+
 	err = ops->configure(rule, skb, nlh, frh, tb);
 	if (err < 0)
 		goto errout_free;
@@ -249,6 +284,28 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 	fib_rule_get(rule);
 
+	if (ops->unresolved_rules) {
+		/*
+		 * There are unresolved goto rules in the list, check if
+		 * any of them are pointing to this new rule.
+		 */
+		list_for_each_entry(r, ops->rules_list, list) {
+			if (r->action == FR_ACT_GOTO &&
+			    r->target == rule->pref) {
+				BUG_ON(r->ctarget != NULL);
+				rcu_assign_pointer(r->ctarget, rule);
+				if (--ops->unresolved_rules == 0)
+					break;
+			}
+		}
+	}
+
+	if (rule->action == FR_ACT_GOTO)
+		ops->nr_goto_rules++;
+
+	if (unresolved)
+		ops->unresolved_rules++;
+
 	if (last)
 		list_add_rcu(&rule->list, &last->list);
 	else
@@ -269,7 +326,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct fib_rule_hdr *frh = nlmsg_data(nlh);
 	struct fib_rules_ops *ops = NULL;
-	struct fib_rule *rule;
+	struct fib_rule *rule, *tmp;
 	struct nlattr *tb[FRA_MAX+1];
 	int err = -EINVAL;
 
@@ -322,6 +379,25 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		}
 
 		list_del_rcu(&rule->list);
+
+		if (rule->action == FR_ACT_GOTO)
+			ops->nr_goto_rules--;
+
+		/*
+		 * Check if this rule is a target to any of them. If so,
+		 * disable them. As this operation is eventually very
+		 * expensive, it is only performed if goto rules have
+		 * actually been added.
+		 */
+		if (ops->nr_goto_rules > 0) {
+			list_for_each_entry(tmp, ops->rules_list, list) {
+				if (tmp->ctarget == rule) {
+					rcu_assign_pointer(tmp->ctarget, NULL);
+					ops->unresolved_rules++;
+				}
+			}
+		}
+
 		synchronize_rcu();
 		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
 				   NETLINK_CB(skb).pid);
@@ -371,6 +447,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	frh->action = rule->action;
 	frh->flags = rule->flags;
 
+	if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
+		frh->flags |= FIB_RULE_UNRESOLVED;
+
 	if (rule->ifname[0])
 		NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
 
@@ -383,6 +462,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	if (rule->mark_mask || rule->mark)
 		NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask);
 
+	if (rule->target)
+		NLA_PUT_U32(skb, FRA_GOTO, rule->target);
+
 	if (ops->fill(rule, skb, nlh, frh) < 0)
 		goto nla_put_failure;
 
-- 
cgit v1.2.3


From 2b44368307cd06c5614d7b53801f516c0654020b Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 26 Mar 2007 17:37:59 -0700
Subject: [NET] fib_rules: Mark rules detached from the device

Rules which match against device names in their selector can
remain while the device itself disappears, in fact the device
doesn't have to present when the rule is added in the first
place. The device name is resolved by trying when the rule is
added and later by listening to NETDEV_REGISTER/UNREGISTER
notifications.

This patch adds the flag FIB_RULE_DEV_DETACHED which is set
towards userspace when a rule contains a device match which
is unresolved at the moment. This eases spotting the reason
why certain rules seem not to function properly.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fib_rules.h | 1 +
 net/core/fib_rules.c      | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index ec9c7b1d3e9..c151273293e 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -8,6 +8,7 @@
 #define FIB_RULE_PERMANENT	1
 #define FIB_RULE_INVERT		2
 #define FIB_RULE_UNRESOLVED	4
+#define FIB_RULE_DEV_DETACHED	8
 
 struct fib_rule_hdr
 {
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 0d8bb2efb0c..7ac602cc8c8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -450,9 +450,13 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 	if (rule->action == FR_ACT_GOTO && rule->ctarget == NULL)
 		frh->flags |= FIB_RULE_UNRESOLVED;
 
-	if (rule->ifname[0])
+	if (rule->ifname[0]) {
 		NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname);
 
+		if (rule->ifindex == -1)
+			frh->flags |= FIB_RULE_DEV_DETACHED;
+	}
+
 	if (rule->pref)
 		NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref);
 
-- 
cgit v1.2.3


From fa0b2d1d2196dd46527a8d028797e2bca5930a92 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Mon, 26 Mar 2007 17:38:53 -0700
Subject: [NET] fib_rules: Add no-operation action

The use of nop rules simplifies the usage of goto rules
and adds more flexibility as they allow targets to remain
while the actual content of the branches can change easly.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fib_rules.h | 2 +-
 net/core/fib_rules.c      | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index c151273293e..f278ba781d0 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -54,7 +54,7 @@ enum
 	FR_ACT_UNSPEC,
 	FR_ACT_TO_TBL,		/* Pass to fixed table */
 	FR_ACT_GOTO,		/* Jump to another rule */
-	FR_ACT_RES2,
+	FR_ACT_NOP,		/* No operation */
 	FR_ACT_RES3,
 	FR_ACT_RES4,
 	FR_ACT_BLACKHOLE,	/* Drop without notification */
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 7ac602cc8c8..5824b2644f2 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -146,7 +146,9 @@ jumped:
 				rule = target;
 				goto jumped;
 			}
-		} else
+		} else if (rule->action == FR_ACT_NOP)
+			continue;
+		else
 			err = ops->action(rule, fl, flags, arg);
 
 		if (err != -EAGAIN) {
-- 
cgit v1.2.3


From 2d771cd86d4c3af26f34a7bcdc1b87696824cad9 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jkrzyszt@tis.icnet.pl>
Date: Mon, 26 Mar 2007 18:03:44 -0700
Subject: [IPV4] LVS: Allow to send ICMP unreachable responses when
 real-servers are removed

this is a small patch by Janusz Krzysztofik to ip_route_output_slow()
that allows VIP-less LVS linux director to generate packets
originating >From VIP if sysctl_ip_nonlocal_bind is set.

In a nutshell, the intention is for an LVS linux director to be able
to send ICMP unreachable responses to end-users when real-servers are
removed.

http://archive.linuxvirtualserver.org/html/lvs-users/2007-01/msg00106.html

Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index df9fe4f2e8c..cb76e3c725a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2396,7 +2396,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 
 		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
 		dev_out = ip_dev_find(oldflp->fl4_src);
-		if (dev_out == NULL)
+		if ((dev_out == NULL) && !(sysctl_ip_nonlocal_bind))
 			goto out;
 
 		/* I removed check for oif == dev_out->oif here.
@@ -2407,7 +2407,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 		      of another iface. --ANK
 		 */
 
-		if (oldflp->oif == 0
+		if (dev_out && oldflp->oif == 0
 		    && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
 			/* Special hack: user can direct multicasts
 			   and limited broadcast via necessary interface
-- 
cgit v1.2.3


From 35fc92a9deee0da6e35fdc3150bb134e58f2fd63 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 26 Mar 2007 23:22:20 -0700
Subject: [NET]: Allow forwarding of ip_summed except CHECKSUM_COMPLETE

Right now Xen has a horrible hack that lets it forward packets with
partial checksums.  One of the reasons that CHECKSUM_PARTIAL and
CHECKSUM_COMPLETE were added is so that we can get rid of this hack
(where it creates two extra bits in the skbuff to essentially mirror
ip_summed without being destroyed by the forwarding code).

I had forgotten that I've already gone through all the deivce drivers
last time around to make sure that they're looking at ip_summed ==
CHECKSUM_PARTIAL rather than ip_summed != 0 on transmit.  In any case,
I've now done that again so it should definitely be safe.

Unfortunately nobody has yet added any code to update CHECKSUM_COMPLETE
values on forward so we I'm setting that to CHECKSUM_NONE.  This should
be safe to remove for bridging but I'd like to check that code path
first.

So here is the patch that lets us get rid of the hack by preserving
ip_summed (mostly) on forwarded packets.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h  | 7 +++++++
 net/bridge/br_forward.c | 2 +-
 net/ipv4/ip_forward.c   | 2 +-
 net/ipv6/ip6_output.c   | 2 +-
 4 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 37247901ebd..1c19b2d55c2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1654,5 +1654,12 @@ static inline int skb_is_gso(const struct sk_buff *skb)
 	return skb_shinfo(skb)->gso_size;
 }
 
+static inline void skb_forward_csum(struct sk_buff *skb)
+{
+	/* Unfortunately we don't support this one.  Any brave souls? */
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->ip_summed = CHECKSUM_NONE;
+}
+
 #endif	/* __KERNEL__ */
 #endif	/* _LINUX_SKBUFF_H */
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 3e45c1a1aa9..ada7f495445 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -71,7 +71,7 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
 
 	indev = skb->dev;
 	skb->dev = to->dev;
-	skb->ip_summed = CHECKSUM_NONE;
+	skb_forward_csum(skb);
 
 	NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
 			br_forward_finish);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 61b30d10067..9cb04df0054 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -67,7 +67,7 @@ int ip_forward(struct sk_buff *skb)
 	if (skb->pkt_type != PACKET_HOST)
 		goto drop;
 
-	skb->ip_summed = CHECKSUM_NONE;
+	skb_forward_csum(skb);
 
 	/*
 	 *	According to the RFC, we must first decrease the TTL field. If
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e2b8db6b9ae..be3f082a87e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -378,7 +378,7 @@ int ip6_forward(struct sk_buff *skb)
 		goto drop;
 	}
 
-	skb->ip_summed = CHECKSUM_NONE;
+	skb_forward_csum(skb);
 
 	/*
 	 *	We DO NOT make any processing on
-- 
cgit v1.2.3


From be776281aee54626a474ba06f91926b98bdd180d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 27 Mar 2007 13:53:04 -0700
Subject: [NET]: inet_ehash_secret should be __read_mostly and set only once

There is a very tiny probability that build_ehash_secret() is called
at the same time by different CPUS.

Also, using __read_mostly is a must for inet_ehash_secret

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b7b7278d801..45ced52c03d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -218,13 +218,23 @@ out:
 	return err;
 }
 
-u32 inet_ehash_secret;
+u32 inet_ehash_secret __read_mostly;
 EXPORT_SYMBOL(inet_ehash_secret);
 
+/*
+ * inet_ehash_secret must be set exactly once
+ * Instead of using a dedicated spinlock, we (ab)use inetsw_lock
+ */
 void build_ehash_secret(void)
 {
-	while (!inet_ehash_secret)
-		get_random_bytes(&inet_ehash_secret, 4);
+	u32 rnd;
+	do {
+		get_random_bytes(&rnd, sizeof(rnd));
+	} while (rnd == 0);
+	spin_lock_bh(&inetsw_lock);
+	if (!inet_ehash_secret)
+		inet_ehash_secret = rnd;
+	spin_unlock_bh(&inetsw_lock);
 }
 EXPORT_SYMBOL(build_ehash_secret);
 
-- 
cgit v1.2.3


From 73417f617a93cf30342c3ea41abc38927bd467aa Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 27 Mar 2007 13:56:52 -0700
Subject: [NET] fib_rules: Flush route cache after rule modifications

The results of FIB rules lookups are cached in the routing cache
except for IPv6 as no such cache exists. So far, it was the
responsibility of the user to flush the cache after modifying any
rules. This lead to many false bug reports due to misunderstanding
of this concept.

This patch automatically flushes the route cache after inserting
or deleting a rule.

Thanks to Muli Ben-Yehuda <muli@il.ibm.com> for catching a bug
in the previous patch.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h | 4 ++++
 net/core/fib_rules.c    | 8 ++++++++
 net/decnet/dn_rules.c   | 7 +++++++
 net/ipv4/fib_rules.c    | 6 ++++++
 4 files changed, 25 insertions(+)

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 08bab8b6e57..ed3a8872c6c 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -59,6 +59,10 @@ struct fib_rules_ops
 	u32			(*default_pref)(void);
 	size_t			(*nlmsg_payload)(struct fib_rule *);
 
+	/* Called after modifications to the rules set, must flush
+	 * the route cache if one exists. */
+	void			(*flush_cache)(void);
+
 	int			nlgroup;
 	struct nla_policy	*policy;
 	struct list_head	*rules_list;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 5824b2644f2..cb2dae19531 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -44,6 +44,12 @@ static void rules_ops_put(struct fib_rules_ops *ops)
 		module_put(ops->owner);
 }
 
+static void flush_route_cache(struct fib_rules_ops *ops)
+{
+	if (ops->flush_cache)
+		ops->flush_cache();
+}
+
 int fib_rules_register(struct fib_rules_ops *ops)
 {
 	int err = -EEXIST;
@@ -314,6 +320,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		list_add_rcu(&rule->list, ops->rules_list);
 
 	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+	flush_route_cache(ops);
 	rules_ops_put(ops);
 	return 0;
 
@@ -404,6 +411,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
 				   NETLINK_CB(skb).pid);
 		fib_rule_put(rule);
+		flush_route_cache(ops);
 		rules_ops_put(ops);
 		return 0;
 	}
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index fd0cc2aa316..7f58b95b27d 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -31,6 +31,7 @@
 #include <net/dn_fib.h>
 #include <net/dn_neigh.h>
 #include <net/dn_dev.h>
+#include <net/dn_route.h>
 
 static struct fib_rules_ops dn_fib_rules_ops;
 
@@ -239,6 +240,11 @@ static u32 dn_fib_rule_default_pref(void)
 	return 0;
 }
 
+static void dn_fib_rule_flush_cache(void)
+{
+	dn_rt_cache_flush(0);
+}
+
 static struct fib_rules_ops dn_fib_rules_ops = {
 	.family		= AF_DECnet,
 	.rule_size	= sizeof(struct dn_fib_rule),
@@ -249,6 +255,7 @@ static struct fib_rules_ops dn_fib_rules_ops = {
 	.compare	= dn_fib_rule_compare,
 	.fill		= dn_fib_rule_fill,
 	.default_pref	= dn_fib_rule_default_pref,
+	.flush_cache	= dn_fib_rule_flush_cache,
 	.nlgroup	= RTNLGRP_DECnet_RULE,
 	.policy		= dn_fib_rule_policy,
 	.rules_list	= &dn_fib_rules,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index b021b3440ca..fe29b98d6c8 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -298,6 +298,11 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
 	       + nla_total_size(4); /* flow */
 }
 
+static void fib4_rule_flush_cache(void)
+{
+	rt_cache_flush(0);
+}
+
 static struct fib_rules_ops fib4_rules_ops = {
 	.family		= AF_INET,
 	.rule_size	= sizeof(struct fib4_rule),
@@ -309,6 +314,7 @@ static struct fib_rules_ops fib4_rules_ops = {
 	.fill		= fib4_rule_fill,
 	.default_pref	= fib4_rule_default_pref,
 	.nlmsg_payload	= fib4_rule_nlmsg_payload,
+	.flush_cache	= fib4_rule_flush_cache,
 	.nlgroup	= RTNLGRP_IPV4_RULE,
 	.policy		= fib4_rule_policy,
 	.rules_list	= &fib4_rules,
-- 
cgit v1.2.3


From 4103f8cd5c1f260d674a7b426ed221812de54d47 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 27 Mar 2007 13:58:31 -0700
Subject: [TCP]: tcp_memory_pressure and tcp_socket are__read_mostly candidates

tcp_memory_pressure and tcp_socket currently share a cache line with tcp_memory_allocated, tcp_sockets_allocated.
(Very hot cache line)
It makes sense to declare these variables as __read_mostly, to avoid false sharing on SMP.

ffffffff8081d9c0 B tcp_orphan_count
ffffffff8081d9c4 B tcp_memory_allocated
ffffffff8081d9c8 B tcp_sockets_allocated
ffffffff8081d9cc B tcp_memory_pressure
ffffffff8081d9d0 b tcp_md5sig_users
ffffffff8081d9d8 b tcp_md5sig_pool
ffffffff8081d9e0 b warntime.31570
ffffffff8081d9e8 b tcp_socket

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c      | 2 +-
 net/ipv4/tcp_ipv4.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 18a09a78ca0..bb9d91a7662 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -297,7 +297,7 @@ EXPORT_SYMBOL(tcp_sockets_allocated);
  * All the sk_stream_mem_schedule() is of this nature: accounting
  * is strict, actions are advisory and have some latency.
  */
-int tcp_memory_pressure;
+int tcp_memory_pressure __read_mostly;
 
 EXPORT_SYMBOL(tcp_memory_pressure);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 617a5e4ca01..e11eaf4cc26 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -88,7 +88,7 @@ int sysctl_tcp_low_latency __read_mostly;
 #define ICMP_MIN_LENGTH 8
 
 /* Socket used for sending RSTs */
-static struct socket *tcp_socket;
+static struct socket *tcp_socket __read_mostly;
 
 void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
 
-- 
cgit v1.2.3


From 03d4f879b9ddf7d5c1f788792247e62450342eed Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 27 Mar 2007 14:18:34 -0700
Subject: [IPV4]: align inet_protos[] on SMP

As IPPROTO_TCP is 6, it makes sense to make sure inet_protos[] array
is properly cache line aligned to avoid false sharing on SMP.

c0680540 b peer_total
c0680544 b inet_peer_unused_head
c0680560 B inet_protos

On i386 this example, we can see that inet_protos[IPPROTO_TCP] shares
a potentially hot (and modified) cache line.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/protocol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index da70fef82c9..971ab9356e5 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -45,7 +45,7 @@
 #include <net/ipip.h>
 #include <linux/igmp.h>
 
-struct net_protocol *inet_protos[MAX_INET_PROTOS];
+struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp;
 static DEFINE_SPINLOCK(inet_proto_lock);
 
 /*
-- 
cgit v1.2.3


From 2a123b86e2b242a4a6db990d2851d45e192f88e5 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 27 Mar 2007 18:38:07 -0300
Subject: [BLUETOOTH]: Introduce skb->data accessor methods for
 hci_{acl,event,sco}_hdr

For consistency with other skb data accessors, reducing the number of direct
accesses to skb->data.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 drivers/bluetooth/bluecard_cs.c |  6 +++---
 drivers/bluetooth/bt3c_cs.c     |  6 +++---
 drivers/bluetooth/btuart_cs.c   |  6 +++---
 drivers/bluetooth/hci_h4.c      |  6 +++---
 include/net/bluetooth/hci.h     | 18 ++++++++++++++++++
 5 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c
index acfb6a430dc..851de4d5b7d 100644
--- a/drivers/bluetooth/bluecard_cs.c
+++ b/drivers/bluetooth/bluecard_cs.c
@@ -461,20 +461,20 @@ static void bluecard_receive(bluecard_info_t *info, unsigned int offset)
 				switch (info->rx_state) {
 
 				case RECV_WAIT_EVENT_HEADER:
-					eh = (struct hci_event_hdr *)(info->rx_skb->data);
+					eh = hci_event_hdr(info->rx_skb);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = eh->plen;
 					break;
 
 				case RECV_WAIT_ACL_HEADER:
-					ah = (struct hci_acl_hdr *)(info->rx_skb->data);
+					ah = hci_acl_hdr(info->rx_skb);
 					dlen = __le16_to_cpu(ah->dlen);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = dlen;
 					break;
 
 				case RECV_WAIT_SCO_HEADER:
-					sh = (struct hci_sco_hdr *)(info->rx_skb->data);
+					sh = hci_sco_hdr(info->rx_skb);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = sh->dlen;
 					break;
diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c
index 18b0f3992c5..39516074636 100644
--- a/drivers/bluetooth/bt3c_cs.c
+++ b/drivers/bluetooth/bt3c_cs.c
@@ -303,20 +303,20 @@ static void bt3c_receive(bt3c_info_t *info)
 				switch (info->rx_state) {
 
 				case RECV_WAIT_EVENT_HEADER:
-					eh = (struct hci_event_hdr *)(info->rx_skb->data);
+					eh = hci_event_hdr(info->rx_skb);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = eh->plen;
 					break;
 
 				case RECV_WAIT_ACL_HEADER:
-					ah = (struct hci_acl_hdr *)(info->rx_skb->data);
+					ah = hci_acl_hdr(info->rx_skb);
 					dlen = __le16_to_cpu(ah->dlen);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = dlen;
 					break;
 
 				case RECV_WAIT_SCO_HEADER:
-					sh = (struct hci_sco_hdr *)(info->rx_skb->data);
+					sh = hci_sco_hdr(info->rx_skb);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = sh->dlen;
 					break;
diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c
index c1bce75148f..d7d2ea0d86a 100644
--- a/drivers/bluetooth/btuart_cs.c
+++ b/drivers/bluetooth/btuart_cs.c
@@ -250,20 +250,20 @@ static void btuart_receive(btuart_info_t *info)
 				switch (info->rx_state) {
 
 				case RECV_WAIT_EVENT_HEADER:
-					eh = (struct hci_event_hdr *)(info->rx_skb->data);
+					eh = hci_event_hdr(info->rx_skb);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = eh->plen;
 					break;
 
 				case RECV_WAIT_ACL_HEADER:
-					ah = (struct hci_acl_hdr *)(info->rx_skb->data);
+					ah = hci_acl_hdr(info->rx_skb);
 					dlen = __le16_to_cpu(ah->dlen);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = dlen;
 					break;
 
 				case RECV_WAIT_SCO_HEADER:
-					sh = (struct hci_sco_hdr *)(info->rx_skb->data);
+					sh = hci_sco_hdr(info->rx_skb);
 					info->rx_state = RECV_WAIT_DATA;
 					info->rx_count = sh->dlen;
 					break;
diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c
index 34f0afc4240..bfbae14cf93 100644
--- a/drivers/bluetooth/hci_h4.c
+++ b/drivers/bluetooth/hci_h4.c
@@ -188,7 +188,7 @@ static int h4_recv(struct hci_uart *hu, void *data, int count)
 				continue;
 
 			case H4_W4_EVENT_HDR:
-				eh = (struct hci_event_hdr *) h4->rx_skb->data;
+				eh = hci_event_hdr(h4->rx_skb);
 
 				BT_DBG("Event header: evt 0x%2.2x plen %d", eh->evt, eh->plen);
 
@@ -196,7 +196,7 @@ static int h4_recv(struct hci_uart *hu, void *data, int count)
 				continue;
 
 			case H4_W4_ACL_HDR:
-				ah = (struct hci_acl_hdr *) h4->rx_skb->data;
+				ah = hci_acl_hdr(h4->rx_skb);
 				dlen = __le16_to_cpu(ah->dlen);
 
 				BT_DBG("ACL header: dlen %d", dlen);
@@ -205,7 +205,7 @@ static int h4_recv(struct hci_uart *hu, void *data, int count)
 				continue;
 
 			case H4_W4_SCO_HDR:
-				sh = (struct hci_sco_hdr *) h4->rx_skb->data;
+				sh = hci_sco_hdr(h4->rx_skb);
 
 				BT_DBG("SCO header: dlen %d", sh->dlen);
 
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 41456c14884..93ce272a5d2 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -709,6 +709,24 @@ struct hci_sco_hdr {
 	__u8 	dlen;
 } __attribute__ ((packed));
 
+#ifdef __KERNEL__
+#include <linux/skbuff.h>
+static inline struct hci_event_hdr *hci_event_hdr(const struct sk_buff *skb)
+{
+	return (struct hci_event_hdr *)skb->data;
+}
+
+static inline struct hci_acl_hdr *hci_acl_hdr(const struct sk_buff *skb)
+{
+	return (struct hci_acl_hdr *)skb->data;
+}
+
+static inline struct hci_sco_hdr *hci_sco_hdr(const struct sk_buff *skb)
+{
+	return (struct hci_sco_hdr *)skb->data;
+}
+#endif
+
 /* Command opcode pack/unpack */
 #define hci_opcode_pack(ogf, ocf)	(__u16) ((ocf & 0x03ff)|(ogf << 10))
 #define hci_opcode_ogf(op)		(op >> 10)
-- 
cgit v1.2.3


From d626f62b11e00c16e81e4308ab93d3f13551812a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 27 Mar 2007 18:55:52 -0300
Subject: [SK_BUFF]: Introduce skb_copy_from_linear_data{_offset}

To clearly state the intent of copying from linear sk_buffs, _offset being a
overly long variant but interesting for the sake of saving some bytes.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/ia64/sn/kernel/xpnet.c                   |  3 ++-
 drivers/atm/atmtcp.c                          |  4 ++--
 drivers/atm/nicstar.c                         |  6 +++---
 drivers/bluetooth/bfusb.c                     |  2 +-
 drivers/bluetooth/bpa10x.c                    |  4 ++--
 drivers/bluetooth/dtl1_cs.c                   |  2 +-
 drivers/char/pcmcia/synclink_cs.c             |  2 +-
 drivers/infiniband/hw/cxgb3/iwch_cm.c         |  9 ++++++---
 drivers/isdn/act2000/module.c                 |  2 +-
 drivers/isdn/gigaset/usb-gigaset.c            |  2 +-
 drivers/isdn/hardware/avm/b1dma.c             |  3 ++-
 drivers/isdn/hardware/avm/c4.c                |  3 ++-
 drivers/isdn/hisax/elsa_ser.c                 |  6 ++++--
 drivers/isdn/hisax/isdnl2.c                   |  3 ++-
 drivers/isdn/hysdn/hycapi.c                   |  2 +-
 drivers/isdn/hysdn/hysdn_sched.c              |  5 +++--
 drivers/isdn/i4l/isdn_common.c                |  2 +-
 drivers/isdn/i4l/isdn_ppp.c                   |  7 +++++--
 drivers/isdn/isdnloop/isdnloop.c              |  3 ++-
 drivers/isdn/pcbit/capi.c                     | 12 +++++++-----
 drivers/media/dvb/dvb-core/dvb_net.c          |  4 +++-
 drivers/message/fusion/mptlan.c               |  6 +++---
 drivers/net/3c505.c                           |  2 +-
 drivers/net/3c523.c                           |  2 +-
 drivers/net/7990.c                            |  2 +-
 drivers/net/a2065.c                           |  2 +-
 drivers/net/arcnet/capmode.c                  |  3 ++-
 drivers/net/atari_bionet.c                    |  3 ++-
 drivers/net/atari_pamsnet.c                   |  3 ++-
 drivers/net/au1000_eth.c                      |  2 +-
 drivers/net/b44.c                             |  7 ++++---
 drivers/net/bnx2.c                            |  6 ++----
 drivers/net/cassini.c                         |  4 ++--
 drivers/net/chelsio/sge.c                     |  2 +-
 drivers/net/cxgb3/sge.c                       |  5 +++--
 drivers/net/dgrs.c                            |  2 +-
 drivers/net/eepro100.c                        |  5 +++--
 drivers/net/ehea/ehea_main.c                  | 11 ++++++-----
 drivers/net/fec_8xx/fec_main.c                |  4 +++-
 drivers/net/fs_enet/fs_enet-main.c            |  6 ++++--
 drivers/net/hamradio/dmascc.c                 |  2 +-
 drivers/net/hamradio/hdlcdrv.c                |  4 +++-
 drivers/net/hamradio/yam.c                    |  4 +++-
 drivers/net/ioc3-eth.c                        |  2 +-
 drivers/net/irda/ali-ircc.c                   |  5 ++---
 drivers/net/irda/au1k_ir.c                    |  2 +-
 drivers/net/irda/donauboe.c                   |  2 +-
 drivers/net/irda/irda-usb.c                   |  4 ++--
 drivers/net/irda/mcs7780.c                    |  4 ++--
 drivers/net/irda/nsc-ircc.c                   |  5 ++---
 drivers/net/irda/pxaficp_ir.c                 |  2 +-
 drivers/net/irda/smsc-ircc2.c                 |  2 +-
 drivers/net/irda/via-ircc.c                   |  4 ++--
 drivers/net/irda/vlsi_ir.c                    |  2 +-
 drivers/net/irda/w83977af_ir.c                |  2 +-
 drivers/net/lance.c                           |  2 +-
 drivers/net/macmace.c                         |  3 +--
 drivers/net/meth.c                            | 10 +++++-----
 drivers/net/myri_sbus.c                       |  2 +-
 drivers/net/netxen/netxen_nic_main.c          |  6 ++++--
 drivers/net/ni52.c                            |  2 +-
 drivers/net/ni65.c                            |  5 +++--
 drivers/net/pci-skeleton.c                    |  2 +-
 drivers/net/pcmcia/axnet_cs.c                 |  2 +-
 drivers/net/ppp_synctty.c                     |  3 ++-
 drivers/net/pppoe.c                           |  3 ++-
 drivers/net/qla3xxx.c                         |  3 ++-
 drivers/net/rrunner.c                         |  2 +-
 drivers/net/sgiseeq.c                         |  2 +-
 drivers/net/skge.c                            |  2 +-
 drivers/net/sky2.c                            |  2 +-
 drivers/net/sun3_82586.c                      |  2 +-
 drivers/net/sun3lance.c                       |  2 +-
 drivers/net/sungem.c                          |  2 +-
 drivers/net/sunhme.c                          |  2 +-
 drivers/net/sunlance.c                        |  2 +-
 drivers/net/sunqe.c                           |  2 +-
 drivers/net/tg3.c                             |  2 +-
 drivers/net/tlan.c                            |  2 +-
 drivers/net/tokenring/3c359.c                 |  7 +++++--
 drivers/net/tokenring/olympic.c               |  8 ++++++--
 drivers/net/tokenring/tms380tr.c              |  2 +-
 drivers/net/tulip/de2104x.c                   |  4 ++--
 drivers/net/tulip/dmfe.c                      |  6 ++++--
 drivers/net/tulip/uli526x.c                   |  2 +-
 drivers/net/tulip/xircom_cb.c                 |  6 +++---
 drivers/net/tulip/xircom_tulip_cb.c           |  4 +++-
 drivers/net/tun.c                             |  4 ++--
 drivers/net/via-velocity.c                    |  7 ++++---
 drivers/net/wan/lmc/lmc_main.c                |  2 +-
 drivers/net/wan/pc300_drv.c                   |  2 +-
 drivers/net/wan/z85230.c                      |  2 +-
 drivers/net/wireless/atmel.c                  |  4 ++--
 drivers/net/wireless/bcm43xx/bcm43xx_dma.c    |  3 ++-
 drivers/net/wireless/hostap/hostap_80211_rx.c | 13 ++++++++-----
 drivers/net/wireless/hostap/hostap_80211_tx.c | 23 ++++++++++++++---------
 drivers/net/wireless/hostap/hostap_ap.c       |  4 ++--
 drivers/net/wireless/hostap/hostap_hw.c       |  5 +++--
 drivers/net/wireless/ipw2100.c                |  5 +++--
 drivers/net/wireless/ipw2200.c                |  2 +-
 drivers/net/wireless/prism54/islpci_eth.c     | 13 +++++++++----
 drivers/net/wireless/ray_cs.c                 |  3 ++-
 drivers/net/wireless/wavelan.c                |  2 +-
 drivers/net/wireless/zd1201.c                 |  4 ++--
 drivers/s390/net/ctcmain.c                    | 13 ++++++++-----
 drivers/s390/net/lcs.c                        |  2 +-
 drivers/s390/net/netiucv.c                    |  7 +++++--
 drivers/s390/net/qeth_eddp.c                  |  3 ++-
 drivers/usb/atm/usbatm.c                      |  2 +-
 drivers/usb/net/catc.c                        |  2 +-
 drivers/usb/net/pegasus.c                     |  2 +-
 include/linux/skbuff.h                        | 14 ++++++++++++++
 net/ax25/ax25_out.c                           |  4 ++--
 net/bluetooth/bnep/core.c                     |  2 +-
 net/bluetooth/cmtp/core.c                     |  4 ++--
 net/bluetooth/l2cap.c                         |  6 ++++--
 net/bridge/br_netfilter.c                     |  3 ++-
 net/core/skbuff.c                             | 17 +++++++++--------
 net/decnet/dn_nsp_in.c                        |  5 +++--
 net/ieee80211/ieee80211_crypt_wep.c           |  2 +-
 net/ieee80211/ieee80211_rx.c                  |  6 +++---
 net/ieee80211/ieee80211_tx.c                  |  8 ++++----
 net/ipv4/ip_output.c                          |  2 +-
 net/ipv6/ip6_output.c                         |  2 +-
 net/irda/irttp.c                              |  4 ++--
 net/netrom/af_netrom.c                        |  3 ++-
 net/netrom/nr_loopback.c                      |  2 +-
 net/netrom/nr_out.c                           |  4 ++--
 net/netrom/nr_subr.c                          |  4 ++--
 net/rose/af_rose.c                            |  4 ++--
 net/x25/af_x25.c                              |  2 +-
 net/x25/x25_in.c                              |  5 +++--
 net/x25/x25_out.c                             |  4 ++--
 133 files changed, 321 insertions(+), 230 deletions(-)

diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c
index 98d79142f32..9fc02654f0f 100644
--- a/arch/ia64/sn/kernel/xpnet.c
+++ b/arch/ia64/sn/kernel/xpnet.c
@@ -566,7 +566,8 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			msg->version = XPNET_VERSION_EMBED;
 			dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
 				&msg->data, skb->data, (size_t) embedded_bytes);
-			memcpy(&msg->data, skb->data, (size_t) embedded_bytes);
+			skb_copy_from_linear_data(skb, &msg->data,
+						  (size_t)embedded_bytes);
 		} else {
 			msg->version = XPNET_VERSION;
 		}
diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c
index fc518d85543..1b9493a16ac 100644
--- a/drivers/atm/atmtcp.c
+++ b/drivers/atm/atmtcp.c
@@ -221,7 +221,7 @@ static int atmtcp_v_send(struct atm_vcc *vcc,struct sk_buff *skb)
 	hdr->vpi = htons(vcc->vpi);
 	hdr->vci = htons(vcc->vci);
 	hdr->length = htonl(skb->len);
-	memcpy(skb_put(new_skb,skb->len),skb->data,skb->len);
+	skb_copy_from_linear_data(skb, skb_put(new_skb, skb->len), skb->len);
 	if (vcc->pop) vcc->pop(vcc,skb);
 	else dev_kfree_skb(skb);
 	out_vcc->push(out_vcc,new_skb);
@@ -310,7 +310,7 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb)
 		goto done;
 	}
 	__net_timestamp(new_skb);
-	memcpy(skb_put(new_skb,skb->len),skb->data,skb->len);
+	skb_copy_from_linear_data(skb, skb_put(new_skb, skb->len), skb->len);
 	out_vcc->push(out_vcc,new_skb);
 	atomic_inc(&vcc->stats->tx);
 	atomic_inc(&out_vcc->stats->rx);
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index 26f4b703349..14ced85b3f5 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -2395,7 +2395,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
                skb->destructor = ns_lb_destructor;
 #endif /* NS_USE_DESTRUCTORS */
                skb_push(skb, NS_SMBUFSIZE);
-               memcpy(skb->data, sb->data, NS_SMBUFSIZE);
+               skb_copy_from_linear_data(sb, skb->data, NS_SMBUFSIZE);
                skb_put(skb, len - NS_SMBUFSIZE);
                ATM_SKB(skb)->vcc = vcc;
 	       __net_timestamp(skb);
@@ -2479,7 +2479,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
 	 {
             /* Copy the small buffer to the huge buffer */
             sb = (struct sk_buff *) iov->iov_base;
-            memcpy(hb->data, sb->data, iov->iov_len);
+            skb_copy_from_linear_data(sb, hb->data, iov->iov_len);
             skb_put(hb, iov->iov_len);
             remaining = len - iov->iov_len;
             iov++;
@@ -2491,7 +2491,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe)
             {
                lb = (struct sk_buff *) iov->iov_base;
                tocopy = min_t(int, remaining, iov->iov_len);
-               memcpy(skb_tail_pointer(hb), lb->data, tocopy);
+               skb_copy_from_linear_data(lb, skb_tail_pointer(hb), tocopy);
                skb_put(hb, tocopy);
                iov++;
                remaining -= tocopy;
diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c
index 4c766f36d88..b990805806a 100644
--- a/drivers/bluetooth/bfusb.c
+++ b/drivers/bluetooth/bfusb.c
@@ -527,7 +527,7 @@ static int bfusb_send_frame(struct sk_buff *skb)
 		buf[2] = (size == BFUSB_MAX_BLOCK_SIZE) ? 0 : size;
 
 		memcpy(skb_put(nskb, 3), buf, 3);
-		memcpy(skb_put(nskb, size), skb->data + sent, size);
+		skb_copy_from_linear_data_offset(skb, sent, skb_put(nskb, size), size);
 
 		sent  += size;
 		count -= size;
diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c
index 9fca6513562..e8ebd5d3de8 100644
--- a/drivers/bluetooth/bpa10x.c
+++ b/drivers/bluetooth/bpa10x.c
@@ -231,7 +231,7 @@ static void bpa10x_wakeup(struct bpa10x_data *data)
 		cr = (struct usb_ctrlrequest *) urb->setup_packet;
 		cr->wLength = __cpu_to_le16(skb->len);
 
-		memcpy(urb->transfer_buffer, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, urb->transfer_buffer, skb->len);
 		urb->transfer_buffer_length = skb->len;
 
 		err = usb_submit_urb(urb, GFP_ATOMIC);
@@ -250,7 +250,7 @@ static void bpa10x_wakeup(struct bpa10x_data *data)
 		skb = skb_dequeue(&data->tx_queue);
 
 	if (skb) {
-		memcpy(urb->transfer_buffer, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, urb->transfer_buffer, skb->len);
 		urb->transfer_buffer_length = skb->len;
 
 		err = usb_submit_urb(urb, GFP_ATOMIC);
diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c
index 459aa97937a..7f9c54b9964 100644
--- a/drivers/bluetooth/dtl1_cs.c
+++ b/drivers/bluetooth/dtl1_cs.c
@@ -425,7 +425,7 @@ static int dtl1_hci_send_frame(struct sk_buff *skb)
 		return -ENOMEM;
 
 	skb_reserve(s, NSHL);
-	memcpy(skb_put(s, skb->len), skb->data, skb->len);
+	skb_copy_from_linear_data(skb, skb_put(s, skb->len), skb->len);
 	if (skb->len & 0x0001)
 		*skb_put(s, 1) = 0;	/* PAD */
 
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 8d025e9b5bc..157b1d09ab5 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -4169,7 +4169,7 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev)
 	netif_stop_queue(dev);
 
 	/* copy data to device buffers */
-	memcpy(info->tx_buf, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, info->tx_buf, skb->len);
 	info->tx_get = 0;
 	info->tx_put = info->tx_count = skb->len;
 
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index e842c65a3f4..3b4b0acd707 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -821,7 +821,8 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
 	/*
 	 * copy the new data into our accumulation buffer.
 	 */
-	memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len);
+	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
+				  skb->len);
 	ep->mpa_pkt_len += skb->len;
 
 	/*
@@ -940,7 +941,8 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
 	/*
 	 * Copy the new data into our accumulation buffer.
 	 */
-	memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len);
+	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
+				  skb->len);
 	ep->mpa_pkt_len += skb->len;
 
 	/*
@@ -1619,7 +1621,8 @@ static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 	PDBG("%s ep %p\n", __FUNCTION__, ep);
 	skb_pull(skb, sizeof(struct cpl_rdma_terminate));
 	PDBG("%s saving %d bytes of term msg\n", __FUNCTION__, skb->len);
-	memcpy(ep->com.qp->attr.terminate_buffer, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, ep->com.qp->attr.terminate_buffer,
+				  skb->len);
 	ep->com.qp->attr.terminate_msg_len = skb->len;
 	ep->com.qp->attr.is_terminate_local = 0;
 	return CPL_RET_BUF_DONE;
diff --git a/drivers/isdn/act2000/module.c b/drivers/isdn/act2000/module.c
index e3e5c139907..ee2b0b9f8f4 100644
--- a/drivers/isdn/act2000/module.c
+++ b/drivers/isdn/act2000/module.c
@@ -442,7 +442,7 @@ act2000_sendbuf(act2000_card *card, int channel, int ack, struct sk_buff *skb)
 			return 0;
 		}
 		skb_reserve(xmit_skb, 19);
-		memcpy(skb_put(xmit_skb, len), skb->data, len);
+		skb_copy_from_linear_data(skb, skb_put(xmit_skb, len), len);
 	} else {
 		xmit_skb = skb_clone(skb, GFP_ATOMIC);
 		if (!xmit_skb) {
diff --git a/drivers/isdn/gigaset/usb-gigaset.c b/drivers/isdn/gigaset/usb-gigaset.c
index 2baef349c12..c8e1c357cec 100644
--- a/drivers/isdn/gigaset/usb-gigaset.c
+++ b/drivers/isdn/gigaset/usb-gigaset.c
@@ -652,7 +652,7 @@ static int write_modem(struct cardstate *cs)
 	 * transmit data
 	 */
 	count = min(bcs->tx_skb->len, (unsigned) ucs->bulk_out_size);
-	memcpy(ucs->bulk_out_buffer, bcs->tx_skb->data, count);
+	skb_copy_from_linear_data(bcs->tx_skb, ucs->bulk_out_buffer, count);
 	skb_pull(bcs->tx_skb, count);
 	atomic_set(&ucs->busy, 1);
 	gig_dbg(DEBUG_OUTPUT, "write_modem: send %d bytes", count);
diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c
index 1e2d38e3d68..428872b653e 100644
--- a/drivers/isdn/hardware/avm/b1dma.c
+++ b/drivers/isdn/hardware/avm/b1dma.c
@@ -404,7 +404,8 @@ static void b1dma_dispatch_tx(avmcard *card)
 		printk(KERN_DEBUG "tx: put 0x%x len=%d\n", 
 		       skb->data[2], txlen);
 #endif
-		memcpy(dma->sendbuf.dmabuf, skb->data+2, skb->len-2);
+		skb_copy_from_linear_data_offset(skb, 2, dma->sendbuf.dmabuf,
+						 skb->len - 2);
 	}
 	txlen = (txlen + 3) & ~3;
 
diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c
index 6f5efa8d78c..d58f927e766 100644
--- a/drivers/isdn/hardware/avm/c4.c
+++ b/drivers/isdn/hardware/avm/c4.c
@@ -457,7 +457,8 @@ static void c4_dispatch_tx(avmcard *card)
 		printk(KERN_DEBUG "%s: tx put 0x%x len=%d\n",
 				card->name, skb->data[2], txlen);
 #endif
-		memcpy(dma->sendbuf.dmabuf, skb->data+2, skb->len-2);
+		skb_copy_from_linear_data_offset(skb, 2, dma->sendbuf.dmabuf,
+						 skb->len - 2);
 	}
 	txlen = (txlen + 3) & ~3;
 
diff --git a/drivers/isdn/hisax/elsa_ser.c b/drivers/isdn/hisax/elsa_ser.c
index ae377e81277..1642dca988a 100644
--- a/drivers/isdn/hisax/elsa_ser.c
+++ b/drivers/isdn/hisax/elsa_ser.c
@@ -254,14 +254,16 @@ write_modem(struct BCState *bcs) {
 	count = len;
 	if (count > MAX_MODEM_BUF - fp) {
 		count = MAX_MODEM_BUF - fp;
-		memcpy(cs->hw.elsa.transbuf + fp, bcs->tx_skb->data, count);
+		skb_copy_from_linear_data(bcs->tx_skb,
+					  cs->hw.elsa.transbuf + fp, count);
 		skb_pull(bcs->tx_skb, count);
 		cs->hw.elsa.transcnt += count;
 		ret = count;
 		count = len - count;
 		fp = 0;
 	}
-	memcpy((cs->hw.elsa.transbuf + fp), bcs->tx_skb->data, count);
+	skb_copy_from_linear_data(bcs->tx_skb,
+				  cs->hw.elsa.transbuf + fp, count);
 	skb_pull(bcs->tx_skb, count);
 	cs->hw.elsa.transcnt += count;
 	ret += count;
diff --git a/drivers/isdn/hisax/isdnl2.c b/drivers/isdn/hisax/isdnl2.c
index cd3b5ad5349..3446f249d67 100644
--- a/drivers/isdn/hisax/isdnl2.c
+++ b/drivers/isdn/hisax/isdnl2.c
@@ -1293,7 +1293,8 @@ l2_pull_iqueue(struct FsmInst *fi, int event, void *arg)
 		oskb = skb;
 		skb = alloc_skb(oskb->len + i, GFP_ATOMIC);
 		memcpy(skb_put(skb, i), header, i);
-		memcpy(skb_put(skb, oskb->len), oskb->data, oskb->len);
+		skb_copy_from_linear_data(oskb,
+					  skb_put(skb, oskb->len), oskb->len);
 		dev_kfree_skb(oskb);
 	}
 	st->l2.l2l1(st, PH_PULL | INDICATION, skb);
diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c
index b2ae4ec1e49..4433ce0fca5 100644
--- a/drivers/isdn/hysdn/hycapi.c
+++ b/drivers/isdn/hysdn/hycapi.c
@@ -398,7 +398,7 @@ static u16 hycapi_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
 			_len = CAPIMSG_LEN(skb->data);
 			if (_len > 22) {
 				_len2 = _len - 22;
-				memcpy(msghead, skb->data, 22);
+				skb_copy_from_linear_data(skb, msghead, 22);
 				memcpy(skb->data + _len2, msghead, 22);
 				skb_pull(skb, _len2);
 				CAPIMSG_SETLEN(skb->data, 22);
diff --git a/drivers/isdn/hysdn/hysdn_sched.c b/drivers/isdn/hysdn/hysdn_sched.c
index b7b5aa4748a..81db4a190d4 100644
--- a/drivers/isdn/hysdn/hysdn_sched.c
+++ b/drivers/isdn/hysdn/hysdn_sched.c
@@ -113,7 +113,8 @@ hysdn_sched_tx(hysdn_card *card, unsigned char *buf,
 	    (skb = hysdn_tx_netget(card)) != NULL) 
 	{
 		if (skb->len <= maxlen) {
-			memcpy(buf, skb->data, skb->len);	/* copy the packet to the buffer */
+			/* copy the packet to the buffer */
+			skb_copy_from_linear_data(skb, buf, skb->len);
 			*len = skb->len;
 			*chan = CHAN_NDIS_DATA;
 			card->net_tx_busy = 1;	/* we are busy sending network data */
@@ -126,7 +127,7 @@ hysdn_sched_tx(hysdn_card *card, unsigned char *buf,
 	    ((skb = hycapi_tx_capiget(card)) != NULL) )
 	{
 		if (skb->len <= maxlen) {
-			memcpy(buf, skb->data, skb->len);
+			skb_copy_from_linear_data(skb, buf, skb->len);
 			*len = skb->len;
 			*chan = CHAN_CAPI;
 			hycapi_tx_capiack(card);
diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c
index 9c926e41b11..c97330b1987 100644
--- a/drivers/isdn/i4l/isdn_common.c
+++ b/drivers/isdn/i4l/isdn_common.c
@@ -829,7 +829,7 @@ isdn_readbchan(int di, int channel, u_char * buf, u_char * fp, int len, wait_que
 				dflag = 0;
 			}
 			count_put = count_pull;
-			memcpy(cp, skb->data, count_put);
+			skb_copy_from_linear_data(skb, cp, count_put);
 			cp += count_put;
 			len -= count_put;
 #ifdef CONFIG_ISDN_AUDIO
diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index be915051cb2..387392cb3d6 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -1100,7 +1100,8 @@ isdn_ppp_push_higher(isdn_net_dev * net_dev, isdn_net_local * lp, struct sk_buff
 					goto drop_packet;
 				}
 				skb_put(skb, skb_old->len + 128);
-				memcpy(skb->data, skb_old->data, skb_old->len);
+				skb_copy_from_linear_data(skb_old, skb->data,
+							  skb_old->len);
 				if (net_dev->local->ppp_slot < 0) {
 					printk(KERN_ERR "%s: net_dev->local->ppp_slot(%d) out of range\n",
 						__FUNCTION__, net_dev->local->ppp_slot);
@@ -1902,7 +1903,9 @@ void isdn_ppp_mp_reassembly( isdn_net_dev * net_dev, isdn_net_local * lp,
 		while( from != to ) {
 			unsigned int len = from->len - MP_HEADER_LEN;
 
-			memcpy(skb_put(skb,len), from->data+MP_HEADER_LEN, len);
+			skb_copy_from_linear_data_offset(from, MP_HEADER_LEN,
+							 skb_put(skb,len),
+							 len);
 			frag = from->next;
 			isdn_ppp_mp_free_skb(mp, from);
 			from = frag; 
diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c
index e3add27dd0e..e93ad59f60b 100644
--- a/drivers/isdn/isdnloop/isdnloop.c
+++ b/drivers/isdn/isdnloop/isdnloop.c
@@ -415,7 +415,8 @@ isdnloop_sendbuf(int channel, struct sk_buff *skb, isdnloop_card * card)
 		spin_lock_irqsave(&card->isdnloop_lock, flags);
 		nskb = dev_alloc_skb(skb->len);
 		if (nskb) {
-			memcpy(skb_put(nskb, len), skb->data, len);
+			skb_copy_from_linear_data(skb,
+						  skb_put(nskb, len), len);
 			skb_queue_tail(&card->bqueue[channel], nskb);
 			dev_kfree_skb(skb);
 		} else
diff --git a/drivers/isdn/pcbit/capi.c b/drivers/isdn/pcbit/capi.c
index 47c59e95898..7b55e151f1b 100644
--- a/drivers/isdn/pcbit/capi.c
+++ b/drivers/isdn/pcbit/capi.c
@@ -429,8 +429,9 @@ int capi_decode_conn_ind(struct pcbit_chan * chan,
 		if (!(info->data.setup.CallingPN = kmalloc(len - count + 1, GFP_ATOMIC)))
 			return -1;
        
-		memcpy(info->data.setup.CallingPN, skb->data + count + 1, 
-		       len - count);
+		skb_copy_from_linear_data_offset(skb, count + 1,
+						 info->data.setup.CallingPN,
+						 len - count);
 		info->data.setup.CallingPN[len - count] = 0;
 
 	}
@@ -457,8 +458,9 @@ int capi_decode_conn_ind(struct pcbit_chan * chan,
 		if (!(info->data.setup.CalledPN = kmalloc(len - count + 1, GFP_ATOMIC)))
 			return -1;
         
-		memcpy(info->data.setup.CalledPN, skb->data + count + 1, 
-		       len - count); 
+		skb_copy_from_linear_data_offset(skb, count + 1,
+						 info->data.setup.CalledPN,
+						 len - count);
 		info->data.setup.CalledPN[len - count] = 0;
 
 	}
@@ -539,7 +541,7 @@ int capi_decode_conn_actv_ind(struct pcbit_chan * chan, struct sk_buff *skb)
 
 #ifdef DEBUG
 	if (len > 1 && len < 31) {
-		memcpy(str, skb->data + 2, len - 1);
+		skb_copy_from_linear_data_offset(skb, 2, str, len - 1);
 		str[len] = 0;
 		printk(KERN_DEBUG "Connected Party Number: %s\n", str);
 	}
diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
index 9de177a5b9f..6a5ab409c4e 100644
--- a/drivers/media/dvb/dvb-core/dvb_net.c
+++ b/drivers/media/dvb/dvb-core/dvb_net.c
@@ -697,7 +697,9 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len )
 					}
 					else
 					{
-						memcpy(dest_addr,  priv->ule_skb->data, ETH_ALEN);
+						skb_copy_from_linear_data(priv->ule_skb,
+							      dest_addr,
+							      ETH_ALEN);
 						skb_pull(priv->ule_skb, ETH_ALEN);
 					}
 				}
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index 21fe1b66808..7dd34bd28ef 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -932,7 +932,7 @@ mpt_lan_receive_post_turbo(struct net_device *dev, u32 tmsg)
 		pci_dma_sync_single_for_cpu(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
 					    priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
 
-		memcpy(skb_put(skb, len), old_skb->data, len);
+		skb_copy_from_linear_data(old_skb, skb_put(skb, len), len);
 
 		pci_dma_sync_single_for_device(mpt_dev->pcidev, priv->RcvCtl[ctx].dma,
 					       priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE);
@@ -1093,7 +1093,7 @@ mpt_lan_receive_post_reply(struct net_device *dev,
 						    priv->RcvCtl[ctx].dma,
 						    priv->RcvCtl[ctx].len,
 						    PCI_DMA_FROMDEVICE);
-			memcpy(skb_put(skb, l), old_skb->data, l);
+			skb_copy_from_linear_data(old_skb, skb_put(skb, l), l);
 
 			pci_dma_sync_single_for_device(mpt_dev->pcidev,
 						       priv->RcvCtl[ctx].dma,
@@ -1122,7 +1122,7 @@ mpt_lan_receive_post_reply(struct net_device *dev,
 					    priv->RcvCtl[ctx].len,
 					    PCI_DMA_FROMDEVICE);
 
-		memcpy(skb_put(skb, len), old_skb->data, len);
+		skb_copy_from_linear_data(old_skb, skb_put(skb, len), len);
 
 		pci_dma_sync_single_for_device(mpt_dev->pcidev,
 					       priv->RcvCtl[ctx].dma,
diff --git a/drivers/net/3c505.c b/drivers/net/3c505.c
index c693b5a7950..e985a85a562 100644
--- a/drivers/net/3c505.c
+++ b/drivers/net/3c505.c
@@ -1025,7 +1025,7 @@ static int send_packet(struct net_device *dev, struct sk_buff *skb)
 	adapter->current_dma.start_time = jiffies;
 
 	if ((unsigned long)(skb->data + nlen) >= MAX_DMA_ADDRESS || nlen != skb->len) {
-		memcpy(adapter->dma_buffer, skb->data, nlen);
+		skb_copy_from_linear_data(skb, adapter->dma_buffer, nlen);
 		memset(adapter->dma_buffer+skb->len, 0, nlen-skb->len);
 		target = isa_virt_to_bus(adapter->dma_buffer);
 	}
diff --git a/drivers/net/3c523.c b/drivers/net/3c523.c
index 6b2036df685..a384f7d478a 100644
--- a/drivers/net/3c523.c
+++ b/drivers/net/3c523.c
@@ -1145,7 +1145,7 @@ static int elmc_send_packet(struct sk_buff *skb, struct net_device *dev)
 
 	if (len != skb->len)
 		memset((char *) p->xmit_cbuffs[p->xmit_count], 0, ETH_ZLEN);
-	memcpy((char *) p->xmit_cbuffs[p->xmit_count], (char *) (skb->data), skb->len);
+	skb_copy_from_linear_data(skb, p->xmit_cbuffs[p->xmit_count], skb->len);
 
 #if (NUM_XMIT_BUFFS == 1)
 #ifdef NO_NOPCOMMANDS
diff --git a/drivers/net/7990.c b/drivers/net/7990.c
index c50264aea16..d396f996af5 100644
--- a/drivers/net/7990.c
+++ b/drivers/net/7990.c
@@ -567,7 +567,7 @@ int lance_start_xmit (struct sk_buff *skb, struct net_device *dev)
 
     	if (skb->len < ETH_ZLEN)
     		memset((char *)&ib->tx_buf[entry][0], 0, ETH_ZLEN);
-        memcpy ((char *)&ib->tx_buf [entry][0], skb->data, skblen);
+        skb_copy_from_linear_data(skb, &ib->tx_buf[entry][0], skblen);
 
         /* Now, give the packet to the lance */
         ib->btx_ring [entry].tmd1_bits = (LE_T1_POK|LE_T1_OWN);
diff --git a/drivers/net/a2065.c b/drivers/net/a2065.c
index b38fc65005e..1226cbba045 100644
--- a/drivers/net/a2065.c
+++ b/drivers/net/a2065.c
@@ -598,7 +598,7 @@ static int lance_start_xmit (struct sk_buff *skb, struct net_device *dev)
 	ib->btx_ring [entry].length = (-len) | 0xf000;
 	ib->btx_ring [entry].misc = 0;
 
-	memcpy ((char *)&ib->tx_buf [entry][0], skb->data, skblen);
+	skb_copy_from_linear_data(skb, &ib->tx_buf [entry][0], skblen);
 
 	/* Clear the slack of the packet, do I need this? */
 	if (len != skblen)
diff --git a/drivers/net/arcnet/capmode.c b/drivers/net/arcnet/capmode.c
index f6a87bd20ff..cc4610db639 100644
--- a/drivers/net/arcnet/capmode.c
+++ b/drivers/net/arcnet/capmode.c
@@ -273,7 +273,8 @@ static int ack_tx(struct net_device *dev, int acked)
   /* skb_pull(ackskb, ARC_HDR_SIZE); */
 
 
-  memcpy(ackpkt, lp->outgoing.skb->data, ARC_HDR_SIZE+sizeof(struct arc_cap));
+  skb_copy_from_linear_data(lp->outgoing.skb, ackpkt,
+		ARC_HDR_SIZE + sizeof(struct arc_cap));
   ackpkt->soft.cap.proto=0; /* using protocol 0 for acknowledge */
   ackpkt->soft.cap.mes.ack=acked;
 
diff --git a/drivers/net/atari_bionet.c b/drivers/net/atari_bionet.c
index f52e7f22f63..13dbed368d6 100644
--- a/drivers/net/atari_bionet.c
+++ b/drivers/net/atari_bionet.c
@@ -453,7 +453,8 @@ bionet_send_packet(struct sk_buff *skb, struct net_device *dev) {
 		stdma_lock(bionet_intr, NULL);
 		local_irq_restore(flags);
 		if( !STRAM_ADDR(buf+length-1) ) {
-			memcpy(nic_packet->buffer, skb->data, length);
+			skb_copy_from_linear_data(skb, nic_packet->buffer,
+						  length);
 			buf = (unsigned long)&((struct nic_pkt_s *)phys_nic_packet)->buffer;
 		}
 
diff --git a/drivers/net/atari_pamsnet.c b/drivers/net/atari_pamsnet.c
index 3b543614928..745101d7451 100644
--- a/drivers/net/atari_pamsnet.c
+++ b/drivers/net/atari_pamsnet.c
@@ -717,7 +717,8 @@ pamsnet_send_packet(struct sk_buff *skb, struct net_device *dev) {
 
 		local_irq_restore(flags);
 		if( !STRAM_ADDR(buf+length-1) ) {
-			memcpy(nic_packet->buffer, skb->data, length);
+			skb_copy_from_linear_data(skb, nic_packet->buffer,
+						  length);
 			buf = (unsigned long)phys_nic_packet;
 		}
 
diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c
index 97b55f2546c..d10fb80e9a6 100644
--- a/drivers/net/au1000_eth.c
+++ b/drivers/net/au1000_eth.c
@@ -1125,7 +1125,7 @@ static int au1000_tx(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	pDB = aup->tx_db_inuse[aup->tx_head];
-	memcpy((void *)pDB->vaddr, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, pDB->vaddr, skb->len);
 	if (skb->len < ETH_ZLEN) {
 		for (i=skb->len; i<ETH_ZLEN; i++) {
 			((char *)pDB->vaddr)[i] = 0;
diff --git a/drivers/net/b44.c b/drivers/net/b44.c
index f67d97de97f..879a2fff474 100644
--- a/drivers/net/b44.c
+++ b/drivers/net/b44.c
@@ -828,8 +828,8 @@ static int b44_rx(struct b44 *bp, int budget)
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			/* DMA sync done above, copy just the actual packet */
-			memcpy(copy_skb->data, skb->data+bp->rx_offset, len);
-
+			skb_copy_from_linear_data_offset(skb, bp->rx_offset,
+							 copy_skb->data, len);
 			skb = copy_skb;
 		}
 		skb->ip_summed = CHECKSUM_NONE;
@@ -1006,7 +1006,8 @@ static int b44_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			goto err_out;
 		}
 
-		memcpy(skb_put(bounce_skb, len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(bounce_skb, len),
+					  skb->len);
 		dev_kfree_skb_any(skb);
 		skb = bounce_skb;
 	}
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 7e7b5f34403..f98a2205a09 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -1884,10 +1884,8 @@ bnx2_rx_int(struct bnx2 *bp, int budget)
 				goto reuse_rx;
 
 			/* aligned copy */
-			memcpy(new_skb->data,
-				skb->data + bp->rx_offset - 2,
-				len + 2);
-
+			skb_copy_from_linear_data_offset(skb, bp->rx_offset - 2,
+				      new_skb->data, len + 2);
 			skb_reserve(new_skb, 2);
 			skb_put(new_skb, len);
 
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index bd3ab6493e3..4aec747d9e4 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -2846,8 +2846,8 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring,
 			      ctrl | TX_DESC_SOF, 0);
 		entry = TX_DESC_NEXT(ring, entry);
 
-		memcpy(tx_tiny_buf(cp, ring, entry), skb->data +
-		       len - tabort, tabort);
+		skb_copy_from_linear_data_offset(skb, len - tabort,
+			      tx_tiny_buf(cp, ring, entry), tabort);
 		mapping = tx_tiny_map(cp, ring, entry, tentry);
 		cas_write_txd(cp, ring, entry, mapping, tabort, ctrl,
 			      (nr_frags == 0));
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 43e92f9f0bc..1be1bbd1616 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1062,7 +1062,7 @@ static inline struct sk_buff *get_packet(struct pci_dev *pdev,
 					    pci_unmap_addr(ce, dma_addr),
 					    pci_unmap_len(ce, dma_len),
 					    PCI_DMA_FROMDEVICE);
-		memcpy(skb->data, ce->skb->data, len);
+		skb_copy_from_linear_data(ce->skb, skb->data, len);
 		pci_dma_sync_single_for_device(pdev,
 					       pci_unmap_addr(ce, dma_addr),
 					       pci_unmap_len(ce, dma_len),
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index c5faf1380e1..166c959c94b 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -913,7 +913,8 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
 		if (skb->len <= WR_LEN - sizeof(*cpl)) {
 			q->sdesc[pidx].skb = NULL;
 			if (!skb->data_len)
-				memcpy(&d->flit[2], skb->data, skb->len);
+				skb_copy_from_linear_data(skb, &d->flit[2],
+							  skb->len);
 			else
 				skb_copy_bits(skb, 0, &d->flit[2], skb->len);
 
@@ -1771,7 +1772,7 @@ static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
 			__skb_put(skb, len);
 			pci_dma_sync_single_for_cpu(adap->pdev, mapping, len,
 						    PCI_DMA_FROMDEVICE);
-			memcpy(skb->data, sd->t.skb->data, len);
+			skb_copy_from_linear_data(sd->t.skb, skb->data, len);
 			pci_dma_sync_single_for_device(adap->pdev, mapping, len,
 						       PCI_DMA_FROMDEVICE);
 		} else if (!drop_thres)
diff --git a/drivers/net/dgrs.c b/drivers/net/dgrs.c
index d223c38966f..df62c0232f3 100644
--- a/drivers/net/dgrs.c
+++ b/drivers/net/dgrs.c
@@ -741,7 +741,7 @@ static int dgrs_start_xmit(struct sk_buff *skb, struct net_device *devN)
 		}
 
 		amt = min_t(unsigned int, len, rbdp->size - count);
-		memcpy( (char *) S2H(rbdp->buf) + count, skb->data + i, amt);
+		skb_copy_from_linear_data_offset(skb, i, S2H(rbdp->buf) + count, amt);
 		i += amt;
 		count += amt;
 		len -= amt;
diff --git a/drivers/net/eepro100.c b/drivers/net/eepro100.c
index db658bc491a..6c267c38df9 100644
--- a/drivers/net/eepro100.c
+++ b/drivers/net/eepro100.c
@@ -1804,8 +1804,9 @@ speedo_rx(struct net_device *dev)
 				eth_copy_and_sum(skb, sp->rx_skbuff[entry]->data, pkt_len, 0);
 				skb_put(skb, pkt_len);
 #else
-				memcpy(skb_put(skb, pkt_len), sp->rx_skbuff[entry]->data,
-					   pkt_len);
+				skb_copy_from_linear_data(sp->rx_skbuff[entry],
+							  skb_put(skb, pkt_len),
+							  pkt_len);
 #endif
 				pci_dma_sync_single_for_device(sp->pdev, sp->rx_ring_dma[entry],
 											   sizeof(struct RxFD) + pkt_len,
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 63732d2305b..8b539207263 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -1306,7 +1306,7 @@ static void write_swqe2_TSO(struct sk_buff *skb,
 
 	if (skb_data_size >= headersize) {
 		/* copy immediate data */
-		memcpy(imm_data, skb->data, headersize);
+		skb_copy_from_linear_data(skb, imm_data, headersize);
 		swqe->immediate_data_length = headersize;
 
 		if (skb_data_size > headersize) {
@@ -1337,7 +1337,7 @@ static void write_swqe2_nonTSO(struct sk_buff *skb,
 	 */
 	if (skb_data_size >= SWQE2_MAX_IMM) {
 		/* copy immediate data */
-		memcpy(imm_data, skb->data, SWQE2_MAX_IMM);
+		skb_copy_from_linear_data(skb, imm_data, SWQE2_MAX_IMM);
 
 		swqe->immediate_data_length = SWQE2_MAX_IMM;
 
@@ -1350,7 +1350,7 @@ static void write_swqe2_nonTSO(struct sk_buff *skb,
 			swqe->descriptors++;
 		}
 	} else {
-		memcpy(imm_data, skb->data, skb_data_size);
+		skb_copy_from_linear_data(skb, imm_data, skb_data_size);
 		swqe->immediate_data_length = skb_data_size;
 	}
 }
@@ -1772,10 +1772,11 @@ static void ehea_xmit3(struct sk_buff *skb, struct net_device *dev,
 	/* copy (immediate) data */
 	if (nfrags == 0) {
 		/* data is in a single piece */
-		memcpy(imm_data, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, imm_data, skb->len);
 	} else {
 		/* first copy data from the skb->data buffer ... */
-		memcpy(imm_data, skb->data, skb->len - skb->data_len);
+		skb_copy_from_linear_data(skb, imm_data,
+					  skb->len - skb->data_len);
 		imm_data += skb->len - skb->data_len;
 
 		/* ... then copy data from the fragments */
diff --git a/drivers/net/fec_8xx/fec_main.c b/drivers/net/fec_8xx/fec_main.c
index 698dba8f2aa..e824d5d231a 100644
--- a/drivers/net/fec_8xx/fec_main.c
+++ b/drivers/net/fec_8xx/fec_main.c
@@ -551,7 +551,9 @@ static int fec_enet_rx_common(struct net_device *dev, int *budget)
 				skbn = dev_alloc_skb(pkt_len + 2);
 				if (skbn != NULL) {
 					skb_reserve(skbn, 2);	/* align IP header */
-					memcpy(skbn->data, skb->data, pkt_len);
+					skb_copy_from_linear_data(skb
+								  skbn->data,
+								  pkt_len);
 					/* swap */
 					skbt = skb;
 					skb = skbn;
diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c
index 9f6ef315ce5..e2ddd617493 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -160,7 +160,8 @@ static int fs_enet_rx_napi(struct net_device *dev, int *budget)
 				skbn = dev_alloc_skb(pkt_len + 2);
 				if (skbn != NULL) {
 					skb_reserve(skbn, 2);	/* align IP header */
-					memcpy(skbn->data, skb->data, pkt_len);
+					skb_copy_from_linear_data(skb,
+						      skbn->data, pkt_len);
 					/* swap */
 					skbt = skb;
 					skb = skbn;
@@ -293,7 +294,8 @@ static int fs_enet_rx_non_napi(struct net_device *dev)
 				skbn = dev_alloc_skb(pkt_len + 2);
 				if (skbn != NULL) {
 					skb_reserve(skbn, 2);	/* align IP header */
-					memcpy(skbn->data, skb->data, pkt_len);
+					skb_copy_from_linear_data(skb,
+						      skbn->data, pkt_len);
 					/* swap */
 					skbt = skb;
 					skb = skbn;
diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c
index 0fbb414b5a4..3be8c504759 100644
--- a/drivers/net/hamradio/dmascc.c
+++ b/drivers/net/hamradio/dmascc.c
@@ -930,7 +930,7 @@ static int scc_send_packet(struct sk_buff *skb, struct net_device *dev)
 
 	/* Transfer data to DMA buffer */
 	i = priv->tx_head;
-	memcpy(priv->tx_buf[i], skb->data + 1, skb->len - 1);
+	skb_copy_from_linear_data_offset(skb, 1, priv->tx_buf[i], skb->len - 1);
 	priv->tx_len[i] = skb->len - 1;
 
 	/* Clear interrupts while we touch our circular buffers */
diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c
index f5a17ad9d3d..b33adc6a340 100644
--- a/drivers/net/hamradio/hdlcdrv.c
+++ b/drivers/net/hamradio/hdlcdrv.c
@@ -317,7 +317,9 @@ void hdlcdrv_transmitter(struct net_device *dev, struct hdlcdrv_state *s)
 				dev_kfree_skb_irq(skb);
 				break;
 			}
-			memcpy(s->hdlctx.buffer, skb->data+1, pkt_len);
+			skb_copy_from_linear_data_offset(skb, 1,
+							 s->hdlctx.buffer,
+							 pkt_len);
 			dev_kfree_skb_irq(skb);
 			s->hdlctx.bp = s->hdlctx.buffer;
 			append_crc_ccitt(s->hdlctx.buffer, pkt_len);
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index ee3ea4fa729..ac2d6dd9dbe 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -638,7 +638,9 @@ static void yam_tx_byte(struct net_device *dev, struct yam_port *yp)
         			dev_kfree_skb_any(skb);
 				break;
 			}
-			memcpy(yp->tx_buf, skb->data + 1, yp->tx_len);
+			skb_copy_from_linear_data_offset(skb->data, 1,
+							 yp->tx_buf,
+							 yp->tx_len);
 			dev_kfree_skb_any(skb);
 			yp->tx_count = 0;
 			yp->tx_crcl = 0x21;
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index bc62e770a25..f749e07c642 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -1443,7 +1443,7 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (len <= 104) {
 		/* Short packet, let's copy it directly into the ring.  */
-		memcpy(desc->data, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, desc->data, skb->len);
 		if (len < ETH_ZLEN) {
 			/* Very short packet, pad with zeros at the end. */
 			memset(desc->data + len, 0, ETH_ZLEN - len);
diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c
index 0f10758226f..fb2248a2551 100644
--- a/drivers/net/irda/ali-ircc.c
+++ b/drivers/net/irda/ali-ircc.c
@@ -1472,9 +1472,8 @@ static int ali_ircc_fir_hard_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	self->stats.tx_bytes += skb->len;
 
-	memcpy(self->tx_fifo.queue[self->tx_fifo.free].start, skb->data, 
-	       skb->len);
-	
+	skb_copy_from_linear_data(skb, self->tx_fifo.queue[self->tx_fifo.free].start,
+		      skb->len);
 	self->tx_fifo.len++;
 	self->tx_fifo.free++;
 
diff --git a/drivers/net/irda/au1k_ir.c b/drivers/net/irda/au1k_ir.c
index 27afd0f367d..cdd1f6c1e74 100644
--- a/drivers/net/irda/au1k_ir.c
+++ b/drivers/net/irda/au1k_ir.c
@@ -526,7 +526,7 @@ static int au1k_irda_hard_xmit(struct sk_buff *skb, struct net_device *dev)
 	
 	if (aup->speed == 4000000) {
 		/* FIR */
-		memcpy((void *)pDB->vaddr, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, pDB->vaddr, skb->len);
 		ptxd->count_0 = skb->len & 0xff;
 		ptxd->count_1 = (skb->len >> 8) & 0xff;
 
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index ddfa6c38a16..9987a0dc1ea 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -1119,7 +1119,7 @@ dumpbufs(skb->data,skb->len,'>');
   else
     {
       len = skb->len;
-      memcpy (self->tx_bufs[self->txs], skb->data, len);
+      skb_copy_from_linear_data(skb, self->tx_bufs[self->txs], len);
     }
   self->ring->tx[self->txs].len = len & 0x0fff;
 
diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c
index 6ef375a095f..0ac240ca905 100644
--- a/drivers/net/irda/irda-usb.c
+++ b/drivers/net/irda/irda-usb.c
@@ -441,7 +441,7 @@ static int irda_usb_hard_xmit(struct sk_buff *skb, struct net_device *netdev)
 		goto drop;
 	}
 
-	memcpy(self->tx_buff + self->header_length, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, self->tx_buff + self->header_length, skb->len);
 
 	/* Change setting for next frame */
 	if (self->capability & IUC_STIR421X) {
@@ -902,7 +902,7 @@ static void irda_usb_receive(struct urb *urb)
 
 	if(docopy) {
 		/* Copy packet, so we can recycle the original */
-		memcpy(newskb->data, skb->data, urb->actual_length);
+		skb_copy_from_linear_data(skb, newskb->data, urb->actual_length);
 		/* Deliver this new skb */
 		dataskb = newskb;
 		/* And hook the old skb to the URB
diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c
index 3ff1f4b33c0..4b0037e498f 100644
--- a/drivers/net/irda/mcs7780.c
+++ b/drivers/net/irda/mcs7780.c
@@ -353,7 +353,7 @@ static unsigned mcs_wrap_fir_skb(const struct sk_buff *skb, __u8 *buf)
 	buf[0] = len & 0xff;
 	buf[1] = (len >> 8) & 0xff;
 	/* copy the data into the tx buffer. */
-	memcpy(buf+2, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, buf + 2, skb->len);
 	/* put the fcs in the last four bytes in little endian order. */
 	buf[len - 4] = fcs & 0xff;
 	buf[len - 3] = (fcs >> 8) & 0xff;
@@ -377,7 +377,7 @@ static unsigned mcs_wrap_mir_skb(const struct sk_buff *skb, __u8 *buf)
 	buf[0] = len & 0xff;
 	buf[1] = (len >> 8) & 0xff;
 	/* copy the data */
-	memcpy(buf+2, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, buf + 2, skb->len);
 	/* put the fcs in last two bytes in little endian order. */
 	buf[len - 2] = fcs & 0xff;
 	buf[len - 1] = (fcs >> 8) & 0xff;
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index 8ce7dad582f..0ff99271413 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -1466,9 +1466,8 @@ static int nsc_ircc_hard_xmit_fir(struct sk_buff *skb, struct net_device *dev)
 
 	self->stats.tx_bytes += skb->len;
 
-	memcpy(self->tx_fifo.queue[self->tx_fifo.free].start, skb->data, 
-	       skb->len);
-	
+	skb_copy_from_linear_data(skb, self->tx_fifo.queue[self->tx_fifo.free].start,
+		      skb->len);
 	self->tx_fifo.len++;
 	self->tx_fifo.free++;
 
diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c
index f35d7d42624..b3e1107420a 100644
--- a/drivers/net/irda/pxaficp_ir.c
+++ b/drivers/net/irda/pxaficp_ir.c
@@ -484,7 +484,7 @@ static int pxa_irda_hard_xmit(struct sk_buff *skb, struct net_device *dev)
 		unsigned long mtt = irda_get_mtt(skb);
 
 		si->dma_tx_buff_len = skb->len;
-		memcpy(si->dma_tx_buff, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, si->dma_tx_buff, skb->len);
 
 		if (mtt)
 			while ((unsigned)(OSCR - si->last_oscr)/4 < mtt)
diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c
index e8453868d74..198bf3bfa70 100644
--- a/drivers/net/irda/smsc-ircc2.c
+++ b/drivers/net/irda/smsc-ircc2.c
@@ -1162,7 +1162,7 @@ static int smsc_ircc_hard_xmit_fir(struct sk_buff *skb, struct net_device *dev)
 		self->new_speed = speed;
 	}
 
-	memcpy(self->tx_buff.head, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, self->tx_buff.head, skb->len);
 
 	self->tx_buff.len = skb->len;
 	self->tx_buff.data = self->tx_buff.head;
diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c
index 5ff41631460..45bbd668615 100644
--- a/drivers/net/irda/via-ircc.c
+++ b/drivers/net/irda/via-ircc.c
@@ -925,8 +925,8 @@ static int via_ircc_hard_xmit_fir(struct sk_buff *skb,
 
 	self->tx_fifo.tail += skb->len;
 	self->stats.tx_bytes += skb->len;
-	memcpy(self->tx_fifo.queue[self->tx_fifo.free].start, skb->data,
-	       skb->len);
+	skb_copy_from_linear_data(skb,
+		      self->tx_fifo.queue[self->tx_fifo.free].start, skb->len);
 	self->tx_fifo.len++;
 	self->tx_fifo.free++;
 //F01   if (self->tx_fifo.len == 1) {
diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c
index 79b407f3a49..c4be973867a 100644
--- a/drivers/net/irda/vlsi_ir.c
+++ b/drivers/net/irda/vlsi_ir.c
@@ -993,7 +993,7 @@ static int vlsi_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 			goto drop;
 		}
 		else
-			memcpy(rd->buf, skb->data, len);
+			skb_copy_from_linear_data(skb, rd->buf, len);
 	}
 
 	rd->skb = skb;			/* remember skb for tx-complete stats */
diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c
index bee44513095..0d4a68618fc 100644
--- a/drivers/net/irda/w83977af_ir.c
+++ b/drivers/net/irda/w83977af_ir.c
@@ -529,7 +529,7 @@ int w83977af_hard_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Decide if we should use PIO or DMA transfer */
 	if (self->io.speed > PIO_MAX_SPEED) {
 		self->tx_buff.data = self->tx_buff.head;
-		memcpy(self->tx_buff.data, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, self->tx_buff.data, skb->len);
 		self->tx_buff.len = skb->len;
 		
 		mtt = irda_get_mtt(skb);
diff --git a/drivers/net/lance.c b/drivers/net/lance.c
index 11cbcb946db..0fe96c85828 100644
--- a/drivers/net/lance.c
+++ b/drivers/net/lance.c
@@ -988,7 +988,7 @@ static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (lance_debug > 5)
 			printk("%s: bouncing a high-memory packet (%#x).\n",
 				   dev->name, (u32)isa_virt_to_bus(skb->data));
-		memcpy(&lp->tx_bounce_buffs[entry], skb->data, skb->len);
+		skb_copy_from_linear_data(skb, &lp->tx_bounce_buffs[entry], skb->len);
 		lp->tx_ring[entry].base =
 			((u32)isa_virt_to_bus((lp->tx_bounce_buffs + entry)) & 0xffffff) | 0x83000000;
 		dev_kfree_skb(skb);
diff --git a/drivers/net/macmace.c b/drivers/net/macmace.c
index 8c07ffc9c24..27911c07558 100644
--- a/drivers/net/macmace.c
+++ b/drivers/net/macmace.c
@@ -420,8 +420,7 @@ static int mace_xmit_start(struct sk_buff *skb, struct net_device *dev)
 	mp->stats.tx_bytes += skb->len;
 
 	/* We need to copy into our xmit buffer to take care of alignment and caching issues */
-
-	memcpy((void *) mp->tx_ring, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, mp->tx_ring, skb->len);
 
 	/* load the Tx DMA and fire it off */
 
diff --git a/drivers/net/meth.c b/drivers/net/meth.c
index fafe6783523..0343ea12b29 100644
--- a/drivers/net/meth.c
+++ b/drivers/net/meth.c
@@ -608,7 +608,7 @@ static void meth_tx_short_prepare(struct meth_private *priv,
 
 	desc->header.raw = METH_TX_CMD_INT_EN | (len-1) | ((128-len) << 16);
 	/* maybe I should set whole thing to 0 first... */
-	memcpy(desc->data.dt + (120 - len), skb->data, skb->len);
+	skb_copy_from_linear_data(skb, desc->data.dt + (120 - len), skb->len);
 	if (skb->len < len)
 		memset(desc->data.dt + 120 - len + skb->len, 0, len-skb->len);
 }
@@ -626,8 +626,8 @@ static void meth_tx_1page_prepare(struct meth_private *priv,
 
 	/* unaligned part */
 	if (unaligned_len) {
-		memcpy(desc->data.dt + (120 - unaligned_len),
-		       skb->data, unaligned_len);
+		skb_copy_from_linear_data(skb, desc->data.dt + (120 - unaligned_len),
+			      unaligned_len);
 		desc->header.raw |= (128 - unaligned_len) << 16;
 	}
 
@@ -652,8 +652,8 @@ static void meth_tx_2page_prepare(struct meth_private *priv,
 	desc->header.raw = METH_TX_CMD_INT_EN | TX_CATBUF1 | TX_CATBUF2| (skb->len - 1);
 	/* unaligned part */
 	if (unaligned_len){
-		memcpy(desc->data.dt + (120 - unaligned_len),
-		       skb->data, unaligned_len);
+		skb_copy_from_linear_data(skb, desc->data.dt + (120 - unaligned_len),
+			      unaligned_len);
 		desc->header.raw |= (128 - unaligned_len) << 16;
 	}
 
diff --git a/drivers/net/myri_sbus.c b/drivers/net/myri_sbus.c
index e1f16fb0584..13444da9327 100644
--- a/drivers/net/myri_sbus.c
+++ b/drivers/net/myri_sbus.c
@@ -502,7 +502,7 @@ static void myri_rx(struct myri_eth *mp, struct net_device *dev)
 			copy_skb->dev = dev;
 			DRX(("resv_and_put "));
 			skb_put(copy_skb, len);
-			memcpy(copy_skb->data, skb->data, len);
+			skb_copy_from_linear_data(skb, copy_skb->data, len);
 
 			/* Reuse original ring buffer. */
 			DRX(("reuse "));
diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index b488e94bc4c..ab25c225a07 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -920,8 +920,10 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 			/* copy the next 64 bytes - should be enough except
 			 * for pathological case
 			 */
-			memcpy((void *)hwdesc, (void *)(skb->data) +
-			       first_hdr_len, hdr_len - first_hdr_len);
+			skb_copy_from_linear_data_offset(skb, first_hdr_len,
+							 hwdesc,
+							 (hdr_len -
+							  first_hdr_len));
 			producer = get_next_index(producer, max_tx_desc_count);
 		}
 	}
diff --git a/drivers/net/ni52.c b/drivers/net/ni52.c
index 70b6812a8a7..8646698c77d 100644
--- a/drivers/net/ni52.c
+++ b/drivers/net/ni52.c
@@ -1182,7 +1182,7 @@ static int ni52_send_packet(struct sk_buff *skb, struct net_device *dev)
 	else
 #endif
 	{
-		memcpy((char *)p->xmit_cbuffs[p->xmit_count],(char *)(skb->data),skb->len);
+		skb_copy_from_linear_data(skb, p->xmit_cbuffs[p->xmit_count], skb->len);
 		len = skb->len;
 		if (len < ETH_ZLEN) {
 			len = ETH_ZLEN;
diff --git a/drivers/net/ni65.c b/drivers/net/ni65.c
index 782201d12c2..3818edf0ac1 100644
--- a/drivers/net/ni65.c
+++ b/drivers/net/ni65.c
@@ -1176,8 +1176,9 @@ static int ni65_send_packet(struct sk_buff *skb, struct net_device *dev)
 		if( (unsigned long) (skb->data + skb->len) > 0x1000000) {
 #endif
 
-			memcpy((char *) p->tmdbounce[p->tmdbouncenum] ,(char *)skb->data,
-							 (skb->len > T_BUF_SIZE) ? T_BUF_SIZE : skb->len);
+			skb_copy_from_linear_data(skb, p->tmdbounce[p->tmdbouncenum],
+				      skb->len > T_BUF_SIZE ? T_BUF_SIZE :
+							      skb->len);
 			if (len > skb->len)
 				memset((char *)p->tmdbounce[p->tmdbouncenum]+skb->len, 0, len-skb->len);
 			dev_kfree_skb (skb);
diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c
index 099972c977e..df8998b4f37 100644
--- a/drivers/net/pci-skeleton.c
+++ b/drivers/net/pci-skeleton.c
@@ -1344,7 +1344,7 @@ static int netdrv_start_xmit (struct sk_buff *skb, struct net_device *dev)
 
 	tp->tx_info[entry].skb = skb;
 	/* tp->tx_info[entry].mapping = 0; */
-	memcpy (tp->tx_buf[entry], skb->data, skb->len);
+	skb_copy_from_linear_data(skb, tp->tx_buf[entry], skb->len);
 
 	/* Note: the chip doesn't have auto-pad! */
 	NETDRV_W32 (TxStatus0 + (entry * sizeof(u32)),
diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c
index fabbe95c7ef..808fae1577e 100644
--- a/drivers/net/pcmcia/axnet_cs.c
+++ b/drivers/net/pcmcia/axnet_cs.c
@@ -1136,7 +1136,7 @@ static int ei_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		ei_block_output(dev, length, skb->data, output_page);
 	else {
 		memset(packet, 0, ETH_ZLEN);
-		memcpy(packet, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, packet, skb->len);
 		ei_block_output(dev, length, packet, output_page);
 	}
 	
diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c
index b6f0e9a25e2..5918fab3834 100644
--- a/drivers/net/ppp_synctty.c
+++ b/drivers/net/ppp_synctty.c
@@ -594,7 +594,8 @@ ppp_sync_txmunge(struct syncppp *ap, struct sk_buff *skb)
 				return NULL;
 			}
 			skb_reserve(npkt,2);
-			memcpy(skb_put(npkt,skb->len), skb->data, skb->len);
+			skb_copy_from_linear_data(skb,
+				      skb_put(npkt, skb->len), skb->len);
 			kfree_skb(skb);
 			skb = npkt;
 		}
diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index e94790632d5..e9fb616ff66 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -869,7 +869,8 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
 			goto abort;
 
 		skb_reserve(skb2, dev->hard_header_len + sizeof(struct pppoe_hdr));
-		memcpy(skb_put(skb2, skb->len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(skb2, skb->len),
+					  skb->len);
 	} else {
 		/* Make a clone so as to not disturb the original skb,
 		 * give dev_queue_xmit something it can free.
diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c
index 40d2639eedc..7b80fb7a9d9 100755
--- a/drivers/net/qla3xxx.c
+++ b/drivers/net/qla3xxx.c
@@ -1927,7 +1927,8 @@ static void ql_process_macip_rx_intr(struct ql3_adapter *qdev,
 		 * Copy the ethhdr from first buffer to second. This
 		 * is necessary for 3022 IP completions.
 		 */
-		memcpy(skb_push(skb2, size), skb1->data + VLAN_ID_LEN, size);
+		skb_copy_from_linear_data_offset(skb1, VLAN_ID_LEN,
+						 skb_push(skb2, size), size);
 	} else {
 		u16 checksum = le16_to_cpu(ib_ip_rsp_ptr->checksum);
 		if (checksum & 
diff --git a/drivers/net/rrunner.c b/drivers/net/rrunner.c
index 3a4fce38450..25c73d47daa 100644
--- a/drivers/net/rrunner.c
+++ b/drivers/net/rrunner.c
@@ -1451,7 +1451,7 @@ static int rr_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 		skb_reserve(new_skb, 8);
 		skb_put(new_skb, len);
-		memcpy(new_skb->data, skb->data, len);
+		skb_copy_from_linear_data(skb, new_skb->data, len);
 		dev_kfree_skb(skb);
 		skb = new_skb;
 	}
diff --git a/drivers/net/sgiseeq.c b/drivers/net/sgiseeq.c
index 5a891913218..d8c9c5d66d4 100644
--- a/drivers/net/sgiseeq.c
+++ b/drivers/net/sgiseeq.c
@@ -534,7 +534,7 @@ static int sgiseeq_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 *    entry and the HPC got to the end of the chain before we
 	 *    added this new entry and restarted it.
 	 */
-	memcpy((char *)(long)td->buf_vaddr, skb->data, skblen);
+	skb_copy_from_linear_data(skb, (char *)(long)td->buf_vaddr, skblen);
 	if (len != skblen)
 		memset((char *)(long)td->buf_vaddr + skb->len, 0, len-skblen);
 	td->tdma.cntinfo = (len & HPCDMA_BCNT) |
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index 99b61cfb7ce..f1a0e6c0fbd 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -2950,7 +2950,7 @@ static struct sk_buff *skge_rx_get(struct net_device *dev,
 		pci_dma_sync_single_for_cpu(skge->hw->pdev,
 					    pci_unmap_addr(e, mapaddr),
 					    len, PCI_DMA_FROMDEVICE);
-		memcpy(skb->data, e->skb->data, len);
+		skb_copy_from_linear_data(e->skb, skb->data, len);
 		pci_dma_sync_single_for_device(skge->hw->pdev,
 					       pci_unmap_addr(e, mapaddr),
 					       len, PCI_DMA_FROMDEVICE);
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index fd291fc9316..238c2ca34da 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1971,7 +1971,7 @@ static struct sk_buff *receive_copy(struct sky2_port *sky2,
 		skb_reserve(skb, 2);
 		pci_dma_sync_single_for_cpu(sky2->hw->pdev, re->data_addr,
 					    length, PCI_DMA_FROMDEVICE);
-		memcpy(skb->data, re->skb->data, length);
+		skb_copy_from_linear_data(re->skb, skb->data, length);
 		skb->ip_summed = re->skb->ip_summed;
 		skb->csum = re->skb->csum;
 		pci_dma_sync_single_for_device(sky2->hw->pdev, re->data_addr,
diff --git a/drivers/net/sun3_82586.c b/drivers/net/sun3_82586.c
index 5bcc749bef1..396c3d961f8 100644
--- a/drivers/net/sun3_82586.c
+++ b/drivers/net/sun3_82586.c
@@ -1026,7 +1026,7 @@ static int sun3_82586_send_packet(struct sk_buff *skb, struct net_device *dev)
 			memset((char *)p->xmit_cbuffs[p->xmit_count], 0, ETH_ZLEN);
 			len = ETH_ZLEN;
 		}
-		memcpy((char *)p->xmit_cbuffs[p->xmit_count],(char *)(skb->data),skb->len);
+		skb_copy_from_linear_data(skb, p->xmit_cbuffs[p->xmit_count], skb->len);
 
 #if (NUM_XMIT_BUFFS == 1)
 #	ifdef NO_NOPCOMMANDS
diff --git a/drivers/net/sun3lance.c b/drivers/net/sun3lance.c
index 0454827c8c2..327ed7962fb 100644
--- a/drivers/net/sun3lance.c
+++ b/drivers/net/sun3lance.c
@@ -629,7 +629,7 @@ static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev )
 	head->length = (-len) | 0xf000;
 	head->misc = 0;
 
-	memcpy( PKTBUF_ADDR(head), (void *)skb->data, skb->len );
+	skb_copy_from_linear_data(skb, PKTBUF_ADDR(head), skb->len);
 	if (len != skb->len)
 		memset(PKTBUF_ADDR(head) + skb->len, 0, len-skb->len);
 
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 4bb89dec565..9df1038ec6b 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -848,7 +848,7 @@ static int gem_rx(struct gem *gp, int work_to_do)
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			pci_dma_sync_single_for_cpu(gp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
-			memcpy(copy_skb->data, skb->data, len);
+			skb_copy_from_linear_data(skb, copy_skb->data, len);
 			pci_dma_sync_single_for_device(gp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
 
 			/* We'll reuse the original ring buffer. */
diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index 4b69c1deb9f..5304d7b94e5 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -2061,7 +2061,7 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev)
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			hme_dma_sync_for_cpu(hp, dma_addr, len, DMA_FROMDEVICE);
-			memcpy(copy_skb->data, skb->data, len);
+			skb_copy_from_linear_data(skb, copy_skb->data, len);
 			hme_dma_sync_for_device(hp, dma_addr, len, DMA_FROMDEVICE);
 
 			/* Reuse original ring buffer. */
diff --git a/drivers/net/sunlance.c b/drivers/net/sunlance.c
index 8f53a1ef608..42722530ab2 100644
--- a/drivers/net/sunlance.c
+++ b/drivers/net/sunlance.c
@@ -1143,7 +1143,7 @@ static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		struct lance_init_block *ib = lp->init_block_mem;
 		ib->btx_ring [entry].length = (-len) | 0xf000;
 		ib->btx_ring [entry].misc = 0;
-		memcpy((char *)&ib->tx_buf [entry][0], skb->data, skblen);
+		skb_copy_from_linear_data(skb, &ib->tx_buf [entry][0], skblen);
 		if (len != skblen)
 			memset((char *) &ib->tx_buf [entry][skblen], 0, len - skblen);
 		ib->btx_ring [entry].tmd1_bits = (LE_T1_POK | LE_T1_OWN);
diff --git a/drivers/net/sunqe.c b/drivers/net/sunqe.c
index fbfb98284fd..fa70e0b78af 100644
--- a/drivers/net/sunqe.c
+++ b/drivers/net/sunqe.c
@@ -592,7 +592,7 @@ static int qe_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Avoid a race... */
 	qep->qe_block->qe_txd[entry].tx_flags = TXD_UPDATE;
 
-	memcpy(txbuf, skb->data, len);
+	skb_copy_from_linear_data(skb, txbuf, len);
 
 	qep->qe_block->qe_txd[entry].tx_addr = txbuf_dvma;
 	qep->qe_block->qe_txd[entry].tx_flags =
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 414365c3198..38383e4e07a 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3350,7 +3350,7 @@ static int tg3_rx(struct tg3 *tp, int budget)
 			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			pci_dma_sync_single_for_cpu(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
-			memcpy(copy_skb->data, skb->data, len);
+			skb_copy_from_linear_data(skb, copy_skb->data, len);
 			pci_dma_sync_single_for_device(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
 
 			/* We'll reuse the original ring buffer. */
diff --git a/drivers/net/tlan.c b/drivers/net/tlan.c
index 2ede3f58cf9..106dc1ef0ac 100644
--- a/drivers/net/tlan.c
+++ b/drivers/net/tlan.c
@@ -1112,7 +1112,7 @@ static int TLan_StartTx( struct sk_buff *skb, struct net_device *dev )
 
 	if ( bbuf ) {
 		tail_buffer = priv->txBuffer + ( priv->txTail * TLAN_MAX_FRAME_SIZE );
-		memcpy( tail_buffer, skb->data, skb->len );
+		skb_copy_from_linear_data(skb, tail_buffer, skb->len);
 	} else {
 		tail_list->buffer[0].address = pci_map_single(priv->pciDev, skb->data, skb->len, PCI_DMA_TODEVICE);
 		TLan_StoreSKB(tail_list, skb);
diff --git a/drivers/net/tokenring/3c359.c b/drivers/net/tokenring/3c359.c
index d293423ee8e..e22a3f5333e 100644
--- a/drivers/net/tokenring/3c359.c
+++ b/drivers/net/tokenring/3c359.c
@@ -937,14 +937,17 @@ static void xl_rx(struct net_device *dev)
 				copy_len = xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfraglen & 0x7FFF ; 
 				frame_length -= copy_len ;  
 				pci_dma_sync_single_for_cpu(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
-				memcpy(skb_put(skb,copy_len), xl_priv->rx_ring_skb[xl_priv->rx_ring_tail]->data, copy_len) ; 
+				skb_copy_from_linear_data(xl_priv->rx_ring_skb[xl_priv->rx_ring_tail],
+							  skb_put(skb, copy_len),
+							  copy_len);
 				pci_dma_sync_single_for_device(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
 				adv_rx_ring(dev) ; 
 			} 
 
 			/* Now we have found the last fragment */
 			pci_dma_sync_single_for_cpu(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
-			memcpy(skb_put(skb,copy_len), xl_priv->rx_ring_skb[xl_priv->rx_ring_tail]->data, frame_length) ; 
+			skb_copy_from_linear_data(xl_priv->rx_ring_skb[xl_priv->rx_ring_tail],
+				      skb_put(skb,copy_len), frame_length);
 /*			memcpy(skb_put(skb,frame_length), bus_to_virt(xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr), frame_length) ; */
 			pci_dma_sync_single_for_device(xl_priv->pdev,xl_priv->xl_rx_ring[xl_priv->rx_ring_tail].upfragaddr,xl_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
 			adv_rx_ring(dev) ; 
diff --git a/drivers/net/tokenring/olympic.c b/drivers/net/tokenring/olympic.c
index a6206580888..09b3cfb8e80 100644
--- a/drivers/net/tokenring/olympic.c
+++ b/drivers/net/tokenring/olympic.c
@@ -845,7 +845,9 @@ static void olympic_rx(struct net_device *dev)
 							pci_dma_sync_single_for_cpu(olympic_priv->pdev,
 								le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer),
 								olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; 
-							memcpy(skb_put(skb,length-4),olympic_priv->rx_ring_skb[rx_ring_last_received]->data,length-4) ; 
+							skb_copy_from_linear_data(olympic_priv->rx_ring_skb[rx_ring_last_received],
+								      skb_put(skb,length - 4),
+								      length - 4);
 							pci_dma_sync_single_for_device(olympic_priv->pdev,
 								le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer),
 								olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
@@ -862,7 +864,9 @@ static void olympic_rx(struct net_device *dev)
 								olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ; 
 							rx_desc = &(olympic_priv->olympic_rx_ring[rx_ring_last_received]);
 							cpy_length = (i == 1 ? frag_len : le32_to_cpu(rx_desc->res_length)); 
-							memcpy(skb_put(skb, cpy_length), olympic_priv->rx_ring_skb[rx_ring_last_received]->data, cpy_length) ;
+							skb_copy_from_linear_data(olympic_priv->rx_ring_skb[rx_ring_last_received],
+								      skb_put(skb, cpy_length),
+								      cpy_length);
 							pci_dma_sync_single_for_device(olympic_priv->pdev,
 								le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer),
 								olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
diff --git a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c
index de6f72775ec..e6f0817c350 100644
--- a/drivers/net/tokenring/tms380tr.c
+++ b/drivers/net/tokenring/tms380tr.c
@@ -644,7 +644,7 @@ static int tms380tr_hardware_send_packet(struct sk_buff *skb, struct net_device
 		dmabuf  = 0;
 		i 	= tp->TplFree->TPLIndex;
 		buf 	= tp->LocalTxBuffers[i];
-		memcpy(buf, skb->data, length);
+		skb_copy_from_linear_data(skb, buf, length);
 		newbuf 	= ((char *)buf - (char *)tp) + tp->dmabuffer;
 	}
 	else {
diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c
index 8a7effa7090..d19f8568440 100644
--- a/drivers/net/tulip/de2104x.c
+++ b/drivers/net/tulip/de2104x.c
@@ -449,8 +449,8 @@ static void de_rx (struct de_private *de)
 		} else {
 			pci_dma_sync_single_for_cpu(de->pdev, mapping, len, PCI_DMA_FROMDEVICE);
 			skb_reserve(copy_skb, RX_OFFSET);
-			memcpy(skb_put(copy_skb, len), skb->data, len);
-
+			skb_copy_from_linear_data(skb, skb_put(copy_skb, len),
+						  len);
 			pci_dma_sync_single_for_device(de->pdev, mapping, len, PCI_DMA_FROMDEVICE);
 
 			/* We'll reuse the original ring buffer. */
diff --git a/drivers/net/tulip/dmfe.c b/drivers/net/tulip/dmfe.c
index a5e0237a653..b3a64ca9863 100644
--- a/drivers/net/tulip/dmfe.c
+++ b/drivers/net/tulip/dmfe.c
@@ -682,7 +682,7 @@ static int dmfe_start_xmit(struct sk_buff *skb, struct DEVICE *dev)
 
 	/* transmit this packet */
 	txptr = db->tx_insert_ptr;
-	memcpy(txptr->tx_buf_ptr, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, txptr->tx_buf_ptr, skb->len);
 	txptr->tdes1 = cpu_to_le32(0xe1000000 | skb->len);
 
 	/* Point to next transmit free descriptor */
@@ -989,7 +989,9 @@ static void dmfe_rx_packet(struct DEVICE *dev, struct dmfe_board_info * db)
 						skb = newskb;
 						/* size less than COPY_SIZE, allocate a rxlen SKB */
 						skb_reserve(skb, 2); /* 16byte align */
-						memcpy(skb_put(skb, rxlen), rxptr->rx_skb_ptr->data, rxlen);
+						skb_copy_from_linear_data(rxptr->rx_skb_ptr,
+							  skb_put(skb, rxlen),
+									  rxlen);
 						dmfe_reuse_skb(db, rxptr->rx_skb_ptr);
 					} else
 						skb_put(skb, rxlen);
diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c
index e46f4cb02c1..ca2548eb7d6 100644
--- a/drivers/net/tulip/uli526x.c
+++ b/drivers/net/tulip/uli526x.c
@@ -583,7 +583,7 @@ static int uli526x_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* transmit this packet */
 	txptr = db->tx_insert_ptr;
-	memcpy(txptr->tx_buf_ptr, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, txptr->tx_buf_ptr, skb->len);
 	txptr->tdes1 = cpu_to_le32(0xe1000000 | skb->len);
 
 	/* Point to next transmit free descriptor */
diff --git a/drivers/net/tulip/xircom_cb.c b/drivers/net/tulip/xircom_cb.c
index 1fe3734e155..985a1810ca5 100644
--- a/drivers/net/tulip/xircom_cb.c
+++ b/drivers/net/tulip/xircom_cb.c
@@ -411,9 +411,9 @@ static int xircom_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			   sometimes sends more than you ask it to. */
 
 			memset(&card->tx_buffer[bufferoffsets[desc]/4],0,1536);
-			memcpy(&(card->tx_buffer[bufferoffsets[desc]/4]),skb->data,skb->len);
-
-
+			skb_copy_from_linear_data(skb,
+				  &(card->tx_buffer[bufferoffsets[desc] / 4]),
+						  skb->len);
 			/* FIXME: The specification tells us that the length we send HAS to be a multiple of
 			   4 bytes. */
 
diff --git a/drivers/net/tulip/xircom_tulip_cb.c b/drivers/net/tulip/xircom_tulip_cb.c
index 3f24c82755f..696b3b8aac8 100644
--- a/drivers/net/tulip/xircom_tulip_cb.c
+++ b/drivers/net/tulip/xircom_tulip_cb.c
@@ -915,7 +915,9 @@ xircom_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	tp->tx_skbuff[entry] = skb;
 	if (tp->chip_id == X3201_3) {
-		memcpy(tp->tx_aligned_skbuff[entry]->data,skb->data,skb->len);
+		skb_copy_from_linear_data(skb,
+					  tp->tx_aligned_skbuff[entry]->data,
+					  skb->len);
 		tp->tx_ring[entry].buffer1 = virt_to_bus(tp->tx_aligned_skbuff[entry]->data);
 	} else
 		tp->tx_ring[entry].buffer1 = virt_to_bus(skb->data);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 288d8559f8c..4d461595406 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -386,8 +386,8 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
 		 *   - we are multicast promiscous.
 		 *   - we belong to the multicast group.
 		 */
-		memcpy(addr, skb->data,
-		       min_t(size_t, sizeof addr, skb->len));
+		skb_copy_from_linear_data(skb, addr, min_t(size_t, sizeof addr,
+								   skb->len));
 		bit_nr = ether_crc(sizeof addr, addr) >> 26;
 		if ((tun->if_flags & IFF_PROMISC) ||
 				memcmp(addr, tun->dev_addr, sizeof addr) == 0 ||
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 422eaf8ea12..25b75b61518 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -1339,7 +1339,8 @@ static inline int velocity_rx_copy(struct sk_buff **rx_skb, int pkt_size,
 			if (vptr->flags & VELOCITY_FLAGS_IP_ALIGN)
 				skb_reserve(new_skb, 2);
 
-			memcpy(new_skb->data, rx_skb[0]->data, pkt_size);
+			skb_copy_from_linear_data(rx_skb[0], new_skb->data,
+						  pkt_size);
 			*rx_skb = new_skb;
 			ret = 0;
 		}
@@ -1927,7 +1928,7 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (pktlen < ETH_ZLEN) {
 		/* Cannot occur until ZC support */
 		pktlen = ETH_ZLEN;
-		memcpy(tdinfo->buf, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, tdinfo->buf, skb->len);
 		memset(tdinfo->buf + skb->len, 0, ETH_ZLEN - skb->len);
 		tdinfo->skb = skb;
 		tdinfo->skb_dma[0] = tdinfo->buf_dma;
@@ -1943,7 +1944,7 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev)
 		int nfrags = skb_shinfo(skb)->nr_frags;
 		tdinfo->skb = skb;
 		if (nfrags > 6) {
-			memcpy(tdinfo->buf, skb->data, skb->len);
+			skb_copy_from_linear_data(skb, tdinfo->buf, skb->len);
 			tdinfo->skb_dma[0] = tdinfo->buf_dma;
 			td_ptr->tdesc0.pktsize =
 			td_ptr->td_buf[0].pa_low = cpu_to_le32(tdinfo->skb_dma[0]);
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index a576113abbd..ae132c1c545 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -1702,7 +1702,7 @@ static int lmc_rx (struct net_device *dev) /*fold00*/
             if(!nsb) {
                 goto give_it_anyways;
             }
-            memcpy(skb_put(nsb, len), skb->data, len);
+            skb_copy_from_linear_data(skb, skb_put(nsb, len), len);
             
             nsb->protocol = lmc_proto_type(sc, skb);
             skb_reset_mac_header(nsb);
diff --git a/drivers/net/wan/pc300_drv.c b/drivers/net/wan/pc300_drv.c
index edbc55528be..8ba75bb1732 100644
--- a/drivers/net/wan/pc300_drv.c
+++ b/drivers/net/wan/pc300_drv.c
@@ -1765,7 +1765,7 @@ cpc_trace(struct net_device *dev, struct sk_buff *skb_main, char rx_tx)
 	skb->data[7] = ']';
 	skb->data[8] = ':';
 	skb->data[9] = ' ';
-	memcpy(&skb->data[10], skb_main->data, skb_main->len);
+	skb_copy_from_linear_data(skb_main, &skb->data[10], skb_main->len);
 
 	netif_rx(skb);
 }
diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index 8b4540bfc1b..9432d2ce774 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -1782,7 +1782,7 @@ int z8530_queue_xmit(struct z8530_channel *c, struct sk_buff *skb)
 		 */
 		c->tx_next_ptr=c->tx_dma_buf[c->tx_dma_used];
 		c->tx_dma_used^=1;	/* Flip temp buffer */
-		memcpy(c->tx_next_ptr, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, c->tx_next_ptr, skb->len);
 	}
 	else
 		c->tx_next_ptr=skb->data;	
diff --git a/drivers/net/wireless/atmel.c b/drivers/net/wireless/atmel.c
index 1c17cbe007b..51a7db53afa 100644
--- a/drivers/net/wireless/atmel.c
+++ b/drivers/net/wireless/atmel.c
@@ -827,14 +827,14 @@ static int start_tx(struct sk_buff *skb, struct net_device *dev)
 	if (priv->wep_is_on)
 		frame_ctl |= IEEE80211_FCTL_PROTECTED;
 	if (priv->operating_mode == IW_MODE_ADHOC) {
-		memcpy(&header.addr1, skb->data, 6);
+		skb_copy_from_linear_data(skb, &header.addr1, 6);
 		memcpy(&header.addr2, dev->dev_addr, 6);
 		memcpy(&header.addr3, priv->BSSID, 6);
 	} else {
 		frame_ctl |= IEEE80211_FCTL_TODS;
 		memcpy(&header.addr1, priv->CurrentBSSID, 6);
 		memcpy(&header.addr2, dev->dev_addr, 6);
-		memcpy(&header.addr3, skb->data, 6);
+		skb_copy_from_linear_data(skb, &header.addr3, 6);
 	}
 
 	if (priv->use_wpa)
diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_dma.c b/drivers/net/wireless/bcm43xx/bcm43xx_dma.c
index 6e0dc76400e..e3d2e61a31e 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx_dma.c
+++ b/drivers/net/wireless/bcm43xx/bcm43xx_dma.c
@@ -998,7 +998,8 @@ static void dma_tx_fragment(struct bcm43xx_dmaring *ring,
 			assert(0);
 			return;
 		}
-		memcpy(skb_put(bounce_skb, skb->len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(bounce_skb, skb->len),
+					  skb->len);
 		dev_kfree_skb_any(skb);
 		skb = bounce_skb;
 	}
diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c
index 35a3a50724f..cbedc9ee740 100644
--- a/drivers/net/wireless/hostap/hostap_80211_rx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_rx.c
@@ -933,12 +933,14 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 		if (frag == 0) {
 			/* copy first fragment (including full headers) into
 			 * beginning of the fragment cache skb */
-			memcpy(skb_put(frag_skb, flen), skb->data, flen);
+			skb_copy_from_linear_data(skb, skb_put(frag_skb, flen),
+						  flen);
 		} else {
 			/* append frame payload to the end of the fragment
 			 * cache skb */
-			memcpy(skb_put(frag_skb, flen), skb->data + hdrlen,
-			       flen);
+			skb_copy_from_linear_data_offset(skb, hdrlen,
+							 skb_put(frag_skb,
+								 flen), flen);
 		}
 		dev_kfree_skb(skb);
 		skb = NULL;
@@ -1044,8 +1046,9 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb,
 	    skb->len >= ETH_HLEN + ETH_ALEN) {
 		/* Non-standard frame: get addr4 from its bogus location after
 		 * the payload */
-		memcpy(skb->data + ETH_ALEN,
-		       skb->data + skb->len - ETH_ALEN, ETH_ALEN);
+		skb_copy_from_linear_data_offset(skb, skb->len - ETH_ALEN,
+						 skb->data + ETH_ALEN,
+						 ETH_ALEN);
 		skb_trim(skb, skb->len - ETH_ALEN);
 	}
 
diff --git a/drivers/net/wireless/hostap/hostap_80211_tx.c b/drivers/net/wireless/hostap/hostap_80211_tx.c
index 159baef18e4..246fac0e800 100644
--- a/drivers/net/wireless/hostap/hostap_80211_tx.c
+++ b/drivers/net/wireless/hostap/hostap_80211_tx.c
@@ -146,7 +146,8 @@ int hostap_data_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			fc |= IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS;
 			/* From&To DS: Addr1 = RA, Addr2 = TA, Addr3 = DA,
 			 * Addr4 = SA */
-			memcpy(&hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN);
+			skb_copy_from_linear_data_offset(skb, ETH_ALEN,
+							 &hdr.addr4, ETH_ALEN);
 			hdr_len += ETH_ALEN;
 		} else {
 			/* bogus 4-addr format to workaround Prism2 station
@@ -159,7 +160,8 @@ int hostap_data_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			/* SA from skb->data + ETH_ALEN will be added after
 			 * frame payload; use hdr.addr4 as a temporary buffer
 			 */
-			memcpy(&hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN);
+			skb_copy_from_linear_data_offset(skb, ETH_ALEN,
+							 &hdr.addr4, ETH_ALEN);
 			need_tailroom += ETH_ALEN;
 		}
 
@@ -174,24 +176,27 @@ int hostap_data_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		else
 			memcpy(&hdr.addr1, local->bssid, ETH_ALEN);
 		memcpy(&hdr.addr2, dev->dev_addr, ETH_ALEN);
-		memcpy(&hdr.addr3, skb->data, ETH_ALEN);
+		skb_copy_from_linear_data(skb, &hdr.addr3, ETH_ALEN);
 	} else if (local->iw_mode == IW_MODE_MASTER && !to_assoc_ap) {
 		fc |= IEEE80211_FCTL_FROMDS;
 		/* From DS: Addr1 = DA, Addr2 = BSSID, Addr3 = SA */
-		memcpy(&hdr.addr1, skb->data, ETH_ALEN);
+		skb_copy_from_linear_data(skb, &hdr.addr1, ETH_ALEN);
 		memcpy(&hdr.addr2, dev->dev_addr, ETH_ALEN);
-		memcpy(&hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN);
+		skb_copy_from_linear_data_offset(skb, ETH_ALEN, &hdr.addr3,
+						 ETH_ALEN);
 	} else if (local->iw_mode == IW_MODE_INFRA || to_assoc_ap) {
 		fc |= IEEE80211_FCTL_TODS;
 		/* To DS: Addr1 = BSSID, Addr2 = SA, Addr3 = DA */
 		memcpy(&hdr.addr1, to_assoc_ap ?
 		       local->assoc_ap_addr : local->bssid, ETH_ALEN);
-		memcpy(&hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
-		memcpy(&hdr.addr3, skb->data, ETH_ALEN);
+		skb_copy_from_linear_data_offset(skb, ETH_ALEN, &hdr.addr2,
+						 ETH_ALEN);
+		skb_copy_from_linear_data(skb, &hdr.addr3, ETH_ALEN);
 	} else if (local->iw_mode == IW_MODE_ADHOC) {
 		/* not From/To DS: Addr1 = DA, Addr2 = SA, Addr3 = BSSID */
-		memcpy(&hdr.addr1, skb->data, ETH_ALEN);
-		memcpy(&hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
+		skb_copy_from_linear_data(skb, &hdr.addr1, ETH_ALEN);
+		skb_copy_from_linear_data_offset(skb, ETH_ALEN, &hdr.addr2,
+						 ETH_ALEN);
 		memcpy(&hdr.addr3, local->bssid, ETH_ALEN);
 	}
 
diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c
index 797d950d5d6..4ca8a27b8c5 100644
--- a/drivers/net/wireless/hostap/hostap_ap.c
+++ b/drivers/net/wireless/hostap/hostap_ap.c
@@ -1277,8 +1277,8 @@ static char * ap_auth_make_challenge(struct ap_data *ap)
 		return NULL;
 	}
 
-	memcpy(tmpbuf, skb->data + ap->crypt->extra_mpdu_prefix_len,
-	       WLAN_AUTH_CHALLENGE_LEN);
+	skb_copy_from_linear_data_offset(skb, ap->crypt->extra_mpdu_prefix_len,
+					 tmpbuf, WLAN_AUTH_CHALLENGE_LEN);
 	dev_kfree_skb(skb);
 
 	return tmpbuf;
diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c
index 9003ff7d151..fb01fb95a9f 100644
--- a/drivers/net/wireless/hostap/hostap_hw.c
+++ b/drivers/net/wireless/hostap/hostap_hw.c
@@ -1838,13 +1838,14 @@ static int prism2_tx_80211(struct sk_buff *skb, struct net_device *dev)
 
 	/* skb->data starts with txdesc->frame_control */
 	hdr_len = 24;
-	memcpy(&txdesc.frame_control, skb->data, hdr_len);
+	skb_copy_from_linear_data(skb, &txdesc.frame_control, hdr_len);
  	fc = le16_to_cpu(txdesc.frame_control);
 	if (WLAN_FC_GET_TYPE(fc) == IEEE80211_FTYPE_DATA &&
 	    (fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS) &&
 	    skb->len >= 30) {
 		/* Addr4 */
-		memcpy(txdesc.addr4, skb->data + hdr_len, ETH_ALEN);
+		skb_copy_from_linear_data_offset(skb, hdr_len, txdesc.addr4,
+						 ETH_ALEN);
 		hdr_len += ETH_ALEN;
 	}
 
diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c
index ad6e4a42835..9137a4dd02e 100644
--- a/drivers/net/wireless/ipw2100.c
+++ b/drivers/net/wireless/ipw2100.c
@@ -2416,8 +2416,9 @@ static void isr_rx(struct ipw2100_priv *priv, int i,
 #ifdef IPW2100_RX_DEBUG
 	/* Make a copy of the frame so we can dump it to the logs if
 	 * ieee80211_rx fails */
-	memcpy(packet_data, packet->skb->data,
-	       min_t(u32, status->frame_size, IPW_RX_NIC_BUFFER_LENGTH));
+	skb_copy_from_linear_data(packet->skb, packet_data,
+				  min_t(u32, status->frame_size,
+					     IPW_RX_NIC_BUFFER_LENGTH));
 #endif
 
 	if (!ieee80211_rx(priv->ieee, packet->skb, stats)) {
diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c
index b04c56a25cc..4839a45098c 100644
--- a/drivers/net/wireless/ipw2200.c
+++ b/drivers/net/wireless/ipw2200.c
@@ -10355,7 +10355,7 @@ static void ipw_handle_promiscuous_tx(struct ipw_priv *priv,
 
 		rt_hdr->it_len = dst->len;
 
-		memcpy(skb_put(dst, len), src->data, len);
+		skb_copy_from_linear_data(src, skb_put(dst, len), len);
 
 		if (!ieee80211_rx(priv->prom_priv->ieee, dst, &dummystats))
 			dev_kfree_skb_any(dst);
diff --git a/drivers/net/wireless/prism54/islpci_eth.c b/drivers/net/wireless/prism54/islpci_eth.c
index 6ebfff03424..7d8bff1dbc4 100644
--- a/drivers/net/wireless/prism54/islpci_eth.c
+++ b/drivers/net/wireless/prism54/islpci_eth.c
@@ -162,13 +162,16 @@ islpci_eth_transmit(struct sk_buff *skb, struct net_device *ndev)
 
 			skb_put(newskb, init_wds ? skb->len + 6 : skb->len);
 			if (init_wds) {
-				memcpy(newskb->data + 6, skb->data, skb->len);
+				skb_copy_from_linear_data(skb,
+							  newskb->data + 6,
+							  skb->len);
 				memcpy(newskb->data, wds_mac, 6);
 #ifdef ISLPCI_ETH_DEBUG
 				printk("islpci_eth_transmit:wds_mac\n");
 #endif
 			} else
-				memcpy(newskb->data, skb->data, skb->len);
+				skb_copy_from_linear_data(skb, newskb->data,
+							  skb->len);
 
 #if VERBOSE > SHOW_ERROR_MESSAGES
 			DEBUG(SHOW_TRACING, "memcpy %p %p %i wds %i\n",
@@ -394,8 +397,10 @@ islpci_eth_receive(islpci_private *priv)
 			/* Update spy records */
 			wireless_spy_update(ndev, annex->addr2, &wstats);
 
-			memcpy(skb->data + sizeof (struct rfmon_header),
-			       skb->data, 2 * ETH_ALEN);
+			skb_copy_from_linear_data(skb,
+						  (skb->data +
+						   sizeof(struct rfmon_header)),
+						  2 * ETH_ALEN);
 			skb_pull(skb, sizeof (struct rfmon_header));
 		}
 		skb->protocol = eth_type_trans(skb, ndev);
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 9633b0457f8..3be624295a1 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -2242,7 +2242,8 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, unsigned i
     rx_ptr += copy_from_rx_buff(local, rx_ptr, pkt_addr & RX_BUFF_END, rx_len);
     /* Get source address */
 #ifdef WIRELESS_SPY
-    memcpy(linksrcaddr, ((struct mac_header *)skb->data)->addr_2, ETH_ALEN);
+    skb_copy_from_linear_data_offset(skb, offsetof(struct mac_header, addr_2),
+				     linksrcaddr, ETH_ALEN);
 #endif
     /* Now, deal with encapsulation/translation/sniffer */
     if (!sniffer) {
diff --git a/drivers/net/wireless/wavelan.c b/drivers/net/wireless/wavelan.c
index 2bf77b1ee53..1cf090d60ed 100644
--- a/drivers/net/wireless/wavelan.c
+++ b/drivers/net/wireless/wavelan.c
@@ -2938,7 +2938,7 @@ static int wavelan_packet_xmit(struct sk_buff *skb, struct net_device * dev)
 	 * need to pad. Jean II */
 	if (skb->len < ETH_ZLEN) {
 		memset(data, 0, ETH_ZLEN);
-		memcpy(data, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, data, skb->len);
 		/* Write packet on the card */
 		if(wv_packet_write(dev, data, ETH_ZLEN))
 			return 1;	/* We failed */
diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c
index 1fe013a7297..935b144d9b5 100644
--- a/drivers/net/wireless/zd1201.c
+++ b/drivers/net/wireless/zd1201.c
@@ -807,10 +807,10 @@ static int zd1201_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	txbuf[4] = 0x00;
 	txbuf[5] = 0x00;
 
-	memcpy(txbuf+6, skb->data+12, skb->len-12);
+	skb_copy_from_linear_data_offset(skb, 12, txbuf + 6, skb->len - 12);
 	if (pad)
 		txbuf[skb->len-12+6]=0;
-	memcpy(txbuf+skb->len-12+6+pad, skb->data, 12);
+	skb_copy_from_linear_data(skb, txbuf + skb->len - 12 + 6 + pad, 12);
 	*(__be16*)&txbuf[skb->len+6+pad] = htons(skb->len-12+6);
 	txbuf[txbuflen-1] = 0;
 
diff --git a/drivers/s390/net/ctcmain.c b/drivers/s390/net/ctcmain.c
index 54e3f806cd5..b0f813e6f48 100644
--- a/drivers/s390/net/ctcmain.c
+++ b/drivers/s390/net/ctcmain.c
@@ -472,7 +472,8 @@ ctc_unpack_skb(struct channel *ch, struct sk_buff *pskb)
 			privptr->stats.rx_dropped++;
 			return;
 		}
-		memcpy(skb_put(skb, pskb->len), pskb->data, pskb->len);
+		skb_copy_from_linear_data(pskb, skb_put(skb, pskb->len),
+					  pskb->len);
 		skb_reset_mac_header(skb);
 		skb->dev = pskb->dev;
 		skb->protocol = pskb->protocol;
@@ -716,8 +717,9 @@ ch_action_txdone(fsm_instance * fi, int event, void *arg)
 		*((__u16 *) skb_put(ch->trans_skb, 2)) = ch->collect_len + 2;
 		i = 0;
 		while ((skb = skb_dequeue(&ch->collect_queue))) {
-			memcpy(skb_put(ch->trans_skb, skb->len), skb->data,
-			       skb->len);
+			skb_copy_from_linear_data(skb, skb_put(ch->trans_skb,
+							       skb->len),
+						  skb->len);
 			privptr->stats.tx_packets++;
 			privptr->stats.tx_bytes += skb->len - LL_HEADER_LENGTH;
 			atomic_dec(&skb->users);
@@ -2268,8 +2270,9 @@ transmit_skb(struct channel *ch, struct sk_buff *skb)
 			skb_reset_tail_pointer(ch->trans_skb);
 			ch->trans_skb->len = 0;
 			ch->ccw[1].count = skb->len;
-			memcpy(skb_put(ch->trans_skb, skb->len), skb->data,
-			       skb->len);
+			skb_copy_from_linear_data(skb, skb_put(ch->trans_skb,
+							       skb->len),
+						  skb->len);
 			atomic_dec(&skb->users);
 			dev_kfree_skb_irq(skb);
 			ccw_idx = 0;
diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
index 1c23e187a3b..08a994fdd1a 100644
--- a/drivers/s390/net/lcs.c
+++ b/drivers/s390/net/lcs.c
@@ -1576,7 +1576,7 @@ __lcs_start_xmit(struct lcs_card *card, struct sk_buff *skb,
 	header->offset = card->tx_buffer->count;
 	header->type = card->lan_type;
 	header->slot = card->portno;
-	memcpy(header + 1, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, header + 1, skb->len);
 	spin_unlock(&card->lock);
 	card->stats.tx_bytes += skb->len;
 	card->stats.tx_packets++;
diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index cd42bd54988..e10e85e85c8 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -645,7 +645,8 @@ static void netiucv_unpack_skb(struct iucv_connection *conn,
 			privptr->stats.rx_dropped++;
 			return;
 		}
-		memcpy(skb_put(skb, pskb->len), pskb->data, pskb->len);
+		skb_copy_from_linear_data(pskb, skb_put(skb, pskb->len),
+					  pskb->len);
 		skb_reset_mac_header(skb);
 		skb->dev = pskb->dev;
 		skb->protocol = pskb->protocol;
@@ -744,7 +745,9 @@ static void conn_action_txdone(fsm_instance *fi, int event, void *arg)
 		header.next = conn->tx_buff->len + skb->len + NETIUCV_HDRLEN;
 		memcpy(skb_put(conn->tx_buff, NETIUCV_HDRLEN), &header,
 		       NETIUCV_HDRLEN);
-		memcpy(skb_put(conn->tx_buff, skb->len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb,
+					  skb_put(conn->tx_buff, skb->len),
+					  skb->len);
 		txbytes += skb->len;
 		txpackets++;
 		stat_maxcq++;
diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c
index 5890bb5ad23..dd7034fbfff 100644
--- a/drivers/s390/net/qeth_eddp.c
+++ b/drivers/s390/net/qeth_eddp.c
@@ -267,7 +267,8 @@ qeth_eddp_copy_data_tcp(char *dst, struct qeth_eddp_data *eddp, int len,
 
 	QETH_DBF_TEXT(trace, 5, "eddpcdtc");
 	if (skb_shinfo(eddp->skb)->nr_frags == 0) {
-		memcpy(dst, eddp->skb->data + eddp->skb_offset, len);
+		skb_copy_from_linear_data_offset(eddp->skb, eddp->skb_offset,
+						 dst, len);
 		*hcsum = csum_partial(eddp->skb->data + eddp->skb_offset, len,
 				      *hcsum);
 		eddp->skb_offset += len;
diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c
index a076f735a7b..d287c575522 100644
--- a/drivers/usb/atm/usbatm.c
+++ b/drivers/usb/atm/usbatm.c
@@ -484,7 +484,7 @@ static unsigned int usbatm_write_cells(struct usbatm_data *instance,
 		ptr[4] = 0xec;
 		ptr += ATM_CELL_HEADER;
 
-		memcpy(ptr, skb->data, data_len);
+		skb_copy_from_linear_data(skb, ptr, data_len);
 		ptr += data_len;
 		__skb_pull(skb, data_len);
 
diff --git a/drivers/usb/net/catc.c b/drivers/usb/net/catc.c
index d82022dd7f2..ffec2e01b89 100644
--- a/drivers/usb/net/catc.c
+++ b/drivers/usb/net/catc.c
@@ -418,7 +418,7 @@ static int catc_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 	catc->tx_ptr = (((catc->tx_ptr - 1) >> 6) + 1) << 6;
 	tx_buf = catc->tx_buf[catc->tx_idx] + catc->tx_ptr;
 	*((u16*)tx_buf) = (catc->is_f5u011) ? cpu_to_be16((u16)skb->len) : cpu_to_le16((u16)skb->len);
-	memcpy(tx_buf + 2, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, tx_buf + 2, skb->len);
 	catc->tx_ptr += skb->len + 2;
 
 	if (!test_and_set_bit(TX_RUNNING, &catc->flags))
diff --git a/drivers/usb/net/pegasus.c b/drivers/usb/net/pegasus.c
index 13f70e09ea4..1ad4ee54b18 100644
--- a/drivers/usb/net/pegasus.c
+++ b/drivers/usb/net/pegasus.c
@@ -889,7 +889,7 @@ static int pegasus_start_xmit(struct sk_buff *skb, struct net_device *net)
 	netif_stop_queue(net);
 
 	((__le16 *) pegasus->tx_buff)[0] = cpu_to_le16(l16);
-	memcpy(pegasus->tx_buff + 2, skb->data, skb->len);
+	skb_copy_from_linear_data(skb, pegasus->tx_buff + 2, skb->len);
 	usb_fill_bulk_urb(pegasus->tx_urb, pegasus->usb,
 			  usb_sndbulkpipe(pegasus->usb, 2),
 			  pegasus->tx_buff, count,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1c19b2d55c2..08c96bcbc59 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1507,6 +1507,20 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 	return buffer;
 }
 
+static inline void skb_copy_from_linear_data(const struct sk_buff *skb,
+					     void *to,
+					     const unsigned int len)
+{
+	memcpy(to, skb->data, len);
+}
+
+static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb,
+						    const int offset, void *to,
+						    const unsigned int len)
+{
+	memcpy(to, skb->data + offset, len);
+}
+
 extern void skb_init(void);
 
 /**
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index e66953ce53e..92b517af726 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -150,7 +150,7 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
 				skb_reserve(skbn, frontlen + 2);
 				skb_set_network_header(skbn,
 						      skb_network_offset(skb));
-				memcpy(skb_put(skbn, len), skb->data, len);
+				skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
 				p = skb_push(skbn, 2);
 
 				*p++ = AX25_P_SEGMENT;
@@ -164,7 +164,7 @@ void ax25_output(ax25_cb *ax25, int paclen, struct sk_buff *skb)
 				skb_reserve(skbn, frontlen + 1);
 				skb_set_network_header(skbn,
 						      skb_network_offset(skb));
-				memcpy(skb_put(skbn, len), skb->data, len);
+				skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
 				p = skb_push(skbn, 1);
 				*p = AX25_P_TEXT;
 			}
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 97156c4abc8..ab2db55982c 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -382,7 +382,7 @@ static inline int bnep_rx_frame(struct bnep_session *s, struct sk_buff *skb)
 		break;
 	}
 
-	memcpy(__skb_put(nskb, skb->len), skb->data, skb->len);
+	skb_copy_from_linear_data(skb, __skb_put(nskb, skb->len), skb->len);
 	kfree_skb(skb);
 
 	s->stats.rx_packets++;
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 3933608a929..66bef1ccee2 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -124,7 +124,7 @@ static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const
 	}
 
 	if (skb && (skb->len > 0))
-		memcpy(skb_put(nskb, skb->len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(nskb, skb->len), skb->len);
 
 	memcpy(skb_put(nskb, count), buf, count);
 
@@ -256,7 +256,7 @@ static void cmtp_process_transmit(struct cmtp_session *session)
 			hdr[2] = size >> 8;
 		}
 
-		memcpy(skb_put(nskb, size), skb->data, size);
+		skb_copy_from_linear_data(skb, skb_put(nskb, size), size);
 		skb_pull(skb, size);
 
 		if (skb->len > 0) {
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 162eab6a447..a5867879b61 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2107,7 +2107,8 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
 		if (!(conn->rx_skb = bt_skb_alloc(len, GFP_ATOMIC)))
 			goto drop;
 
-		memcpy(skb_put(conn->rx_skb, skb->len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
+			      skb->len);
 		conn->rx_len = len - skb->len;
 	} else {
 		BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len);
@@ -2128,7 +2129,8 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
 			goto drop;
 		}
 
-		memcpy(skb_put(conn->rx_skb, skb->len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
+			      skb->len);
 		conn->rx_len -= skb->len;
 
 		if (!conn->rx_len) {
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 8cee7fdc16c..8b45224699f 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -129,7 +129,8 @@ static inline void nf_bridge_save_header(struct sk_buff *skb)
 	if (skb->protocol == htons(ETH_P_8021Q))
 		header_size += VLAN_HLEN;
 
-	memcpy(skb->nf_bridge->data, skb->data - header_size, header_size);
+	skb_copy_from_linear_data_offset(skb, -header_size,
+					 skb->nf_bridge->data, header_size);
 }
 
 /*
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f16c72204cf..17c6bb5927b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -576,7 +576,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
 	/* Set the tail pointer and length */
 	skb_put(n, skb_headlen(skb));
 	/* Copy the bytes */
-	memcpy(n->data, skb->data, n->len);
+	skb_copy_from_linear_data(skb, n->data, n->len);
 	n->csum	     = skb->csum;
 	n->ip_summed = skb->ip_summed;
 
@@ -1043,7 +1043,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 	if ((copy = start - offset) > 0) {
 		if (copy > len)
 			copy = len;
-		memcpy(to, skb->data + offset, copy);
+		skb_copy_from_linear_data_offset(skb, offset, to, copy);
 		if ((len -= copy) == 0)
 			return 0;
 		offset += copy;
@@ -1362,7 +1362,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 
 	BUG_ON(csstart > skb_headlen(skb));
 
-	memcpy(to, skb->data, csstart);
+	skb_copy_from_linear_data(skb, to, csstart);
 
 	csum = 0;
 	if (csstart != skb->len)
@@ -1536,8 +1536,8 @@ static inline void skb_split_inside_header(struct sk_buff *skb,
 {
 	int i;
 
-	memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len);
-
+	skb_copy_from_linear_data_offset(skb, len, skb_put(skb1, pos - len),
+					 pos - len);
 	/* And move data appendix as is. */
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 		skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
@@ -1927,8 +1927,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		skb_set_network_header(nskb, skb->mac_len);
 		nskb->transport_header = (nskb->network_header +
 					  skb_network_header_len(skb));
-		memcpy(skb_put(nskb, doffset), skb->data, doffset);
-
+		skb_copy_from_linear_data(skb, skb_put(nskb, doffset),
+					  doffset);
 		if (!sg) {
 			nskb->csum = skb_copy_and_csum_bits(skb, offset,
 							    skb_put(nskb, len),
@@ -1941,7 +1941,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 
 		nskb->ip_summed = CHECKSUM_PARTIAL;
 		nskb->csum = skb->csum;
-		memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
+		skb_copy_from_linear_data_offset(skb, offset,
+						 skb_put(nskb, hsize), hsize);
 
 		while (pos < offset + len) {
 			BUG_ON(i >= nfrags);
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index d9498a165ac..4074a6e5d0d 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -362,7 +362,8 @@ static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb)
 			u16 dlen = *skb->data;
 			if ((dlen <= 16) && (dlen <= skb->len)) {
 				scp->conndata_in.opt_optl = dn_htons(dlen);
-				memcpy(scp->conndata_in.opt_data, skb->data + 1, dlen);
+				skb_copy_from_linear_data_offset(skb, 1,
+					      scp->conndata_in.opt_data, dlen);
 			}
 		}
 		dn_nsp_send_link(sk, DN_NOCHANGE, 0);
@@ -406,7 +407,7 @@ static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb)
 		u16 dlen = *skb->data;
 		if ((dlen <= 16) && (dlen <= skb->len)) {
 			scp->discdata_in.opt_optl = dn_htons(dlen);
-			memcpy(scp->discdata_in.opt_data, skb->data + 1, dlen);
+			skb_copy_from_linear_data_offset(skb, 1, scp->discdata_in.opt_data, dlen);
 		}
 	}
 
diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c
index ec6d8851a06..4eb35079e43 100644
--- a/net/ieee80211/ieee80211_crypt_wep.c
+++ b/net/ieee80211/ieee80211_crypt_wep.c
@@ -152,7 +152,7 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 		return -1;
 
 	/* Copy the IV into the first 3 bytes of the key */
-	memcpy(key, skb->data + hdr_len, 3);
+	skb_copy_from_linear_data_offset(skb, hdr_len, key, 3);
 
 	/* Copy rest of the WEP key (the secret part) */
 	memcpy(key + 3, wep->key, wep->key_len);
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 59a765c49cf..94e2b8e2ab2 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -606,12 +606,12 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 		if (frag == 0) {
 			/* copy first fragment (including full headers) into
 			 * beginning of the fragment cache skb */
-			memcpy(skb_put(frag_skb, flen), skb->data, flen);
+			skb_copy_from_linear_data(skb, skb_put(frag_skb, flen), flen);
 		} else {
 			/* append frame payload to the end of the fragment
 			 * cache skb */
-			memcpy(skb_put(frag_skb, flen), skb->data + hdrlen,
-			       flen);
+			skb_copy_from_linear_data_offset(skb, hdrlen,
+				      skb_put(frag_skb, flen), flen);
 		}
 		dev_kfree_skb_any(skb);
 		skb = NULL;
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index 62a8a2b7653..a4c3c51140a 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -309,8 +309,8 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	/* Save source and destination addresses */
-	memcpy(dest, skb->data, ETH_ALEN);
-	memcpy(src, skb->data + ETH_ALEN, ETH_ALEN);
+	skb_copy_from_linear_data(skb, dest, ETH_ALEN);
+	skb_copy_from_linear_data_offset(skb, ETH_ALEN, src, ETH_ALEN);
 
 	if (host_encrypt || host_build_iv)
 		fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA |
@@ -363,7 +363,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 		snapped = 1;
 		ieee80211_copy_snap(skb_put(skb_new, SNAP_SIZE + sizeof(u16)),
 				    ether_type);
-		memcpy(skb_put(skb_new, skb->len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(skb_new, skb->len), skb->len);
 		res = crypt->ops->encrypt_msdu(skb_new, hdr_len, crypt->priv);
 		if (res < 0) {
 			IEEE80211_ERROR("msdu encryption failed\n");
@@ -492,7 +492,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
 			bytes -= SNAP_SIZE + sizeof(u16);
 		}
 
-		memcpy(skb_put(skb_frag, bytes), skb->data, bytes);
+		skb_copy_from_linear_data(skb, skb_put(skb_frag, bytes), bytes);
 
 		/* Advance the SKB... */
 		skb_pull(skb, bytes);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 875da382d9b..34606eff8a0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -596,7 +596,7 @@ slow_path:
 		 *	Copy the packet header into the new buffer.
 		 */
 
-		memcpy(skb_network_header(skb2), skb->data, hlen);
+		skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
 
 		/*
 		 *	Copy a block of the IP datagram.
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index be3f082a87e..4cfdad4e835 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -746,7 +746,7 @@ slow_path:
 		/*
 		 *	Copy the packet header into the new buffer.
 		 */
-		memcpy(skb_network_header(frag), skb->data, hlen);
+		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 
 		/*
 		 *	Build fragment header.
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index da3f2bc1b6f..b55bc8f989d 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -314,8 +314,8 @@ static inline void irttp_fragment_skb(struct tsap_cb *self,
 		skb_reserve(frag, self->max_header_size);
 
 		/* Copy data from the original skb into this fragment. */
-		memcpy(skb_put(frag, self->max_seg_size), skb->data,
-		       self->max_seg_size);
+		skb_copy_from_linear_data(skb, skb_put(frag, self->max_seg_size),
+			      self->max_seg_size);
 
 		/* Insert TTP header, with the more bit set */
 		frame = skb_push(frag, TTP_HEADER);
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 053fa26ff90..5dc7448925d 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1160,7 +1160,8 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	if (sax != NULL) {
 		sax->sax25_family = AF_NETROM;
-		memcpy(sax->sax25_call.ax25_call, skb->data + 7, AX25_ADDR_LEN);
+		skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call,
+			      AX25_ADDR_LEN);
 	}
 
 	msg->msg_namelen = sizeof(*sax);
diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c
index 99fdab16ded..f324d5df418 100644
--- a/net/netrom/nr_loopback.c
+++ b/net/netrom/nr_loopback.c
@@ -34,7 +34,7 @@ int nr_loopback_queue(struct sk_buff *skb)
 	struct sk_buff *skbn;
 
 	if ((skbn = alloc_skb(skb->len, GFP_ATOMIC)) != NULL) {
-		memcpy(skb_put(skbn, skb->len), skb->data, skb->len);
+		skb_copy_from_linear_data(skb, skb_put(skbn, skb->len), skb->len);
 		skb_reset_transport_header(skbn);
 
 		skb_queue_tail(&loopback_queue, skbn);
diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
index 0cbfb611465..7c467c95c7d 100644
--- a/net/netrom/nr_out.c
+++ b/net/netrom/nr_out.c
@@ -40,7 +40,7 @@ void nr_output(struct sock *sk, struct sk_buff *skb)
 
 	if (skb->len - NR_TRANSPORT_LEN > NR_MAX_PACKET_SIZE) {
 		/* Save a copy of the Transport Header */
-		memcpy(transport, skb->data, NR_TRANSPORT_LEN);
+		skb_copy_from_linear_data(skb, transport, NR_TRANSPORT_LEN);
 		skb_pull(skb, NR_TRANSPORT_LEN);
 
 		frontlen = skb_headroom(skb);
@@ -54,7 +54,7 @@ void nr_output(struct sock *sk, struct sk_buff *skb)
 			len = (NR_MAX_PACKET_SIZE > skb->len) ? skb->len : NR_MAX_PACKET_SIZE;
 
 			/* Copy the user data */
-			memcpy(skb_put(skbn, len), skb->data, len);
+			skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
 			skb_pull(skb, len);
 
 			/* Duplicate the Transport Header */
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
index 07b694d1887..04e7d0d2fd8 100644
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -226,13 +226,13 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags)
 
 	dptr = skb_put(skbn, NR_NETWORK_LEN + NR_TRANSPORT_LEN);
 
-	memcpy(dptr, skb->data + 7, AX25_ADDR_LEN);
+	skb_copy_from_linear_data_offset(skb, 7, dptr, AX25_ADDR_LEN);
 	dptr[6] &= ~AX25_CBIT;
 	dptr[6] &= ~AX25_EBIT;
 	dptr[6] |= AX25_SSSID_SPARE;
 	dptr += AX25_ADDR_LEN;
 
-	memcpy(dptr, skb->data + 0, AX25_ADDR_LEN);
+	skb_copy_from_linear_data(skb, dptr, AX25_ADDR_LEN);
 	dptr[6] &= ~AX25_CBIT;
 	dptr[6] |= AX25_EBIT;
 	dptr[6] |= AX25_SSSID_SPARE;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 1511697b22b..f38c3b3471e 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1156,7 +1156,7 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
 		int lg;
 
 		/* Save a copy of the Header */
-		memcpy(header, skb->data, ROSE_MIN_LEN);
+		skb_copy_from_linear_data(skb, header, ROSE_MIN_LEN);
 		skb_pull(skb, ROSE_MIN_LEN);
 
 		frontlen = skb_headroom(skb);
@@ -1176,7 +1176,7 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
 			lg = (ROSE_PACLEN > skb->len) ? skb->len : ROSE_PACLEN;
 
 			/* Copy the user data */
-			memcpy(skb_put(skbn, lg), skb->data, lg);
+			skb_copy_from_linear_data(skb, skb_put(skbn, lg), lg);
 			skb_pull(skb, lg);
 
 			/* Duplicate the Header */
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index adcda8ebee9..0d6002fc77b 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -951,7 +951,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
 	 *	Incoming Call User Data.
 	 */
 	if (skb->len >= 0) {
-		memcpy(makex25->calluserdata.cuddata, skb->data, skb->len);
+		skb_copy_from_linear_data(skb, makex25->calluserdata.cuddata, skb->len);
 		makex25->calluserdata.cudlength = skb->len;
 	}
 
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index b2bbe552a89..ba13248aa1c 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -112,8 +112,9 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
 			 *	Copy any Call User Data.
 			 */
 			if (skb->len >= 0) {
-				memcpy(x25->calluserdata.cuddata, skb->data,
-				       skb->len);
+				skb_copy_from_linear_data(skb,
+					      x25->calluserdata.cuddata,
+					      skb->len);
 				x25->calluserdata.cudlength = skb->len;
 			}
 			if (!sock_flag(sk, SOCK_DEAD))
diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c
index 6f573785391..bb45e21ffce 100644
--- a/net/x25/x25_out.c
+++ b/net/x25/x25_out.c
@@ -61,7 +61,7 @@ int x25_output(struct sock *sk, struct sk_buff *skb)
 
 	if (skb->len - header_len > max_len) {
 		/* Save a copy of the Header */
-		memcpy(header, skb->data, header_len);
+		skb_copy_from_linear_data(skb, header, header_len);
 		skb_pull(skb, header_len);
 
 		frontlen = skb_headroom(skb);
@@ -84,7 +84,7 @@ int x25_output(struct sock *sk, struct sk_buff *skb)
 			len = max_len > skb->len ? skb->len : max_len;
 
 			/* Copy the user data */
-			memcpy(skb_put(skbn, len), skb->data, len);
+			skb_copy_from_linear_data(skb, skb_put(skbn, len), len);
 			skb_pull(skb, len);
 
 			/* Duplicate the Header */
-- 
cgit v1.2.3


From 4b19ca44cbafabfe0b7b98e2e24b21a96198f509 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Wed, 28 Mar 2007 14:18:52 -0700
Subject: [NET] fib_rules: delay route cache flush by ip_rt_min_delay

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/decnet/dn_rules.c | 2 +-
 net/ipv4/fib_rules.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 7f58b95b27d..17a1932216d 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -242,7 +242,7 @@ static u32 dn_fib_rule_default_pref(void)
 
 static void dn_fib_rule_flush_cache(void)
 {
-	dn_rt_cache_flush(0);
+	dn_rt_cache_flush(-1);
 }
 
 static struct fib_rules_ops dn_fib_rules_ops = {
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index fe29b98d6c8..33083ad52e9 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -300,7 +300,7 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
 
 static void fib4_rule_flush_cache(void)
 {
-	rt_cache_flush(0);
+	rt_cache_flush(-1);
 }
 
 static struct fib_rules_ops fib4_rules_ops = {
-- 
cgit v1.2.3


From f85958151900f9d30fa5ff941b0ce71eaa45a7de Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 28 Mar 2007 14:22:33 -0700
Subject: [NET]: random functions can use nsec resolution instead of usec

In order to get more randomness for secure_tcpv6_sequence_number(),
secure_tcp_sequence_number(), secure_dccp_sequence_number() functions,
we can use the high resolution time services, providing nanosec
resolution.

I've also done two kmalloc()/kzalloc() conversions.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/char/random.c | 36 +++++++++++++-----------------------
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 03af50f900d..46c1b97748b 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -881,15 +881,15 @@ EXPORT_SYMBOL(get_random_bytes);
  */
 static void init_std_data(struct entropy_store *r)
 {
-	struct timeval tv;
+	ktime_t now;
 	unsigned long flags;
 
 	spin_lock_irqsave(&r->lock, flags);
 	r->entropy_count = 0;
 	spin_unlock_irqrestore(&r->lock, flags);
 
-	do_gettimeofday(&tv);
-	add_entropy_words(r, (__u32 *)&tv, sizeof(tv)/4);
+	now = ktime_get_real();
+	add_entropy_words(r, (__u32 *)&now, sizeof(now)/4);
 	add_entropy_words(r, (__u32 *)utsname(),
 			  sizeof(*(utsname()))/4);
 }
@@ -911,14 +911,12 @@ void rand_initialize_irq(int irq)
 		return;
 
 	/*
-	 * If kmalloc returns null, we just won't use that entropy
+	 * If kzalloc returns null, we just won't use that entropy
 	 * source.
 	 */
-	state = kmalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
-	if (state) {
-		memset(state, 0, sizeof(struct timer_rand_state));
+	state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
+	if (state)
 		irq_timer_state[irq] = state;
-	}
 }
 
 #ifdef CONFIG_BLOCK
@@ -927,14 +925,12 @@ void rand_initialize_disk(struct gendisk *disk)
 	struct timer_rand_state *state;
 
 	/*
-	 * If kmalloc returns null, we just won't use that entropy
+	 * If kzalloc returns null, we just won't use that entropy
 	 * source.
 	 */
-	state = kmalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
-	if (state) {
-		memset(state, 0, sizeof(struct timer_rand_state));
+	state = kzalloc(sizeof(struct timer_rand_state), GFP_KERNEL);
+	if (state)
 		disk->random = state;
-	}
 }
 #endif
 
@@ -1469,7 +1465,6 @@ late_initcall(seqgen_init);
 __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
 				   __be16 sport, __be16 dport)
 {
-	struct timeval tv;
 	__u32 seq;
 	__u32 hash[12];
 	struct keydata *keyptr = get_keyptr();
@@ -1485,8 +1480,7 @@ __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
 	seq = twothirdsMD4Transform((const __u32 *)daddr, hash) & HASH_MASK;
 	seq += keyptr->count;
 
-	do_gettimeofday(&tv);
-	seq += tv.tv_usec + tv.tv_sec * 1000000;
+	seq += ktime_get_real().tv64;
 
 	return seq;
 }
@@ -1521,7 +1515,6 @@ __u32 secure_ip_id(__be32 daddr)
 __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 				 __be16 sport, __be16 dport)
 {
-	struct timeval tv;
 	__u32 seq;
 	__u32 hash[4];
 	struct keydata *keyptr = get_keyptr();
@@ -1543,12 +1536,11 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 	 *	As close as possible to RFC 793, which
 	 *	suggests using a 250 kHz clock.
 	 *	Further reading shows this assumes 2 Mb/s networks.
-	 *	For 10 Mb/s Ethernet, a 1 MHz clock is appropriate.
+	 *	For 10 Gb/s Ethernet, a 1 GHz clock is appropriate.
 	 *	That's funny, Linux has one built in!  Use it!
 	 *	(Networks are faster now - should this be increased?)
 	 */
-	do_gettimeofday(&tv);
-	seq += tv.tv_usec + tv.tv_sec * 1000000;
+	seq += ktime_get_real().tv64;
 #if 0
 	printk("init_seq(%lx, %lx, %d, %d) = %d\n",
 	       saddr, daddr, sport, dport, seq);
@@ -1596,7 +1588,6 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16
 u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 				__be16 sport, __be16 dport)
 {
-	struct timeval tv;
 	u64 seq;
 	__u32 hash[4];
 	struct keydata *keyptr = get_keyptr();
@@ -1609,8 +1600,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 	seq = half_md4_transform(hash, keyptr->secret);
 	seq |= ((u64)keyptr->count) << (32 - HASH_BITS);
 
-	do_gettimeofday(&tv);
-	seq += tv.tv_usec + tv.tv_sec * 1000000;
+	seq += ktime_get_real().tv64;
 	seq &= (1ull << 48) - 1;
 #if 0
 	printk("dccp init_seq(%lx, %lx, %d, %d) = %d\n",
-- 
cgit v1.2.3


From c45d286e72dd72c0229dc9e2849743ba427fee84 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 28 Mar 2007 14:29:08 -0700
Subject: [NET]: Inline net_device_stats

Network drivers which keep stats allocate their own stats structure
then write a get_stats() function to return them.  It would be nice if
this were done by default.

1) Add a new "stats" field to "struct net_device".
2) Add a new feature field to say "this driver uses the internal one"
3) Have a default "get_stats" which returns NULL if that feature not set.
4) Change callers to check result of get_stats call for NULL, not if
   ->get_stats is set.

This should not break backwards compatibility with older drivers, yet
allow modern drivers to shed some boilerplate code.

Lightly tested: works for a modified lguest network driver.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/s390/appldata/appldata_net_sum.c |  4 ++--
 drivers/net/bonding/bond_main.c       |  5 ++---
 drivers/parisc/led.c                  |  4 ++--
 include/linux/netdevice.h             |  2 ++
 net/core/dev.c                        | 13 ++++++++++---
 5 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
index f64b8c867ae..516b3ac9a9b 100644
--- a/arch/s390/appldata/appldata_net_sum.c
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -108,10 +108,10 @@ static void appldata_get_net_sum_data(void *data)
 	collisions = 0;
 	read_lock(&dev_base_lock);
 	for (dev = dev_base; dev != NULL; dev = dev->next) {
-		if (dev->get_stats == NULL) {
+		stats = dev->get_stats(dev);
+		if (stats == NULL) {
 			continue;
 		}
-		stats = dev->get_stats(dev);
 		rx_packets += stats->rx_packets;
 		tx_packets += stats->tx_packets;
 		rx_bytes   += stats->rx_bytes;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 76d3504505b..cea3783c92c 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3640,9 +3640,8 @@ static struct net_device_stats *bond_get_stats(struct net_device *bond_dev)
 	read_lock_bh(&bond->lock);
 
 	bond_for_each_slave(bond, slave, i) {
-		if (slave->dev->get_stats) {
-			sstats = slave->dev->get_stats(slave->dev);
-
+		sstats = slave->dev->get_stats(slave->dev);
+		if (sstats) {
 			stats->rx_packets += sstats->rx_packets;
 			stats->rx_bytes += sstats->rx_bytes;
 			stats->rx_errors += sstats->rx_errors;
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index d190c05d87e..453e6829756 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -372,9 +372,9 @@ static __inline__ int led_get_net_activity(void)
 		continue;
 	    if (LOOPBACK(in_dev->ifa_list->ifa_local))
 		continue;
-	    if (!dev->get_stats) 
-		continue;
 	    stats = dev->get_stats(dev);
+	    if (!stats)
+		continue;
 	    rx_total += stats->rx_packets;
 	    tx_total += stats->tx_packets;
 	}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1a528548cd1..71fc8ff4888 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -323,6 +323,7 @@ struct net_device
 #define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
 #define NETIF_F_GSO		2048	/* Enable software GSO. */
 #define NETIF_F_LLTX		4096	/* LockLess TX */
+#define NETIF_F_INTERNAL_STATS	8192	/* Use stats structure in net_device */
 
 	/* Segmentation offload features */
 #define NETIF_F_GSO_SHIFT	16
@@ -347,6 +348,7 @@ struct net_device
 
 
 	struct net_device_stats* (*get_stats)(struct net_device *dev);
+	struct net_device_stats	stats;
 
 	/* List of functions to handle Wireless Extensions (instead of ioctl).
 	 * See <net/iw_handler.h> for details. Jean II */
diff --git a/net/core/dev.c b/net/core/dev.c
index 86dc9f693f6..fec8cf27f75 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -817,7 +817,6 @@ static int default_rebuild_header(struct sk_buff *skb)
 	return 1;
 }
 
-
 /**
  *	dev_open	- prepare an interface for use.
  *	@dev:	device to open
@@ -2096,9 +2095,9 @@ void dev_seq_stop(struct seq_file *seq, void *v)
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
-	if (dev->get_stats) {
-		struct net_device_stats *stats = dev->get_stats(dev);
+	struct net_device_stats *stats = dev->get_stats(dev);
 
+	if (stats) {
 		seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
 				"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
 			   dev->name, stats->rx_bytes, stats->rx_packets,
@@ -3282,6 +3281,13 @@ out:
 	mutex_unlock(&net_todo_run_mutex);
 }
 
+static struct net_device_stats *maybe_internal_stats(struct net_device *dev)
+{
+	if (dev->features & NETIF_F_INTERNAL_STATS)
+		return &dev->stats;
+	return NULL;
+}
+
 /**
  *	alloc_netdev - allocate network device
  *	@sizeof_priv:	size of private data to allocate space for
@@ -3317,6 +3323,7 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name,
 	if (sizeof_priv)
 		dev->priv = netdev_priv(dev);
 
+	dev->get_stats = maybe_internal_stats;
 	setup(dev);
 	strcpy(dev->name, name);
 	return dev;
-- 
cgit v1.2.3


From 3dbad80ac7632f243b824d469301abb97ec634a1 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 29 Mar 2007 19:16:03 -0700
Subject: [NET]: Fix warnings in 3c523.c and ni52.c

We have to put back the cast to "char *" because these
pointers are volatile.

Reported by Andrew Morton.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/3c523.c | 2 +-
 drivers/net/ni52.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/3c523.c b/drivers/net/3c523.c
index a384f7d478a..da1a22c1386 100644
--- a/drivers/net/3c523.c
+++ b/drivers/net/3c523.c
@@ -1145,7 +1145,7 @@ static int elmc_send_packet(struct sk_buff *skb, struct net_device *dev)
 
 	if (len != skb->len)
 		memset((char *) p->xmit_cbuffs[p->xmit_count], 0, ETH_ZLEN);
-	skb_copy_from_linear_data(skb, p->xmit_cbuffs[p->xmit_count], skb->len);
+	skb_copy_from_linear_data(skb, (char *) p->xmit_cbuffs[p->xmit_count], skb->len);
 
 #if (NUM_XMIT_BUFFS == 1)
 #ifdef NO_NOPCOMMANDS
diff --git a/drivers/net/ni52.c b/drivers/net/ni52.c
index 8646698c77d..8dbd6d1900b 100644
--- a/drivers/net/ni52.c
+++ b/drivers/net/ni52.c
@@ -1182,7 +1182,7 @@ static int ni52_send_packet(struct sk_buff *skb, struct net_device *dev)
 	else
 #endif
 	{
-		skb_copy_from_linear_data(skb, p->xmit_cbuffs[p->xmit_count], skb->len);
+		skb_copy_from_linear_data(skb, (char *) p->xmit_cbuffs[p->xmit_count], skb->len);
 		len = skb->len;
 		if (len < ETH_ZLEN) {
 			len = ETH_ZLEN;
-- 
cgit v1.2.3


From 27d7ff46a3498d3debc6ba68fb8014c702b81170 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Sat, 31 Mar 2007 11:55:19 -0300
Subject: [SK_BUFF]: Introduce skb_copy_to_linear_data{_offset}

To clearly state the intent of copying to linear sk_buffs, _offset being a
overly long variant but interesting for the sake of saving some bytes.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
---
 arch/ia64/hp/sim/simeth.c                 |  2 +-
 arch/ia64/sn/kernel/xpnet.c               |  2 +-
 drivers/isdn/hysdn/hycapi.c               |  3 +-
 drivers/net/8139too.c                     |  6 ++--
 drivers/net/appletalk/ltpc.c              |  2 +-
 drivers/net/atari_bionet.c                |  3 +-
 drivers/net/atari_pamsnet.c               |  3 +-
 drivers/net/chelsio/sge.c                 | 24 ++++++++++------
 drivers/net/cxgb3/sge.c                   |  6 ++--
 drivers/net/defxx.c                       |  4 ++-
 drivers/net/e100.c                        |  2 +-
 drivers/net/e1000/e1000_main.c            |  9 ++++--
 drivers/net/ehea/ehea_main.c              |  4 +--
 drivers/net/irda/ali-ircc.c               |  2 +-
 drivers/net/irda/au1k_ir.c                |  2 +-
 drivers/net/irda/donauboe.c               |  4 +--
 drivers/net/irda/mcs7780.c                |  4 +--
 drivers/net/irda/nsc-ircc.c               |  8 ++++--
 drivers/net/irda/pxaficp_ir.c             |  2 +-
 drivers/net/irda/stir4200.c               |  2 +-
 drivers/net/irda/via-ircc.c               |  6 ++--
 drivers/net/irda/w83977af_ir.c            |  8 ++++--
 drivers/net/ixgb/ixgb_main.c              |  9 ++++--
 drivers/net/loopback.c                    |  3 +-
 drivers/net/macb.c                        |  7 +++--
 drivers/net/myri10ge/myri10ge.c           |  2 +-
 drivers/net/sk98lin/skge.c                |  2 +-
 drivers/net/skfp/skfddi.c                 |  2 +-
 drivers/net/sun3lance.c                   |  2 +-
 drivers/net/tokenring/smctr.c             |  4 +--
 drivers/net/tokenring/tms380tr.c          |  3 +-
 drivers/net/wan/dscc4.c                   |  3 +-
 drivers/net/wan/pc300_drv.c               |  2 +-
 drivers/net/wan/pc300_tty.c               |  4 +--
 drivers/net/wan/z85230.c                  |  2 +-
 drivers/net/wireless/prism54/islpci_eth.c |  4 +--
 drivers/s390/net/qeth_main.c              |  9 +++---
 drivers/usb/atm/usbatm.c                  |  4 ++-
 drivers/usb/net/asix.c                    |  2 +-
 include/linux/skbuff.h                    | 15 ++++++++++
 net/atm/br2684.c                          |  2 +-
 net/atm/lec.c                             |  8 +++---
 net/atm/mpc.c                             | 11 +++++---
 net/bridge/br_netfilter.c                 |  3 +-
 net/core/netpoll.c                        |  2 +-
 net/core/skbuff.c                         |  2 +-
 net/ieee80211/ieee80211_rx.c              |  5 ++--
 net/ipv4/ipcomp.c                         |  2 +-
 net/ipv4/ipmr.c                           |  2 +-
 net/ipv4/ipvs/ip_vs_app.c                 |  2 +-
 net/ipv4/netfilter/ip_queue.c             |  2 +-
 net/ipv6/ipcomp6.c                        |  2 +-
 net/ipv6/netfilter/ip6_queue.c            |  2 +-
 net/irda/irttp.c                          |  2 +-
 net/irda/wrapper.c                        |  3 +-
 net/netfilter/nfnetlink_queue.c           |  2 +-
 net/netrom/nr_out.c                       |  4 +--
 net/rose/af_rose.c                        |  2 +-
 net/tipc/link.c                           | 46 ++++++++++++++++---------------
 net/tipc/msg.h                            |  7 +++--
 net/tipc/port.c                           |  8 +++---
 net/wanrouter/wanmain.c                   |  4 +--
 net/x25/x25_out.c                         |  2 +-
 63 files changed, 185 insertions(+), 127 deletions(-)

diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c
index edef008c2b4..f26077a773d 100644
--- a/arch/ia64/hp/sim/simeth.c
+++ b/arch/ia64/hp/sim/simeth.c
@@ -473,7 +473,7 @@ simeth_rx(struct net_device *dev)
 		 * XXX Fix me
 		 * Should really do a csum+copy here
 		 */
-		memcpy(skb->data, frame, len);
+		skb_copy_to_linear_data(skb, frame, len);
 #endif
 		skb->protocol = eth_type_trans(skb, dev);
 
diff --git a/arch/ia64/sn/kernel/xpnet.c b/arch/ia64/sn/kernel/xpnet.c
index 9fc02654f0f..5419acb89a8 100644
--- a/arch/ia64/sn/kernel/xpnet.c
+++ b/arch/ia64/sn/kernel/xpnet.c
@@ -233,7 +233,7 @@ xpnet_receive(partid_t partid, int channel, struct xpnet_message *msg)
 			"%lu)\n", skb->data, &msg->data,
 			(size_t) msg->embedded_bytes);
 
-		memcpy(skb->data, &msg->data, (size_t) msg->embedded_bytes);
+		skb_copy_to_linear_data(skb, &msg->data, (size_t)msg->embedded_bytes);
 	} else {
 		dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
 			"bte_copy(0x%p, 0x%p, %hu)\n", (void *)msg->buf_pa,
diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c
index 4433ce0fca5..f85450146bd 100644
--- a/drivers/isdn/hysdn/hycapi.c
+++ b/drivers/isdn/hysdn/hycapi.c
@@ -399,7 +399,8 @@ static u16 hycapi_send_message(struct capi_ctr *ctrl, struct sk_buff *skb)
 			if (_len > 22) {
 				_len2 = _len - 22;
 				skb_copy_from_linear_data(skb, msghead, 22);
-				memcpy(skb->data + _len2, msghead, 22);
+				skb_copy_to_linear_data_offset(skb, _len2,
+							       msghead, 22);
 				skb_pull(skb, _len2);
 				CAPIMSG_SETLEN(skb->data, 22);
 				retval = capilib_data_b3_req(&cinfo->ncci_head,
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index 2101334a8ac..a844b1fe2dc 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -1904,10 +1904,10 @@ static __inline__ void wrap_copy(struct sk_buff *skb, const unsigned char *ring,
 	u32 left = RX_BUF_LEN - offset;
 
 	if (size > left) {
-		memcpy(skb->data, ring + offset, left);
-		memcpy(skb->data+left, ring, size - left);
+		skb_copy_to_linear_data(skb, ring + offset, left);
+		skb_copy_to_linear_data_offset(skb, left, ring, size - left);
 	} else
-		memcpy(skb->data, ring + offset, size);
+		skb_copy_to_linear_data(skb, ring + offset, size);
 }
 #endif
 
diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index 43c17c85c97..6a6cbd331a1 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -774,7 +774,7 @@ static int sendup_buffer (struct net_device *dev)
 	skb_pull(skb,3);
 
 	/* copy ddp(s,e)hdr + contents */
-	memcpy(skb->data,(void*)ltdmabuf,len);
+	skb_copy_to_linear_data(skb, ltdmabuf, len);
 
 	skb_reset_transport_header(skb);
 
diff --git a/drivers/net/atari_bionet.c b/drivers/net/atari_bionet.c
index 13dbed368d6..3d87bd2b419 100644
--- a/drivers/net/atari_bionet.c
+++ b/drivers/net/atari_bionet.c
@@ -550,7 +550,8 @@ bionet_poll_rx(struct net_device *dev) {
 
 			/* 'skb->data' points to the start of sk_buff data area.
 			 */
-			memcpy(skb->data, nic_packet->buffer, pkt_len);
+			skb_copy_to_linear_data(skb, nic_packet->buffer,
+						pkt_len);
 			skb->protocol = eth_type_trans( skb, dev );
 			netif_rx(skb);
 			dev->last_rx = jiffies;
diff --git a/drivers/net/atari_pamsnet.c b/drivers/net/atari_pamsnet.c
index 745101d7451..54714409a09 100644
--- a/drivers/net/atari_pamsnet.c
+++ b/drivers/net/atari_pamsnet.c
@@ -793,7 +793,8 @@ pamsnet_poll_rx(struct net_device *dev) {
 
 			/* 'skb->data' points to the start of sk_buff data area.
 			 */
-			memcpy(skb->data, nic_packet->buffer, pkt_len);
+			skb_copy_to_linear_data(skb, nic_packet->buffer,
+						pkt_len);
 			netif_rx(skb);
 			dev->last_rx = jiffies;
 			lp->stats.rx_packets++;
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index 1be1bbd1616..e4f874a70fe 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -2095,10 +2095,14 @@ static void espibug_workaround_t204(unsigned long data)
 					0x0, 0x7, 0x43, 0x0, 0x0, 0x0
 				};
 
-				memcpy(skb->data + sizeof(struct cpl_tx_pkt),
-					ch_mac_addr, ETH_ALEN);
-				memcpy(skb->data + skb->len - 10,
-					ch_mac_addr, ETH_ALEN);
+				skb_copy_to_linear_data_offset(skb,
+						    sizeof(struct cpl_tx_pkt),
+							       ch_mac_addr,
+							       ETH_ALEN);
+				skb_copy_to_linear_data_offset(skb,
+							       skb->len - 10,
+							       ch_mac_addr,
+							       ETH_ALEN);
 				skb->cb[0] = 0xff;
 			}
 
@@ -2125,10 +2129,14 @@ static void espibug_workaround(unsigned long data)
 	                if (!skb->cb[0]) {
 	                        u8 ch_mac_addr[ETH_ALEN] =
 	                            {0x0, 0x7, 0x43, 0x0, 0x0, 0x0};
-	                        memcpy(skb->data + sizeof(struct cpl_tx_pkt),
-	                               ch_mac_addr, ETH_ALEN);
-	                        memcpy(skb->data + skb->len - 10, ch_mac_addr,
-	                               ETH_ALEN);
+	                        skb_copy_to_linear_data_offset(skb,
+						     sizeof(struct cpl_tx_pkt),
+							       ch_mac_addr,
+							       ETH_ALEN);
+	                        skb_copy_to_linear_data_offset(skb,
+							       skb->len - 10,
+							       ch_mac_addr,
+							       ETH_ALEN);
 	                        skb->cb[0] = 0xff;
 	                }
 
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 166c959c94b..3666586a483 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -661,7 +661,7 @@ static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
 
 	if (skb) {
 		__skb_put(skb, IMMED_PKT_SIZE);
-		memcpy(skb->data, resp->imm_data, IMMED_PKT_SIZE);
+		skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
 	}
 	return skb;
 }
@@ -1722,11 +1722,11 @@ static void skb_data_init(struct sk_buff *skb, struct sge_fl_page *p,
 {
 	skb->len = len;
 	if (len <= SKB_DATA_SIZE) {
-		memcpy(skb->data, p->va, len);
+		skb_copy_to_linear_data(skb, p->va, len);
 		skb->tail += len;
 		put_page(p->frag.page);
 	} else {
-		memcpy(skb->data, p->va, SKB_DATA_SIZE);
+		skb_copy_to_linear_data(skb, p->va, SKB_DATA_SIZE);
 		skb_shinfo(skb)->frags[0].page = p->frag.page;
 		skb_shinfo(skb)->frags[0].page_offset =
 		    p->frag.page_offset + SKB_DATA_SIZE;
diff --git a/drivers/net/defxx.c b/drivers/net/defxx.c
index 8d29fae1c71..571d82f8008 100644
--- a/drivers/net/defxx.c
+++ b/drivers/net/defxx.c
@@ -3091,7 +3091,9 @@ static void dfx_rcv_queue_process(
 					{
 						/* Receive buffer allocated, pass receive packet up */
 
-						memcpy(skb->data, p_buff + RCV_BUFF_K_PADDING, pkt_len+3);
+						skb_copy_to_linear_data(skb,
+							       p_buff + RCV_BUFF_K_PADDING,
+							       pkt_len + 3);
 					}
 
 					skb_reserve(skb,3);		/* adjust data field so that it points to FC byte */
diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index 0cefef5e3f0..4d0e0aea72b 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -1769,7 +1769,7 @@ static int e100_rx_alloc_skb(struct nic *nic, struct rx *rx)
 
 	/* Align, init, and map the RFD. */
 	skb_reserve(rx->skb, NET_IP_ALIGN);
-	memcpy(rx->skb->data, &nic->blank_rfd, sizeof(struct rfd));
+	skb_copy_to_linear_data(rx->skb, &nic->blank_rfd, sizeof(struct rfd));
 	rx->dma_addr = pci_map_single(nic->pdev, rx->skb->data,
 		RFD_BUF_LEN, PCI_DMA_BIDIRECTIONAL);
 
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index e7c93f44f81..610216ec491 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -4224,9 +4224,12 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter,
 			    netdev_alloc_skb(netdev, length + NET_IP_ALIGN);
 			if (new_skb) {
 				skb_reserve(new_skb, NET_IP_ALIGN);
-				memcpy(new_skb->data - NET_IP_ALIGN,
-				       skb->data - NET_IP_ALIGN,
-				       length + NET_IP_ALIGN);
+				skb_copy_to_linear_data_offset(new_skb,
+							       -NET_IP_ALIGN,
+							       (skb->data -
+							        NET_IP_ALIGN),
+							       (length +
+							        NET_IP_ALIGN));
 				/* save the skb in buffer_info as good */
 				buffer_info->skb = skb;
 				skb = new_skb;
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 8b539207263..58364a0ff37 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -391,8 +391,8 @@ static int ehea_poll(struct net_device *dev, int *budget)
 					if (!skb)
 						break;
 				}
-				memcpy(skb->data, ((char*)cqe) + 64,
-				       cqe->num_bytes_transfered - 4);
+				skb_copy_to_linear_data(skb, ((char*)cqe) + 64,
+					       cqe->num_bytes_transfered - 4);
 				ehea_fill_skb(dev, skb, cqe);
 			} else if (rq == 2) {  /* RQ2 */
 				skb = get_skb_by_index(skb_arr_rq2,
diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c
index fb2248a2551..f9c889c0dd0 100644
--- a/drivers/net/irda/ali-ircc.c
+++ b/drivers/net/irda/ali-ircc.c
@@ -1923,7 +1923,7 @@ static int  ali_ircc_dma_receive_complete(struct ali_ircc_cb *self)
 			
 			/* Copy frame without CRC, CRC is removed by hardware*/
 			skb_put(skb, len);
-			memcpy(skb->data, self->rx_buff.data, len);
+			skb_copy_to_linear_data(skb, self->rx_buff.data, len);
 
 			/* Move to next frame */
 			self->rx_buff.data += len;
diff --git a/drivers/net/irda/au1k_ir.c b/drivers/net/irda/au1k_ir.c
index cdd1f6c1e74..4dbdfaaf37b 100644
--- a/drivers/net/irda/au1k_ir.c
+++ b/drivers/net/irda/au1k_ir.c
@@ -604,7 +604,7 @@ static int au1k_irda_rx(struct net_device *dev)
 				skb_put(skb, count);
 			else
 				skb_put(skb, count-2);
-			memcpy(skb->data, (void *)pDB->vaddr, count-2);
+			skb_copy_to_linear_data(skb, pDB->vaddr, count - 2);
 			skb->dev = dev;
 			skb_reset_mac_header(skb);
 			skb->protocol = htons(ETH_P_IRDA);
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index 9987a0dc1ea..3ca47bf6dfe 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -1282,8 +1282,8 @@ dumpbufs(self->rx_bufs[self->rxs],len,'<');
                       skb_reserve (skb, 1);
 
                       skb_put (skb, len);
-                      memcpy (skb->data, self->rx_bufs[self->rxs], len);
-
+                      skb_copy_to_linear_data(skb, self->rx_bufs[self->rxs],
+					      len);
                       self->stats.rx_packets++;
                       skb->dev = self->netdev;
                       skb_reset_mac_header(skb);
diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c
index 4b0037e498f..54d1d543c92 100644
--- a/drivers/net/irda/mcs7780.c
+++ b/drivers/net/irda/mcs7780.c
@@ -426,7 +426,7 @@ static void mcs_unwrap_mir(struct mcs_cb *mcs, __u8 *buf, int len)
 	}
 
 	skb_reserve(skb, 1);
-	memcpy(skb->data, buf, new_len);
+	skb_copy_to_linear_data(skb, buf, new_len);
 	skb_put(skb, new_len);
 	skb_reset_mac_header(skb);
 	skb->protocol = htons(ETH_P_IRDA);
@@ -479,7 +479,7 @@ static void mcs_unwrap_fir(struct mcs_cb *mcs, __u8 *buf, int len)
 	}
 
 	skb_reserve(skb, 1);
-	memcpy(skb->data, buf, new_len);
+	skb_copy_to_linear_data(skb, buf, new_len);
 	skb_put(skb, new_len);
 	skb_reset_mac_header(skb);
 	skb->protocol = htons(ETH_P_IRDA);
diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
index 0ff99271413..d96c89751a7 100644
--- a/drivers/net/irda/nsc-ircc.c
+++ b/drivers/net/irda/nsc-ircc.c
@@ -1868,10 +1868,14 @@ static int nsc_ircc_dma_receive_complete(struct nsc_ircc_cb *self, int iobase)
 			/* Copy frame without CRC */
 			if (self->io.speed < 4000000) {
 				skb_put(skb, len-2);
-				memcpy(skb->data, self->rx_buff.data, len-2);
+				skb_copy_to_linear_data(skb,
+							self->rx_buff.data,
+							len - 2);
 			} else {
 				skb_put(skb, len-4);
-				memcpy(skb->data, self->rx_buff.data, len-4);
+				skb_copy_to_linear_data(skb,
+							self->rx_buff.data,
+							len - 4);
 			}
 
 			/* Move to next frame */
diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c
index b3e1107420a..fb196fd9185 100644
--- a/drivers/net/irda/pxaficp_ir.c
+++ b/drivers/net/irda/pxaficp_ir.c
@@ -386,7 +386,7 @@ static void pxa_irda_fir_irq_eif(struct pxa_irda *si, struct net_device *dev, in
 
 		/* Align IP header to 20 bytes  */
 		skb_reserve(skb, 1);
-		memcpy(skb->data, si->dma_rx_buff, len);
+		skb_copy_to_linear_data(skb, si->dma_rx_buff, len);
 		skb_put(skb, len);
 
 		/* Feed it to IrLAP  */
diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c
index aec86a21434..755aa444a4d 100644
--- a/drivers/net/irda/stir4200.c
+++ b/drivers/net/irda/stir4200.c
@@ -348,7 +348,7 @@ static void fir_eof(struct stir_cb *stir)
 		}
 		skb_reserve(nskb, 1);
 		skb = nskb;
-		memcpy(nskb->data, rx_buff->data, len);
+		skb_copy_to_linear_data(nskb, rx_buff->data, len);
 	} else {
 		nskb = dev_alloc_skb(rx_buff->truesize);
 		if (unlikely(!nskb)) {
diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c
index 45bbd668615..ff5358574d0 100644
--- a/drivers/net/irda/via-ircc.c
+++ b/drivers/net/irda/via-ircc.c
@@ -1189,7 +1189,7 @@ F01_E */
 		skb_reserve(skb, 1);
 		skb_put(skb, len - 4);
 
-		memcpy(skb->data, self->rx_buff.data, len - 4);
+		skb_copy_to_linear_data(skb, self->rx_buff.data, len - 4);
 		IRDA_DEBUG(2, "%s(): len=%x.rx_buff=%p\n", __FUNCTION__,
 			   len - 4, self->rx_buff.data);
 
@@ -1234,7 +1234,7 @@ static int upload_rxdata(struct via_ircc_cb *self, int iobase)
 	}
 	skb_reserve(skb, 1);
 	skb_put(skb, len - 4 + 1);
-	memcpy(skb->data, self->rx_buff.data, len - 4 + 1);
+	skb_copy_to_linear_data(skb, self->rx_buff.data, len - 4 + 1);
 	st_fifo->tail++;
 	st_fifo->len++;
 	if (st_fifo->tail > MAX_RX_WINDOW)
@@ -1303,7 +1303,7 @@ static int RxTimerHandler(struct via_ircc_cb *self, int iobase)
 			}
 			skb_reserve(skb, 1);
 			skb_put(skb, len - 4);
-			memcpy(skb->data, self->rx_buff.data, len - 4);
+			skb_copy_to_linear_data(skb, self->rx_buff.data, len - 4);
 
 			IRDA_DEBUG(2, "%s(): len=%x.head=%x\n", __FUNCTION__,
 				   len - 4, st_fifo->head);
diff --git a/drivers/net/irda/w83977af_ir.c b/drivers/net/irda/w83977af_ir.c
index 0d4a68618fc..5182e800cc1 100644
--- a/drivers/net/irda/w83977af_ir.c
+++ b/drivers/net/irda/w83977af_ir.c
@@ -908,10 +908,14 @@ int w83977af_dma_receive_complete(struct w83977af_ir *self)
 			/* Copy frame without CRC */
 			if (self->io.speed < 4000000) {
 				skb_put(skb, len-2);
-				memcpy(skb->data, self->rx_buff.data, len-2);
+				skb_copy_to_linear_data(skb,
+							self->rx_buff.data,
+							len - 2);
 			} else {
 				skb_put(skb, len-4);
-				memcpy(skb->data, self->rx_buff.data, len-4);
+				skb_copy_to_linear_data(skb,
+							self->rx_buff.data,
+							len - 4);
 			}
 
 			/* Move to next frame */
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index e729ced52dc..dfde80e54ae 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -2017,9 +2017,12 @@ ixgb_clean_rx_irq(struct ixgb_adapter *adapter)
 			    netdev_alloc_skb(netdev, length + NET_IP_ALIGN);
 			if (new_skb) {
 				skb_reserve(new_skb, NET_IP_ALIGN);
-				memcpy(new_skb->data - NET_IP_ALIGN,
-				       skb->data - NET_IP_ALIGN,
-				       length + NET_IP_ALIGN);
+				skb_copy_to_linear_data_offset(new_skb,
+							       -NET_IP_ALIGN,
+							       (skb->data -
+							        NET_IP_ALIGN),
+							       (length +
+							        NET_IP_ALIGN));
 				/* save the skb in buffer_info as good */
 				buffer_info->skb = skb;
 				skb = new_skb;
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 20b5cb10136..6df673a058c 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -94,7 +94,8 @@ static void emulate_large_send_offload(struct sk_buff *skb)
 		skb_set_mac_header(nskb, -ETH_HLEN);
 		skb_reset_network_header(nskb);
 		iph = ip_hdr(nskb);
-		memcpy(nskb->data, skb_network_header(skb), doffset);
+		skb_copy_to_linear_data(nskb, skb_network_header(skb),
+					doffset);
 		if (skb_copy_bits(skb,
 				  doffset + offset,
 				  nskb->data + doffset,
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index 9e233f8216a..0e04f7ac3f2 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -367,9 +367,10 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 			BUG_ON(frag != last_frag);
 			frag_len = len - offset;
 		}
-		memcpy(skb->data + offset,
-		       bp->rx_buffers + (RX_BUFFER_SIZE * frag),
-		       frag_len);
+		skb_copy_to_linear_data_offset(skb, offset,
+					       (bp->rx_buffers +
+					        (RX_BUFFER_SIZE * frag)),
+					       frag_len);
 		offset += RX_BUFFER_SIZE;
 		bp->rx_ring[frag].addr &= ~MACB_BIT(RX_USED);
 		wmb();
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index e4b69a0485b..16e3c4315e8 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -879,7 +879,7 @@ myri10ge_rx_skb_build(struct sk_buff *skb, u8 * va,
 	 * skb_pull() (for ether_pad and eth_type_trans()) requires
 	 * the beginning of the packet in skb_headlen(), move it
 	 * manually */
-	memcpy(skb->data, va, hlen);
+	skb_copy_to_linear_data(skb, va, hlen);
 	skb_shinfo(skb)->frags[0].page_offset += hlen;
 	skb_shinfo(skb)->frags[0].size -= hlen;
 	skb->data_len -= hlen;
diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c
index b987a5c3f42..e0a93005e6d 100644
--- a/drivers/net/sk98lin/skge.c
+++ b/drivers/net/sk98lin/skge.c
@@ -2127,7 +2127,7 @@ rx_start:
 						    (dma_addr_t) PhysAddr,
 						    FrameLength,
 						    PCI_DMA_FROMDEVICE);
-			memcpy(pNewMsg->data, pMsg, FrameLength);
+			skb_copy_to_linear_data(pNewMsg, pMsg, FrameLength);
 
 			pci_dma_sync_single_for_device(pAC->PciDev,
 						       (dma_addr_t) PhysAddr,
diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c
index 064e7c21c01..a7ef6c8b772 100644
--- a/drivers/net/skfp/skfddi.c
+++ b/drivers/net/skfp/skfddi.c
@@ -1937,7 +1937,7 @@ int mac_drv_rx_init(struct s_smc *smc, int len, int fc,
 	}
 	skb_reserve(skb, 3);
 	skb_put(skb, len);
-	memcpy(skb->data, look_ahead, len);
+	skb_copy_to_linear_data(skb, look_ahead, len);
 
 	// deliver frame to system
 	skb->protocol = fddi_type_trans(skb, smc->os.dev);
diff --git a/drivers/net/sun3lance.c b/drivers/net/sun3lance.c
index 327ed7962fb..791e081fdc1 100644
--- a/drivers/net/sun3lance.c
+++ b/drivers/net/sun3lance.c
@@ -853,7 +853,7 @@ static int lance_rx( struct net_device *dev )
 
 				skb_reserve( skb, 2 );	/* 16 byte align */
 				skb_put( skb, pkt_len );	/* Make room */
-//			        memcpy( skb->data, PKTBUF_ADDR(head), pkt_len );
+//			        skb_copy_to_linear_data(skb, PKTBUF_ADDR(head), pkt_len);
 				eth_copy_and_sum(skb,
 						 PKTBUF_ADDR(head),
 						 pkt_len, 0);
diff --git a/drivers/net/tokenring/smctr.c b/drivers/net/tokenring/smctr.c
index b0296d80e46..9bbea5c8acf 100644
--- a/drivers/net/tokenring/smctr.c
+++ b/drivers/net/tokenring/smctr.c
@@ -3889,7 +3889,7 @@ static int smctr_process_rx_packet(MAC_HEADER *rmf, __u16 size,
 
                 /* Slide data into a sleek skb. */
                 skb_put(skb, skb->len);
-                memcpy(skb->data, rmf, skb->len);
+                skb_copy_to_linear_data(skb, rmf, skb->len);
 
                 /* Update Counters */
                 tp->MacStat.rx_packets++;
@@ -4475,7 +4475,7 @@ static int smctr_rx_frame(struct net_device *dev)
 				if (skb) {
                                 	skb_put(skb, rx_size);
 
-                                	memcpy(skb->data, pbuff, rx_size);
+					skb_copy_to_linear_data(skb, pbuff, rx_size);
 
                                 	/* Update Counters */
                                 	tp->MacStat.rx_packets++;
diff --git a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c
index e6f0817c350..12bd294045a 100644
--- a/drivers/net/tokenring/tms380tr.c
+++ b/drivers/net/tokenring/tms380tr.c
@@ -2178,7 +2178,8 @@ static void tms380tr_rcv_status_irq(struct net_device *dev)
 				|| rpl->SkbStat == SKB_DMA_DIRECT))
 			{
 				if(rpl->SkbStat == SKB_DATA_COPY)
-					memcpy(skb->data, ReceiveDataPtr, Length);
+					skb_copy_to_linear_data(skb, ReceiveDataPtr,
+						       Length);
 
 				/* Deliver frame to system */
 				rpl->Skb = NULL;
diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c
index 25021a7992a..dca02447145 100644
--- a/drivers/net/wan/dscc4.c
+++ b/drivers/net/wan/dscc4.c
@@ -1904,7 +1904,8 @@ static struct sk_buff *dscc4_init_dummy_skb(struct dscc4_dev_priv *dpriv)
 		struct TxFD *tx_fd = dpriv->tx_fd + last;
 
 		skb->len = DUMMY_SKB_SIZE;
-		memcpy(skb->data, version, strlen(version)%DUMMY_SKB_SIZE);
+		skb_copy_to_linear_data(skb, version,
+					strlen(version) % DUMMY_SKB_SIZE);
 		tx_fd->state = FrameEnd | TO_STATE_TX(DUMMY_SKB_SIZE);
 		tx_fd->data = pci_map_single(dpriv->pci_priv->pdev, skb->data,
 					     DUMMY_SKB_SIZE, PCI_DMA_TODEVICE);
diff --git a/drivers/net/wan/pc300_drv.c b/drivers/net/wan/pc300_drv.c
index 8ba75bb1732..999bf71937c 100644
--- a/drivers/net/wan/pc300_drv.c
+++ b/drivers/net/wan/pc300_drv.c
@@ -1759,7 +1759,7 @@ cpc_trace(struct net_device *dev, struct sk_buff *skb_main, char rx_tx)
 	skb->pkt_type = PACKET_HOST;
 	skb->len = 10 + skb_main->len;
 
-	memcpy(skb->data, dev->name, 5);
+	skb_copy_to_linear_data(skb, dev->name, 5);
 	skb->data[5] = '[';
 	skb->data[6] = rx_tx;
 	skb->data[7] = ']';
diff --git a/drivers/net/wan/pc300_tty.c b/drivers/net/wan/pc300_tty.c
index de02a07259c..07dbdfbfc15 100644
--- a/drivers/net/wan/pc300_tty.c
+++ b/drivers/net/wan/pc300_tty.c
@@ -1007,13 +1007,13 @@ static void cpc_tty_trace(pc300dev_t *dev, char* buf, int len, char rxtx)
 	skb->pkt_type = PACKET_HOST; 
 	skb->len = 10 + len; 
 
-	memcpy(skb->data,dev->dev->name,5);
+	skb_copy_to_linear_data(skb, dev->dev->name, 5);
 	skb->data[5] = '['; 
 	skb->data[6] = rxtx; 
 	skb->data[7] = ']'; 
 	skb->data[8] = ':'; 
 	skb->data[9] = ' '; 
-	memcpy(&skb->data[10], buf, len); 
+	skb_copy_to_linear_data_offset(skb, 10, buf, len);
 	netif_rx(skb); 
 } 	
 
diff --git a/drivers/net/wan/z85230.c b/drivers/net/wan/z85230.c
index 9432d2ce774..98ef400908b 100644
--- a/drivers/net/wan/z85230.c
+++ b/drivers/net/wan/z85230.c
@@ -1656,7 +1656,7 @@ static void z8530_rx_done(struct z8530_channel *c)
 		else
 		{
 			skb_put(skb, ct);
-			memcpy(skb->data, rxb, ct);
+			skb_copy_to_linear_data(skb, rxb, ct);
 			c->stats.rx_packets++;
 			c->stats.rx_bytes+=ct;
 		}
diff --git a/drivers/net/wireless/prism54/islpci_eth.c b/drivers/net/wireless/prism54/islpci_eth.c
index 7d8bff1dbc4..dd070cccf32 100644
--- a/drivers/net/wireless/prism54/islpci_eth.c
+++ b/drivers/net/wireless/prism54/islpci_eth.c
@@ -136,7 +136,7 @@ islpci_eth_transmit(struct sk_buff *skb, struct net_device *ndev)
 				printk("islpci_eth_transmit:wds_mac\n");
 #endif
 				memmove(skb->data + 6, src, skb->len);
-				memcpy(skb->data, wds_mac, 6);
+				skb_copy_to_linear_data(skb, wds_mac, 6);
 			} else {
 				memmove(skb->data, src, skb->len);
 			}
@@ -165,7 +165,7 @@ islpci_eth_transmit(struct sk_buff *skb, struct net_device *ndev)
 				skb_copy_from_linear_data(skb,
 							  newskb->data + 6,
 							  skb->len);
-				memcpy(newskb->data, wds_mac, 6);
+				skb_copy_to_linear_data(newskb, wds_mac, 6);
 #ifdef ISLPCI_ETH_DEBUG
 				printk("islpci_eth_transmit:wds_mac\n");
 #endif
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index df7f279ec40..ad7792dc1a0 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -2501,7 +2501,8 @@ qeth_process_inbound_buffer(struct qeth_card *card,
 			vlan_tag = qeth_rebuild_skb(card, skb, hdr);
 		else { /*in case of OSN*/
 			skb_push(skb, sizeof(struct qeth_hdr));
-			memcpy(skb->data, hdr, sizeof(struct qeth_hdr));
+			skb_copy_to_linear_data(skb, hdr,
+						sizeof(struct qeth_hdr));
 		}
 		/* is device UP ? */
 		if (!(card->dev->flags & IFF_UP)){
@@ -3870,9 +3871,9 @@ __qeth_prepare_skb(struct qeth_card *card, struct sk_buff *skb, int ipv)
 		 * memcpys instead of one memmove to save cycles.
 		 */
 		skb_push(skb, VLAN_HLEN);
-		memcpy(skb->data, skb->data + 4, 4);
-		memcpy(skb->data + 4, skb->data + 8, 4);
-		memcpy(skb->data + 8, skb->data + 12, 4);
+		skb_copy_to_linear_data(skb, skb->data + 4, 4);
+		skb_copy_to_linear_data_offset(skb, 4, skb->data + 8, 4);
+		skb_copy_to_linear_data_offset(skb, 8, skb->data + 12, 4);
 		tag = (u16 *)(skb->data + 12);
 		/*
 		 * first two bytes  = ETH_P_8021Q (0x8100)
diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c
index d287c575522..d3e2c5f90a2 100644
--- a/drivers/usb/atm/usbatm.c
+++ b/drivers/usb/atm/usbatm.c
@@ -396,7 +396,9 @@ static void usbatm_extract_one_cell(struct usbatm_data *instance, unsigned char
 			goto out;	/* atm_charge increments rx_drop */
 		}
 
-		memcpy(skb->data, skb_tail_pointer(sarb) - pdu_length, length);
+		skb_copy_to_linear_data(skb,
+					skb_tail_pointer(sarb) - pdu_length,
+					length);
 		__skb_put(skb, length);
 
 		vdbg("%s: sending skb 0x%p, skb->len %u, skb->truesize %u",
diff --git a/drivers/usb/net/asix.c b/drivers/usb/net/asix.c
index f56e2dab371..d5ef97bc4d0 100644
--- a/drivers/usb/net/asix.c
+++ b/drivers/usb/net/asix.c
@@ -352,7 +352,7 @@ static struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 	skb_push(skb, 4);
 	packet_len = (((skb->len - 4) ^ 0x0000ffff) << 16) + (skb->len - 4);
 	cpu_to_le32s(&packet_len);
-	memcpy(skb->data, &packet_len, sizeof(packet_len));
+	skb_copy_to_linear_data(skb, &packet_len, sizeof(packet_len));
 
 	if ((skb->len % 512) == 0) {
 		cpu_to_le32s(&padbytes);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 08c96bcbc59..92969f662ee 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1521,6 +1521,21 @@ static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb,
 	memcpy(to, skb->data + offset, len);
 }
 
+static inline void skb_copy_to_linear_data(struct sk_buff *skb,
+					   const void *from,
+					   const unsigned int len)
+{
+	memcpy(skb->data, from, len);
+}
+
+static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
+						  const int offset,
+						  const void *from,
+						  const unsigned int len)
+{
+	memcpy(skb->data + offset, from, len);
+}
+
 extern void skb_init(void);
 
 /**
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index a1686dfcbb9..0e9f00c5c89 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -173,7 +173,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
 	}
 	skb_push(skb, minheadroom);
 	if (brvcc->encaps == e_llc)
-		memcpy(skb->data, llc_oui_pid_pad, 10);
+		skb_copy_to_linear_data(skb, llc_oui_pid_pad, 10);
 	else
 		memset(skb->data, 0, 2);
 #endif /* FASTER_VERSION */
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 4b3e72f31b3..6d63afa5764 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -576,8 +576,8 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
 					break;
 				}
 				skb2->len = sizeof(struct atmlec_msg);
-				memcpy(skb2->data, mesg,
-				       sizeof(struct atmlec_msg));
+				skb_copy_to_linear_data(skb2, mesg,
+							sizeof(*mesg));
 				atm_force_charge(priv->lecd, skb2->truesize);
 				sk = sk_atm(priv->lecd);
 				skb_queue_tail(&sk->sk_receive_queue, skb2);
@@ -1337,7 +1337,7 @@ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
 		if (skb == NULL)
 			return -1;
 		skb->len = *sizeoftlvs;
-		memcpy(skb->data, *tlvs, *sizeoftlvs);
+		skb_copy_to_linear_data(skb, *tlvs, *sizeoftlvs);
 		retval = send_to_lecd(priv, l_arp_xmt, dst_mac, NULL, skb);
 	}
 	return retval;
@@ -1371,7 +1371,7 @@ static int lane2_associate_req(struct net_device *dev, u8 *lan_dst,
 	if (skb == NULL)
 		return 0;
 	skb->len = sizeoftlvs;
-	memcpy(skb->data, tlvs, sizeoftlvs);
+	skb_copy_to_linear_data(skb, tlvs, sizeoftlvs);
 	retval = send_to_lecd(priv, l_associate_req, NULL, NULL, skb);
 	if (retval != 0)
 		printk("lec.c: lane2_associate_req() failed\n");
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 4d2592c1409..813e08d6dc7 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -504,11 +504,13 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc)
 		tagged_llc_snap_hdr.tag = entry->ctrl_info.tag;
 		skb_pull(skb, ETH_HLEN);                       /* get rid of Eth header */
 		skb_push(skb, sizeof(tagged_llc_snap_hdr));    /* add LLC/SNAP header   */
-		memcpy(skb->data, &tagged_llc_snap_hdr, sizeof(tagged_llc_snap_hdr));
+		skb_copy_to_linear_data(skb, &tagged_llc_snap_hdr,
+					sizeof(tagged_llc_snap_hdr));
 	} else {
 		skb_pull(skb, ETH_HLEN);                        /* get rid of Eth header */
 		skb_push(skb, sizeof(struct llc_snap_hdr));     /* add LLC/SNAP header + tag  */
-		memcpy(skb->data, &llc_snap_mpoa_data, sizeof(struct llc_snap_hdr));
+		skb_copy_to_linear_data(skb, &llc_snap_mpoa_data,
+					sizeof(struct llc_snap_hdr));
 	}
 
 	atomic_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
@@ -711,7 +713,8 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
 		return;
 	}
 	skb_push(new_skb, eg->ctrl_info.DH_length);     /* add MAC header */
-	memcpy(new_skb->data, eg->ctrl_info.DLL_header, eg->ctrl_info.DH_length);
+	skb_copy_to_linear_data(new_skb, eg->ctrl_info.DLL_header,
+				eg->ctrl_info.DH_length);
 	new_skb->protocol = eth_type_trans(new_skb, dev);
 	skb_reset_network_header(new_skb);
 
@@ -936,7 +939,7 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc)
 	if (skb == NULL)
 		return -ENOMEM;
 	skb_put(skb, sizeof(struct k_message));
-	memcpy(skb->data, mesg, sizeof(struct k_message));
+	skb_copy_to_linear_data(skb, mesg, sizeof(*mesg));
 	atm_force_charge(mpc->mpoad_vcc, skb->truesize);
 
 	sk = sk_atm(mpc->mpoad_vcc);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 8b45224699f..fd70d041e51 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -149,7 +149,8 @@ int nf_bridge_copy_header(struct sk_buff *skb)
 	if (err)
 		return err;
 
-	memcpy(skb->data - header_size, skb->nf_bridge->data, header_size);
+	skb_copy_to_linear_data_offset(skb, -header_size,
+				       skb->nf_bridge->data, header_size);
 
 	if (skb->protocol == htons(ETH_P_8021Q))
 		__skb_push(skb, VLAN_HLEN);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 57a82445c46..1fb30c3528b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -293,7 +293,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 	if (!skb)
 		return;
 
-	memcpy(skb->data, msg, len);
+	skb_copy_to_linear_data(skb, msg, len);
 	skb->len += len;
 
 	skb_push(skb, sizeof(*udph));
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 17c6bb5927b..331d3efa82f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1129,7 +1129,7 @@ int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len)
 	if ((copy = start - offset) > 0) {
 		if (copy > len)
 			copy = len;
-		memcpy(skb->data + offset, from, copy);
+		skb_copy_to_linear_data_offset(skb, offset, from, copy);
 		if ((len -= copy) == 0)
 			return 0;
 		offset += copy;
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 94e2b8e2ab2..6ae036b1920 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -759,8 +759,9 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 		    IEEE80211_FCTL_TODS) && skb->len >= ETH_HLEN + ETH_ALEN) {
 		/* Non-standard frame: get addr4 from its bogus location after
 		 * the payload */
-		memcpy(skb->data + ETH_ALEN,
-		       skb->data + skb->len - ETH_ALEN, ETH_ALEN);
+		skb_copy_to_linear_data_offset(skb, ETH_ALEN,
+					       skb->data + skb->len - ETH_ALEN,
+					       ETH_ALEN);
 		skb_trim(skb, skb->len - ETH_ALEN);
 	}
 #endif
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index ba348b1e5f8..ab86137c71d 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -66,7 +66,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb->truesize += dlen - plen;
 	__skb_put(skb, dlen - plen);
-	memcpy(skb->data, scratch, dlen);
+	skb_copy_to_linear_data(skb, scratch, dlen);
 out:
 	put_cpu();
 	return err;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 48027df5a90..0ebae413ae8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -584,7 +584,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 
 	skb->network_header = skb->tail;
 	skb_put(skb, ihl);
-	memcpy(skb->data,pkt->data,ihl);
+	skb_copy_to_linear_data(skb, pkt->data, ihl);
 	ip_hdr(skb)->protocol = 0;			/* Flag to the kernel this is a route add */
 	msg = (struct igmpmsg *)skb_network_header(skb);
 	msg->im_vif = vifi;
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index c8a822c0aa7..15ad5dd2d98 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -602,7 +602,7 @@ int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
 		skb_put(skb, diff);
 		memmove(skb->data + o_offset + n_len,
 			skb->data + o_offset + o_len, o_left);
-		memcpy(skb->data + o_offset, n_buf, n_len);
+		skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len);
 	}
 
 	/* must update the iph total length here */
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index cd8fec05f9b..0d72693869e 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -368,7 +368,7 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 	}
 	if (!skb_make_writable(&e->skb, v->data_len))
 		return -ENOMEM;
-	memcpy(e->skb->data, v->payload, v->data_len);
+	skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
 	e->skb->ip_summed = CHECKSUM_NONE;
 
 	return 0;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 7691a1b5caa..1ee50b5782e 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -111,7 +111,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	skb->truesize += dlen - plen;
 	__skb_put(skb, dlen - plen);
-	memcpy(skb->data, scratch, dlen);
+	skb_copy_to_linear_data(skb, scratch, dlen);
 	err = ipch->nexthdr;
 
 out_put_cpu:
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 2f1ae422d87..bfae9fdc466 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -366,7 +366,7 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 	}
 	if (!skb_make_writable(&e->skb, v->data_len))
 		return -ENOMEM;
-	memcpy(e->skb->data, v->payload, v->data_len);
+	skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
 	e->skb->ip_summed = CHECKSUM_NONE;
 
 	return 0;
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index b55bc8f989d..3279897a01b 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -256,7 +256,7 @@ static struct sk_buff *irttp_reassemble_skb(struct tsap_cb *self)
 	 *  Copy all fragments to a new buffer
 	 */
 	while ((frag = skb_dequeue(&self->rx_fragments)) != NULL) {
-		memcpy(skb->data+n, frag->data, frag->len);
+		skb_copy_to_linear_data_offset(skb, n, frag->data, frag->len);
 		n += frag->len;
 
 		dev_kfree_skb(frag);
diff --git a/net/irda/wrapper.c b/net/irda/wrapper.c
index 2acc66dfb55..a7a7f191f1a 100644
--- a/net/irda/wrapper.c
+++ b/net/irda/wrapper.c
@@ -239,7 +239,8 @@ async_bump(struct net_device *dev,
 
 	if(docopy) {
 		/* Copy data without CRC (lenght already checked) */
-		memcpy(newskb->data, rx_buff->data, rx_buff->len - 2);
+		skb_copy_to_linear_data(newskb, rx_buff->data,
+					rx_buff->len - 2);
 		/* Deliver this skb */
 		dataskb = newskb;
 	} else {
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 9aefb1c9bfa..7a97bec6772 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -648,7 +648,7 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
 	}
 	if (!skb_make_writable(&e->skb, data_len))
 		return -ENOMEM;
-	memcpy(e->skb->data, data, data_len);
+	skb_copy_to_linear_data(e->skb, data, data_len);
 	e->skb->ip_summed = CHECKSUM_NONE;
 	return 0;
 }
diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
index 7c467c95c7d..e3e6c44e189 100644
--- a/net/netrom/nr_out.c
+++ b/net/netrom/nr_out.c
@@ -59,8 +59,8 @@ void nr_output(struct sock *sk, struct sk_buff *skb)
 
 			/* Duplicate the Transport Header */
 			skb_push(skbn, NR_TRANSPORT_LEN);
-			memcpy(skbn->data, transport, NR_TRANSPORT_LEN);
-
+			skb_copy_to_linear_data(skbn, transport,
+						NR_TRANSPORT_LEN);
 			if (skb->len > 0)
 				skbn->data[4] |= NR_MORE_FLAG;
 
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index f38c3b3471e..806bf6f5dc6 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1181,7 +1181,7 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 			/* Duplicate the Header */
 			skb_push(skbn, ROSE_MIN_LEN);
-			memcpy(skbn->data, header, ROSE_MIN_LEN);
+			skb_copy_to_linear_data(skbn, header, ROSE_MIN_LEN);
 
 			if (skb->len > 0)
 				skbn->data[2] |= M_BIT;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 71c2f2fd405..f3f99c8ea08 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1001,7 +1001,7 @@ static int link_bundle_buf(struct link *l_ptr,
 		return 0;
 
 	skb_put(bundler, pad + size);
-	memcpy(bundler->data + to_pos, buf->data, size);
+	skb_copy_to_linear_data_offset(bundler, to_pos, buf->data, size);
 	msg_set_size(bundler_msg, to_pos + size);
 	msg_set_msgcnt(bundler_msg, msg_msgcnt(bundler_msg) + 1);
 	dbg("Packed msg # %u(%u octets) into pos %u in buf(#%u)\n",
@@ -1109,8 +1109,8 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
 			if (bundler) {
 				msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG,
 					 TIPC_OK, INT_H_SIZE, l_ptr->addr);
-				memcpy(bundler->data, (unchar *)&bundler_hdr,
-				       INT_H_SIZE);
+				skb_copy_to_linear_data(bundler, &bundler_hdr,
+							INT_H_SIZE);
 				skb_trim(bundler, INT_H_SIZE);
 				link_bundle_buf(l_ptr, bundler, buf);
 				buf = bundler;
@@ -1383,9 +1383,9 @@ again:
 	if (!buf)
 		return -ENOMEM;
 	buf->next = NULL;
-	memcpy(buf->data, (unchar *)&fragm_hdr, INT_H_SIZE);
+	skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE);
 	hsz = msg_hdr_sz(hdr);
-	memcpy(buf->data + INT_H_SIZE, (unchar *)hdr, hsz);
+	skb_copy_to_linear_data_offset(buf, INT_H_SIZE, hdr, hsz);
 	msg_dbg(buf_msg(buf), ">BUILD>");
 
 	/* Chop up message: */
@@ -1416,8 +1416,8 @@ error:
 				return -EFAULT;
 			}
 		} else
-			memcpy(buf->data + fragm_crs, sect_crs, sz);
-
+			skb_copy_to_linear_data_offset(buf, fragm_crs,
+						       sect_crs, sz);
 		sect_crs += sz;
 		sect_rest -= sz;
 		fragm_crs += sz;
@@ -1442,7 +1442,7 @@ error:
 
 			buf->next = NULL;
 			prev->next = buf;
-			memcpy(buf->data, (unchar *)&fragm_hdr, INT_H_SIZE);
+			skb_copy_to_linear_data(buf, &fragm_hdr, INT_H_SIZE);
 			fragm_crs = INT_H_SIZE;
 			fragm_rest = fragm_sz;
 			msg_dbg(buf_msg(buf),"  >BUILD>");
@@ -2130,7 +2130,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
 		buf = l_ptr->proto_msg_queue;
 		if (!buf)
 			return;
-		memcpy(buf->data, (unchar *)msg, sizeof(l_ptr->proto_msg));
+		skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
 		return;
 	}
 	msg_set_timestamp(msg, jiffies_to_msecs(jiffies));
@@ -2143,7 +2143,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
 	if (!buf)
 		return;
 
-	memcpy(buf->data, (unchar *)msg, sizeof(l_ptr->proto_msg));
+	skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
 	msg_set_size(buf_msg(buf), msg_size);
 
 	if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
@@ -2319,8 +2319,8 @@ void tipc_link_tunnel(struct link *l_ptr,
 		     "unable to send tunnel msg\n");
 		return;
 	}
-	memcpy(buf->data, (unchar *)tunnel_hdr, INT_H_SIZE);
-	memcpy(buf->data + INT_H_SIZE, (unchar *)msg, length);
+	skb_copy_to_linear_data(buf, tunnel_hdr, INT_H_SIZE);
+	skb_copy_to_linear_data_offset(buf, INT_H_SIZE, msg, length);
 	dbg("%c->%c:", l_ptr->b_ptr->net_plane, tunnel->b_ptr->net_plane);
 	msg_dbg(buf_msg(buf), ">SEND>");
 	tipc_link_send_buf(tunnel, buf);
@@ -2361,7 +2361,7 @@ void tipc_link_changeover(struct link *l_ptr)
 
 		buf = buf_acquire(INT_H_SIZE);
 		if (buf) {
-			memcpy(buf->data, (unchar *)&tunnel_hdr, INT_H_SIZE);
+			skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE);
 			msg_set_size(&tunnel_hdr, INT_H_SIZE);
 			dbg("%c->%c:", l_ptr->b_ptr->net_plane,
 			    tunnel->b_ptr->net_plane);
@@ -2426,8 +2426,9 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel)
 			     "unable to send duplicate msg\n");
 			return;
 		}
-		memcpy(outbuf->data, (unchar *)&tunnel_hdr, INT_H_SIZE);
-		memcpy(outbuf->data + INT_H_SIZE, iter->data, length);
+		skb_copy_to_linear_data(outbuf, &tunnel_hdr, INT_H_SIZE);
+		skb_copy_to_linear_data_offset(outbuf, INT_H_SIZE, iter->data,
+					       length);
 		dbg("%c->%c:", l_ptr->b_ptr->net_plane,
 		    tunnel->b_ptr->net_plane);
 		msg_dbg(buf_msg(outbuf), ">SEND>");
@@ -2457,7 +2458,7 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos)
 
 	eb = buf_acquire(size);
 	if (eb)
-		memcpy(eb->data, (unchar *)msg, size);
+		skb_copy_to_linear_data(eb, msg, size);
 	return eb;
 }
 
@@ -2631,9 +2632,9 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
 			goto exit;
 		}
 		msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE);
-		memcpy(fragm->data, (unchar *)&fragm_hdr, INT_H_SIZE);
-		memcpy(fragm->data + INT_H_SIZE, crs, fragm_sz);
-
+		skb_copy_to_linear_data(fragm, &fragm_hdr, INT_H_SIZE);
+		skb_copy_to_linear_data_offset(fragm, INT_H_SIZE, crs,
+					       fragm_sz);
 		/*  Send queued messages first, if any: */
 
 		l_ptr->stats.sent_fragments++;
@@ -2733,8 +2734,8 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
 		if (pbuf != NULL) {
 			pbuf->next = *pending;
 			*pending = pbuf;
-			memcpy(pbuf->data, (unchar *)imsg, msg_data_sz(fragm));
-
+			skb_copy_to_linear_data(pbuf, imsg,
+						msg_data_sz(fragm));
 			/*  Prepare buffer for subsequent fragments. */
 
 			set_long_msg_seqno(pbuf, long_msg_seq_no);
@@ -2750,7 +2751,8 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
 		u32 fsz = get_fragm_size(pbuf);
 		u32 crs = ((msg_fragm_no(fragm) - 1) * fsz);
 		u32 exp_frags = get_expected_frags(pbuf) - 1;
-		memcpy(pbuf->data + crs, msg_data(fragm), dsz);
+		skb_copy_to_linear_data_offset(pbuf, crs,
+					       msg_data(fragm), dsz);
 		buf_discard(fbuf);
 
 		/* Is message complete? */
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 62d54906360..aec7ce7af87 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -786,15 +786,16 @@ static inline int msg_build(struct tipc_msg *hdr,
 	*buf = buf_acquire(sz);
 	if (!(*buf))
 		return -ENOMEM;
-	memcpy((*buf)->data, (unchar *)hdr, hsz);
+	skb_copy_to_linear_data(*buf, hdr, hsz);
 	for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) {
 		if (likely(usrmem))
 			res = !copy_from_user((*buf)->data + pos,
 					      msg_sect[cnt].iov_base,
 					      msg_sect[cnt].iov_len);
 		else
-			memcpy((*buf)->data + pos, msg_sect[cnt].iov_base,
-			       msg_sect[cnt].iov_len);
+			skb_copy_to_linear_data_offset(*buf, pos,
+						       msg_sect[cnt].iov_base,
+						       msg_sect[cnt].iov_len);
 		pos += msg_sect[cnt].iov_len;
 	}
 	if (likely(res))
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 5f8217d4b45..bcd5da00737 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -464,7 +464,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
 	msg_set_size(rmsg, data_sz + hdr_sz);
 	msg_set_nametype(rmsg, msg_nametype(msg));
 	msg_set_nameinst(rmsg, msg_nameinst(msg));
-	memcpy(rbuf->data + hdr_sz, msg_data(msg), data_sz);
+	skb_copy_to_linear_data_offset(rbuf, hdr_sz, msg_data(msg), data_sz);
 
 	/* send self-abort message when rejecting on a connected port */
 	if (msg_connected(msg)) {
@@ -1419,7 +1419,7 @@ int tipc_send_buf(u32 ref, struct sk_buff *buf, unsigned int dsz)
 		return -ENOMEM;
 
 	skb_push(buf, hsz);
-	memcpy(buf->data, (unchar *)msg, hsz);
+	skb_copy_to_linear_data(buf, msg, hsz);
 	destnode = msg_destnode(msg);
 	p_ptr->publ.congested = 1;
 	if (!tipc_port_congested(p_ptr)) {
@@ -1555,7 +1555,7 @@ int tipc_forward_buf2name(u32 ref,
 	if (skb_cow(buf, LONG_H_SIZE))
 		return -ENOMEM;
 	skb_push(buf, LONG_H_SIZE);
-	memcpy(buf->data, (unchar *)msg, LONG_H_SIZE);
+	skb_copy_to_linear_data(buf, msg, LONG_H_SIZE);
 	msg_dbg(buf_msg(buf),"PREP:");
 	if (likely(destport || destnode)) {
 		p_ptr->sent++;
@@ -1679,7 +1679,7 @@ int tipc_forward_buf2port(u32 ref,
 		return -ENOMEM;
 
 	skb_push(buf, DIR_MSG_H_SIZE);
-	memcpy(buf->data, (unchar *)msg, DIR_MSG_H_SIZE);
+	skb_copy_to_linear_data(buf, msg, DIR_MSG_H_SIZE);
 	msg_dbg(msg, "buf2port: ");
 	p_ptr->sent++;
 	if (dest->node == tipc_own_addr)
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index c49e223084f..7a19e0ede28 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -277,8 +277,8 @@ int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev,
 		skb_push(skb, 7);
 		skb->data[0] = 0;
 		skb->data[1] = NLPID_SNAP;
-		memcpy(&skb->data[2], wanrouter_oui_ether,
-		       sizeof(wanrouter_oui_ether));
+		skb_copy_to_linear_data_offset(skb, 2, wanrouter_oui_ether,
+					       sizeof(wanrouter_oui_ether));
 		*((unsigned short*)&skb->data[5]) = htons(type);
 		break;
 
diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c
index bb45e21ffce..2b96b52114d 100644
--- a/net/x25/x25_out.c
+++ b/net/x25/x25_out.c
@@ -89,7 +89,7 @@ int x25_output(struct sock *sk, struct sk_buff *skb)
 
 			/* Duplicate the Header */
 			skb_push(skbn, header_len);
-			memcpy(skbn->data, header, header_len);
+			skb_copy_to_linear_data(skbn, header, header_len);
 
 			if (skb->len > 0) {
 				if (x25->neighbour->extended)
-- 
cgit v1.2.3


From 1a4e2d093fd5f3eaf8cffc04a1b803f8b0ddef6d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Sat, 31 Mar 2007 11:55:45 -0300
Subject: [SK_BUFF]: Some more conversions to skb_copy_from_linear_data

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
---
 net/ax25/ax25_in.c              | 4 +++-
 net/decnet/netfilter/dn_rtmsg.c | 2 +-
 net/ipv4/tcp_output.c           | 4 +++-
 net/netrom/nr_in.c              | 4 +++-
 net/x25/x25_in.c                | 7 +++++--
 5 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c
index 31c59387a6f..0ddaff0df21 100644
--- a/net/ax25/ax25_in.c
+++ b/net/ax25/ax25_in.c
@@ -66,7 +66,9 @@ static int ax25_rx_fragment(ax25_cb *ax25, struct sk_buff *skb)
 
 					/* Copy data from the fragments */
 					while ((skbo = skb_dequeue(&ax25->frag_queue)) != NULL) {
-						memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+						skb_copy_from_linear_data(skbo,
+							  skb_put(skbn, skbo->len),
+									  skbo->len);
 						kfree_skb(skbo);
 					}
 
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 9e8256a2361..2ee47bab693 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -48,7 +48,7 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
 	rtm = (struct nf_dn_rtmsg *)NLMSG_DATA(nlh);
 	rtm->nfdn_ifindex = rt_skb->dev->ifindex;
 	ptr = NFDN_RTMSG(rtm);
-	memcpy(ptr, rt_skb->data, rt_skb->len);
+	skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len);
 	nlh->nlmsg_len = skb->tail - old_tail;
 	return skb;
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c22cdcd8432..94d9f0c6368 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1645,7 +1645,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 		/* Ok.	We will be able to collapse the packet. */
 		tcp_unlink_write_queue(next_skb, sk);
 
-		memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
+		skb_copy_from_linear_data(next_skb,
+					  skb_put(skb, next_skb_size),
+					  next_skb_size);
 
 		if (next_skb->ip_summed == CHECKSUM_PARTIAL)
 			skb->ip_summed = CHECKSUM_PARTIAL;
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index e6dc749e14b..68176483617 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -54,7 +54,9 @@ static int nr_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
 		skb_reset_transport_header(skbn);
 
 		while ((skbo = skb_dequeue(&nr->frag_queue)) != NULL) {
-			memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+			skb_copy_from_linear_data(skbo,
+						  skb_put(skbn, skbo->len),
+						  skbo->len);
 			kfree_skb(skbo);
 		}
 
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index ba13248aa1c..1c88762c279 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -56,14 +56,17 @@ static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more)
 		skb_reset_transport_header(skbn);
 
 		skbo = skb_dequeue(&x25->fragment_queue);
-		memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+		skb_copy_from_linear_data(skbo, skb_put(skbn, skbo->len),
+					  skbo->len);
 		kfree_skb(skbo);
 
 		while ((skbo =
 			skb_dequeue(&x25->fragment_queue)) != NULL) {
 			skb_pull(skbo, (x25->neighbour->extended) ?
 					X25_EXT_MIN_LEN : X25_STD_MIN_LEN);
-			memcpy(skb_put(skbn, skbo->len), skbo->data, skbo->len);
+			skb_copy_from_linear_data(skbo,
+						  skb_put(skbn, skbo->len),
+						  skbo->len);
 			kfree_skb(skbo);
 		}
 
-- 
cgit v1.2.3


From 926554c4b74e53d5da4cefdc3bbd7e92427fb1a9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Date: Sat, 31 Mar 2007 12:05:49 -0300
Subject: [CREDITS]: Update Arnaldo entry

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
---
 CREDITS     |  8 ++++----
 MAINTAINERS | 17 ++++++++---------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/CREDITS b/CREDITS
index 6bd8ab86b5b..e3e7271ace0 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2286,14 +2286,14 @@ S: D-90453 Nuernberg
 S: Germany
 
 N: Arnaldo Carvalho de Melo
-E: acme@mandriva.com
 E: acme@ghostprotocols.net
+E: arnaldo.melo@gmail.com
+E: acme@redhat.com
 W: http://oops.ghostprotocols.net:81/blog/
 P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD  841A B6AB 4681 9224 DF01
 D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks
-S: Mandriva
-S: R. Tocantins, 89 - Cristo Rei
-S: 80050-430 - Curitiba - Paran�
+S: R. Bras�lio Itiber�, 4270/1010 - �gua Verde
+S: 80240-060 - Curitiba - Paran�
 S: Brazil
 
 N: Karsten Merker
diff --git a/MAINTAINERS b/MAINTAINERS
index 277877a34ef..ab258c7f46a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -384,7 +384,7 @@ S:	Supported
 
 APPLETALK NETWORK LAYER
 P:	Arnaldo Carvalho de Melo
-M:	acme@conectiva.com.br
+M:	acme@ghostprotocols.net
 S:	Maintained
 
 ARC FRAMEBUFFER DRIVER
@@ -1034,9 +1034,8 @@ S:	Maintained
 
 CYCLADES 2X SYNC CARD DRIVER
 P:	Arnaldo Carvalho de Melo
-M:	acme@conectiva.com.br
-W:	http://advogato.org/person/acme
-L:	cycsyn-devel@bazar.conectiva.com.br
+M:	acme@ghostprotocols.net
+W:	http://oops.ghostprotocols.net:81/blog
 S:	Maintained
 
 CYCLADES ASYNC MUX DRIVER
@@ -1077,7 +1076,7 @@ S:	Maintained
 
 DCCP PROTOCOL
 P:	Arnaldo Carvalho de Melo
-M:	acme@mandriva.com
+M:	acme@ghostprotocols.net
 L:	dccp@vger.kernel.org
 W:	http://linux-net.osdl.org/index.php/DCCP
 S:	Maintained
@@ -1871,7 +1870,7 @@ S:	Supported
 
 IPX NETWORK LAYER
 P:	Arnaldo Carvalho de Melo
-M:	acme@conectiva.com.br
+M:	acme@ghostprotocols.net
 L:	netdev@vger.kernel.org
 S:	Maintained
 
@@ -2108,7 +2107,7 @@ S:	Supported
 
 LLC (802.2)
 P:	Arnaldo Carvalho de Melo
-M:	acme@conectiva.com.br
+M:	acme@ghostprotocols.net
 S:	Maintained
 
 LINUX FOR 64BIT POWERPC
@@ -3766,8 +3765,8 @@ S:	Maintained
 
 WL3501 WIRELESS PCMCIA CARD DRIVER
 P:	Arnaldo Carvalho de Melo
-M:	acme@conectiva.com.br
-W:	http://advogato.org/person/acme
+M:	acme@ghostprotocols.net
+W:	http://oops.ghostprotocols.net:81/blog
 S:	Maintained
 
 X.25 NETWORK LAYER
-- 
cgit v1.2.3


From 716ea3a7aae3a2bfc44cb97b5419c1c9868c7bc9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 2 Apr 2007 20:19:53 -0700
Subject: [NET]: Move generic skbuff stuff from XFRM code to generic code

Move generic skbuff stuff from XFRM code to generic code so that
AF_RXRPC can use it too.

The kdoc comments I've attached to the functions needs to be checked
by whoever wrote them as I had to make some guesses about the workings
of these functions.

Signed-off-By: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |   6 ++
 include/net/esp.h      |   2 -
 net/core/skbuff.c      | 188 +++++++++++++++++++++++++++++++++++++++++++++++++
 net/xfrm/xfrm_algo.c   | 169 --------------------------------------------
 4 files changed, 194 insertions(+), 171 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 92969f662ee..9b2957d203c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -86,6 +86,7 @@
  */
 
 struct net_device;
+struct scatterlist;
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 struct nf_conntrack {
@@ -347,6 +348,11 @@ extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
 extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 				       int newheadroom, int newtailroom,
 				       gfp_t priority);
+extern int	       skb_to_sgvec(struct sk_buff *skb,
+				    struct scatterlist *sg, int offset,
+				    int len);
+extern int	       skb_cow_data(struct sk_buff *skb, int tailbits,
+				    struct sk_buff **trailer);
 extern int	       skb_pad(struct sk_buff *skb, int pad);
 #define dev_kfree_skb(a)	kfree_skb(a)
 extern void	      skb_over_panic(struct sk_buff *skb, int len,
diff --git a/include/net/esp.h b/include/net/esp.h
index 713d039f4af..d05d8d2c78f 100644
--- a/include/net/esp.h
+++ b/include/net/esp.h
@@ -40,8 +40,6 @@ struct esp_data
 	} auth;
 };
 
-extern int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len);
-extern int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
 extern void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
 
 static inline int esp_mac_digest(struct esp_data *esp, struct sk_buff *skb,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 331d3efa82f..f927b6e8027 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -55,6 +55,7 @@
 #include <linux/cache.h>
 #include <linux/rtnetlink.h>
 #include <linux/init.h>
+#include <linux/scatterlist.h>
 
 #include <net/protocol.h>
 #include <net/dst.h>
@@ -2002,6 +2003,190 @@ void __init skb_init(void)
 						NULL, NULL);
 }
 
+/**
+ *	skb_to_sgvec - Fill a scatter-gather list from a socket buffer
+ *	@skb: Socket buffer containing the buffers to be mapped
+ *	@sg: The scatter-gather list to map into
+ *	@offset: The offset into the buffer's contents to start mapping
+ *	@len: Length of buffer space to be mapped
+ *
+ *	Fill the specified scatter-gather list with mappings/pointers into a
+ *	region of the buffer space attached to a socket buffer.
+ */
+int
+skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	int elt = 0;
+
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		sg[elt].page = virt_to_page(skb->data + offset);
+		sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
+		sg[elt].length = copy;
+		elt++;
+		if ((len -= copy) == 0)
+			return elt;
+		offset += copy;
+	}
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+			if (copy > len)
+				copy = len;
+			sg[elt].page = frag->page;
+			sg[elt].offset = frag->page_offset+offset-start;
+			sg[elt].length = copy;
+			elt++;
+			if (!(len -= copy))
+				return elt;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
+				if ((len -= copy) == 0)
+					return elt;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+	BUG_ON(len);
+	return elt;
+}
+
+/**
+ *	skb_cow_data - Check that a socket buffer's data buffers are writable
+ *	@skb: The socket buffer to check.
+ *	@tailbits: Amount of trailing space to be added
+ *	@trailer: Returned pointer to the skb where the @tailbits space begins
+ *
+ *	Make sure that the data buffers attached to a socket buffer are
+ *	writable. If they are not, private copies are made of the data buffers
+ *	and the socket buffer is set to use these instead.
+ *
+ *	If @tailbits is given, make sure that there is space to write @tailbits
+ *	bytes of data beyond current end of socket buffer.  @trailer will be
+ *	set to point to the skb in which this space begins.
+ *
+ *	The number of scatterlist elements required to completely map the
+ *	COW'd and extended socket buffer will be returned.
+ */
+int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
+{
+	int copyflag;
+	int elt;
+	struct sk_buff *skb1, **skb_p;
+
+	/* If skb is cloned or its head is paged, reallocate
+	 * head pulling out all the pages (pages are considered not writable
+	 * at the moment even if they are anonymous).
+	 */
+	if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
+	    __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
+		return -ENOMEM;
+
+	/* Easy case. Most of packets will go this way. */
+	if (!skb_shinfo(skb)->frag_list) {
+		/* A little of trouble, not enough of space for trailer.
+		 * This should not happen, when stack is tuned to generate
+		 * good frames. OK, on miss we reallocate and reserve even more
+		 * space, 128 bytes is fair. */
+
+		if (skb_tailroom(skb) < tailbits &&
+		    pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
+			return -ENOMEM;
+
+		/* Voila! */
+		*trailer = skb;
+		return 1;
+	}
+
+	/* Misery. We are in troubles, going to mincer fragments... */
+
+	elt = 1;
+	skb_p = &skb_shinfo(skb)->frag_list;
+	copyflag = 0;
+
+	while ((skb1 = *skb_p) != NULL) {
+		int ntail = 0;
+
+		/* The fragment is partially pulled by someone,
+		 * this can happen on input. Copy it and everything
+		 * after it. */
+
+		if (skb_shared(skb1))
+			copyflag = 1;
+
+		/* If the skb is the last, worry about trailer. */
+
+		if (skb1->next == NULL && tailbits) {
+			if (skb_shinfo(skb1)->nr_frags ||
+			    skb_shinfo(skb1)->frag_list ||
+			    skb_tailroom(skb1) < tailbits)
+				ntail = tailbits + 128;
+		}
+
+		if (copyflag ||
+		    skb_cloned(skb1) ||
+		    ntail ||
+		    skb_shinfo(skb1)->nr_frags ||
+		    skb_shinfo(skb1)->frag_list) {
+			struct sk_buff *skb2;
+
+			/* Fuck, we are miserable poor guys... */
+			if (ntail == 0)
+				skb2 = skb_copy(skb1, GFP_ATOMIC);
+			else
+				skb2 = skb_copy_expand(skb1,
+						       skb_headroom(skb1),
+						       ntail,
+						       GFP_ATOMIC);
+			if (unlikely(skb2 == NULL))
+				return -ENOMEM;
+
+			if (skb1->sk)
+				skb_set_owner_w(skb2, skb1->sk);
+
+			/* Looking around. Are we still alive?
+			 * OK, link new skb, drop old one */
+
+			skb2->next = skb1->next;
+			*skb_p = skb2;
+			kfree_skb(skb1);
+			skb1 = skb2;
+		}
+		elt++;
+		*trailer = skb1;
+		skb_p = &skb1->next;
+	}
+
+	return elt;
+}
+
 EXPORT_SYMBOL(___pskb_trim);
 EXPORT_SYMBOL(__kfree_skb);
 EXPORT_SYMBOL(kfree_skb);
@@ -2036,3 +2221,6 @@ EXPORT_SYMBOL(skb_seq_read);
 EXPORT_SYMBOL(skb_abort_seq_read);
 EXPORT_SYMBOL(skb_find_text);
 EXPORT_SYMBOL(skb_append_datato_frags);
+
+EXPORT_SYMBOL_GPL(skb_to_sgvec);
+EXPORT_SYMBOL_GPL(skb_cow_data);
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index f373a8a7d9c..6249a9405bb 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -612,175 +612,6 @@ EXPORT_SYMBOL_GPL(skb_icv_walk);
 
 #if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
 
-/* Looking generic it is not used in another places. */
-
-int
-skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
-{
-	int start = skb_headlen(skb);
-	int i, copy = start - offset;
-	int elt = 0;
-
-	if (copy > 0) {
-		if (copy > len)
-			copy = len;
-		sg[elt].page = virt_to_page(skb->data + offset);
-		sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
-		sg[elt].length = copy;
-		elt++;
-		if ((len -= copy) == 0)
-			return elt;
-		offset += copy;
-	}
-
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
-
-		BUG_TRAP(start <= offset + len);
-
-		end = start + skb_shinfo(skb)->frags[i].size;
-		if ((copy = end - offset) > 0) {
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-
-			if (copy > len)
-				copy = len;
-			sg[elt].page = frag->page;
-			sg[elt].offset = frag->page_offset+offset-start;
-			sg[elt].length = copy;
-			elt++;
-			if (!(len -= copy))
-				return elt;
-			offset += copy;
-		}
-		start = end;
-	}
-
-	if (skb_shinfo(skb)->frag_list) {
-		struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
-		for (; list; list = list->next) {
-			int end;
-
-			BUG_TRAP(start <= offset + len);
-
-			end = start + list->len;
-			if ((copy = end - offset) > 0) {
-				if (copy > len)
-					copy = len;
-				elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
-				if ((len -= copy) == 0)
-					return elt;
-				offset += copy;
-			}
-			start = end;
-		}
-	}
-	BUG_ON(len);
-	return elt;
-}
-EXPORT_SYMBOL_GPL(skb_to_sgvec);
-
-/* Check that skb data bits are writable. If they are not, copy data
- * to newly created private area. If "tailbits" is given, make sure that
- * tailbits bytes beyond current end of skb are writable.
- *
- * Returns amount of elements of scatterlist to load for subsequent
- * transformations and pointer to writable trailer skb.
- */
-
-int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
-{
-	int copyflag;
-	int elt;
-	struct sk_buff *skb1, **skb_p;
-
-	/* If skb is cloned or its head is paged, reallocate
-	 * head pulling out all the pages (pages are considered not writable
-	 * at the moment even if they are anonymous).
-	 */
-	if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
-	    __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
-		return -ENOMEM;
-
-	/* Easy case. Most of packets will go this way. */
-	if (!skb_shinfo(skb)->frag_list) {
-		/* A little of trouble, not enough of space for trailer.
-		 * This should not happen, when stack is tuned to generate
-		 * good frames. OK, on miss we reallocate and reserve even more
-		 * space, 128 bytes is fair. */
-
-		if (skb_tailroom(skb) < tailbits &&
-		    pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
-			return -ENOMEM;
-
-		/* Voila! */
-		*trailer = skb;
-		return 1;
-	}
-
-	/* Misery. We are in troubles, going to mincer fragments... */
-
-	elt = 1;
-	skb_p = &skb_shinfo(skb)->frag_list;
-	copyflag = 0;
-
-	while ((skb1 = *skb_p) != NULL) {
-		int ntail = 0;
-
-		/* The fragment is partially pulled by someone,
-		 * this can happen on input. Copy it and everything
-		 * after it. */
-
-		if (skb_shared(skb1))
-			copyflag = 1;
-
-		/* If the skb is the last, worry about trailer. */
-
-		if (skb1->next == NULL && tailbits) {
-			if (skb_shinfo(skb1)->nr_frags ||
-			    skb_shinfo(skb1)->frag_list ||
-			    skb_tailroom(skb1) < tailbits)
-				ntail = tailbits + 128;
-		}
-
-		if (copyflag ||
-		    skb_cloned(skb1) ||
-		    ntail ||
-		    skb_shinfo(skb1)->nr_frags ||
-		    skb_shinfo(skb1)->frag_list) {
-			struct sk_buff *skb2;
-
-			/* Fuck, we are miserable poor guys... */
-			if (ntail == 0)
-				skb2 = skb_copy(skb1, GFP_ATOMIC);
-			else
-				skb2 = skb_copy_expand(skb1,
-						       skb_headroom(skb1),
-						       ntail,
-						       GFP_ATOMIC);
-			if (unlikely(skb2 == NULL))
-				return -ENOMEM;
-
-			if (skb1->sk)
-				skb_set_owner_w(skb2, skb1->sk);
-
-			/* Looking around. Are we still alive?
-			 * OK, link new skb, drop old one */
-
-			skb2->next = skb1->next;
-			*skb_p = skb2;
-			kfree_skb(skb1);
-			skb1 = skb2;
-		}
-		elt++;
-		*trailer = skb1;
-		skb_p = &skb1->next;
-	}
-
-	return elt;
-}
-EXPORT_SYMBOL_GPL(skb_cow_data);
-
 void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
 {
 	if (tail != skb) {
-- 
cgit v1.2.3


From 038890fed8d1fa95bbbdeb517f5710eb75fa9e2e Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 5 Apr 2007 14:35:52 -0700
Subject: [RTNL]: Improve error codes for unsupported operations

The most common trigger of these errors is that the
config option hasn't been enable wich would make the
functionality available. Therefore returning EOPNOTSUPP
gives a better idea on what is going wrong.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fa5f938b37e..5266df33705 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -862,7 +862,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 	type = nlh->nlmsg_type;
 	if (type > RTM_MAX)
-		return -EINVAL;
+		return -EOPNOTSUPP;
 
 	type -= RTM_BASE;
 
@@ -885,7 +885,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 		dumpit = rtnl_get_dumpit(family, type);
 		if (dumpit == NULL)
-			return -EINVAL;
+			return -EOPNOTSUPP;
 
 		return netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
 	}
@@ -913,7 +913,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 	doit = rtnl_get_doit(family, type);
 	if (doit == NULL)
-		return -EINVAL;
+		return -EOPNOTSUPP;
 
 	return doit(skb, nlh, (void *)&rta_buf[0]);
 }
-- 
cgit v1.2.3


From ea2f10a3c81724701fe6a754789eafd50b33909f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 5 Apr 2007 16:04:04 -0700
Subject: [XFRM]: beet: minor cleanups

Remove unnecessary initialization/variable.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_mode_beet.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 3650e027ce7..0f7c3e30e4e 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -29,7 +29,7 @@
  */
 static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	struct iphdr *iph, *top_iph = NULL;
+	struct iphdr *iph, *top_iph;
 	int hdrlen, optlen;
 
 	iph = ip_hdr(skb);
@@ -73,11 +73,9 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 	struct iphdr *iph = ip_hdr(skb);
 	int phlen = 0;
 	int optlen = 0;
-	__u8 ph_nexthdr = 0, protocol = 0;
+	u8 ph_nexthdr = 0;
 	int err = -EINVAL;
 
-	protocol = iph->protocol;
-
 	if (unlikely(iph->protocol == IPPROTO_BEETPH)) {
 		struct ip_beet_phdr *ph;
 
@@ -109,8 +107,6 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 	iph->saddr = x->sel.saddr.a4;
 	if (ph_nexthdr)
 		iph->protocol = ph_nexthdr;
-	else
-		iph->protocol = protocol;
 	iph->check = 0;
 	iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
 	err = 0;
-- 
cgit v1.2.3


From 29f6af7712c40045e7886d0fa356d97a6f9aba49 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 6 Apr 2007 11:45:39 -0700
Subject: [IPV6] FIB6RULE: Find source address during looking up route.

When looking up route for destination with rules with
source address restrictions, we may need to find a source
address for the traffic if not given.

Based on patch from Noriaki TAKAMIYA <takamiya@po.ntts.co.jp>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fib_rules.h | 11 +++++++----
 net/ipv6/fib6_rules.c     | 34 +++++++++++++++++++++++++++++++---
 2 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index f278ba781d0..87b606b63f1 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -5,10 +5,13 @@
 #include <linux/rtnetlink.h>
 
 /* rule is permanent, and cannot be deleted */
-#define FIB_RULE_PERMANENT	1
-#define FIB_RULE_INVERT		2
-#define FIB_RULE_UNRESOLVED	4
-#define FIB_RULE_DEV_DETACHED	8
+#define FIB_RULE_PERMANENT	0x00000001
+#define FIB_RULE_INVERT		0x00000002
+#define FIB_RULE_UNRESOLVED	0x00000004
+#define FIB_RULE_DEV_DETACHED	0x00000008
+
+/* try to find source address in routing lookups */
+#define FIB_RULE_FIND_SADDR	0x00010000
 
 struct fib_rule_hdr
 {
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index dd9720e700e..fc3882c9060 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -17,6 +17,7 @@
 
 #include <net/fib_rules.h>
 #include <net/ipv6.h>
+#include <net/addrconf.h>
 #include <net/ip6_route.h>
 #include <net/netlink.h>
 
@@ -95,8 +96,27 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 	if (table)
 		rt = lookup(table, flp, flags);
 
-	if (rt != &ip6_null_entry)
+	if (rt != &ip6_null_entry) {
+		struct fib6_rule *r = (struct fib6_rule *)rule;
+
+		/*
+		 * If we need to find a source address for this traffic,
+		 * we check the result if it meets requirement of the rule.
+		 */
+		if ((rule->flags & FIB_RULE_FIND_SADDR) &&
+		    r->src.plen && !(flags & RT6_LOOKUP_F_HAS_SADDR)) {
+			struct in6_addr saddr;
+			if (ipv6_get_saddr(&rt->u.dst, &flp->fl6_dst,
+					   &saddr))
+				goto again;
+			if (!ipv6_prefix_equal(&saddr, &r->src.addr,
+					       r->src.plen))
+				goto again;
+			ipv6_addr_copy(&flp->fl6_src, &saddr);
+		}
 		goto out;
+	}
+again:
 	dst_release(&rt->u.dst);
 	rt = NULL;
 	goto out;
@@ -117,9 +137,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 	    !ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen))
 		return 0;
 
+	/*
+	 * If FIB_RULE_FIND_SADDR is set and we do not have a
+	 * source address for the traffic, we defer check for
+	 * source address.
+	 */
 	if (r->src.plen) {
-		if (!(flags & RT6_LOOKUP_F_HAS_SADDR) ||
-		    !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen))
+		if (flags & RT6_LOOKUP_F_HAS_SADDR) {
+			if (!ipv6_prefix_equal(&fl->fl6_src, &r->src.addr,
+					       r->src.plen))
+				return 0;
+		} else if (!(r->common.flags & FIB_RULE_FIND_SADDR))
 			return 0;
 	}
 
-- 
cgit v1.2.3


From 56eb88828b78f6f3b11a2996350092a40745301f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 9 Apr 2007 11:45:04 -0700
Subject: [SK_BUFF]: Fix missing offset adjustment in pskb_expand_head

Since we're increasing the headroom, the header offsets need to be
increased by the same amount as well.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f927b6e8027..4965df29768 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -667,14 +667,15 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	skb->data    += off;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 	skb->end      = size;
+	off           = nhead;
 #else
 	skb->end      = skb->head + size;
+#endif
 	/* {transport,network,mac}_header and tail are relative to skb->head */
 	skb->tail	      += off;
 	skb->transport_header += off;
 	skb->network_header   += off;
 	skb->mac_header	      += off;
-#endif
 	skb->cloned   = 0;
 	skb->nohdr    = 0;
 	atomic_set(&skb_shinfo(skb)->dataref, 1);
-- 
cgit v1.2.3


From 557922584d9c5b6b990bcfb2fec3134f0e73a05d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 9 Apr 2007 11:46:17 -0700
Subject: [XFRM]: esp: fix skb_tail_pointer conversion bug

Fix incorrect switch of "trailer" skb by "skb" during skb_tail_pointer
conversion:

-       *(u8*)(trailer->tail - 1) = top_iph->protocol;
+       *(skb_tail_pointer(skb) - 1) = top_iph->protocol;

-       *(u8 *)(trailer->tail - 1) = *skb_network_header(skb);
+       *(skb_tail_pointer(skb) - 1) = *skb_network_header(skb);

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/esp4.c | 2 +-
 net/ipv6/esp6.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 5e5613930ff..bdc65d8af18 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -64,7 +64,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	esph = (struct ip_esp_hdr *)(skb_network_header(skb) +
 				     top_iph->ihl * 4);
 	top_iph->tot_len = htons(skb->len + alen);
-	*(skb_tail_pointer(skb) - 1) = top_iph->protocol;
+	*(skb_tail_pointer(trailer) - 1) = top_iph->protocol;
 
 	/* this is non-NULL only with UDP Encapsulation */
 	if (x->encap) {
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index b8e8914cc00..6b76c4c3113 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -91,7 +91,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len);
 	esph = (struct ipv6_esp_hdr *)skb_transport_header(skb);
 	top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph));
-	*(skb_tail_pointer(skb) - 1) = *skb_network_header(skb);
+	*(skb_tail_pointer(trailer) - 1) = *skb_network_header(skb);
 	*skb_network_header(skb) = IPPROTO_ESP;
 
 	esph->spi = x->id.spi;
-- 
cgit v1.2.3


From c5c2523893747f88a83376abad310c8ad13f7197 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 9 Apr 2007 11:47:18 -0700
Subject: [XFRM]: Optimize MTU calculation

Replace the probing based MTU estimation, which usually takes 2-3 iterations
to find a fitting value and may underestimate the MTU, by an exact calculation.

Also fix underestimation of the XFRM trailer_len, which causes unnecessary
reallocations.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h    |  2 +-
 net/ipv4/esp4.c       | 30 ++++++++++++++++++------------
 net/ipv6/esp6.c       | 22 ++++++++++++----------
 net/xfrm/xfrm_state.c | 36 ++++++++----------------------------
 4 files changed, 39 insertions(+), 51 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 5a00aa85b75..e144a25814b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -279,7 +279,7 @@ struct xfrm_type
 	xfrm_address_t		*(*local_addr)(struct xfrm_state *, xfrm_address_t *);
 	xfrm_address_t		*(*remote_addr)(struct xfrm_state *, xfrm_address_t *);
 	/* Estimate maximal size of result of transformation of a dgram */
-	u32			(*get_max_size)(struct xfrm_state *, int size);
+	u32			(*get_mtu)(struct xfrm_state *, int size);
 };
 
 extern int xfrm_register_type(struct xfrm_type *type, unsigned short family);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index bdc65d8af18..a315d5d2276 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -272,32 +272,34 @@ out:
 	return -EINVAL;
 }
 
-static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
 {
 	struct esp_data *esp = x->data;
 	u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
-	int enclen = 0;
+	u32 align = max_t(u32, blksize, esp->conf.padlen);
+	u32 rem;
+
+	mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+	rem = mtu & (align - 1);
+	mtu &= ~(align - 1);
 
 	switch (x->props.mode) {
 	case XFRM_MODE_TUNNEL:
-		mtu = ALIGN(mtu +2, blksize);
 		break;
 	default:
 	case XFRM_MODE_TRANSPORT:
 		/* The worst case */
-		mtu = ALIGN(mtu + 2, 4) + blksize - 4;
+		mtu -= blksize - 4;
+		mtu += min_t(u32, blksize - 4, rem);
 		break;
 	case XFRM_MODE_BEET:
 		/* The worst case. */
-		enclen = IPV4_BEET_PHMAXLEN;
-		mtu = ALIGN(mtu + enclen + 2, blksize);
+		mtu -= IPV4_BEET_PHMAXLEN;
+		mtu += min_t(u32, IPV4_BEET_PHMAXLEN, rem);
 		break;
 	}
 
-	if (esp->conf.padlen)
-		mtu = ALIGN(mtu, esp->conf.padlen);
-
-	return mtu + x->props.header_len + esp->auth.icv_trunc_len - enclen;
+	return mtu - 2;
 }
 
 static void esp4_err(struct sk_buff *skb, u32 info)
@@ -340,6 +342,7 @@ static int esp_init_state(struct xfrm_state *x)
 {
 	struct esp_data *esp = NULL;
 	struct crypto_blkcipher *tfm;
+	u32 align;
 
 	/* null auth and encryption can have zero length keys */
 	if (x->aalg) {
@@ -421,7 +424,10 @@ static int esp_init_state(struct xfrm_state *x)
 		}
 	}
 	x->data = esp;
-	x->props.trailer_len = esp4_get_max_size(x, 0) - x->props.header_len;
+	align = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
+	if (esp->conf.padlen)
+		align = max_t(u32, align, esp->conf.padlen);
+	x->props.trailer_len = align + 1 + esp->auth.icv_trunc_len;
 	return 0;
 
 error:
@@ -438,7 +444,7 @@ static struct xfrm_type esp_type =
 	.proto	     	= IPPROTO_ESP,
 	.init_state	= esp_init_state,
 	.destructor	= esp_destroy,
-	.get_max_size	= esp4_get_max_size,
+	.get_mtu	= esp4_get_mtu,
 	.input		= esp_input,
 	.output		= esp_output
 };
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 6b76c4c3113..7107bb7e2e6 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -235,22 +235,24 @@ out:
 	return ret;
 }
 
-static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
+static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)
 {
 	struct esp_data *esp = x->data;
 	u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
+	u32 align = max_t(u32, blksize, esp->conf.padlen);
+	u32 rem;
 
-	if (x->props.mode == XFRM_MODE_TUNNEL) {
-		mtu = ALIGN(mtu + 2, blksize);
-	} else {
-		/* The worst case. */
+	mtu -= x->props.header_len + esp->auth.icv_trunc_len;
+	rem = mtu & (align - 1);
+	mtu &= ~(align - 1);
+
+	if (x->props.mode != XFRM_MODE_TUNNEL) {
 		u32 padsize = ((blksize - 1) & 7) + 1;
-		mtu = ALIGN(mtu + 2, padsize) + blksize - padsize;
+		mtu -= blksize - padsize;
+		mtu += min_t(u32, blksize - padsize, rem);
 	}
-	if (esp->conf.padlen)
-		mtu = ALIGN(mtu, esp->conf.padlen);
 
-	return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+	return mtu - 2;
 }
 
 static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -380,7 +382,7 @@ static struct xfrm_type esp6_type =
 	.proto	     	= IPPROTO_ESP,
 	.init_state	= esp6_init_state,
 	.destructor	= esp6_destroy,
-	.get_max_size	= esp6_get_max_size,
+	.get_mtu	= esp6_get_mtu,
 	.input		= esp6_input,
 	.output		= esp6_output,
 	.hdr_offset	= xfrm6_find_1stfragopt,
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 63a20e81816..69a3600afd9 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1667,37 +1667,17 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
 }
 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
 
-/*
- * This function is NOT optimal.  For example, with ESP it will give an
- * MTU that's usually two bytes short of being optimal.  However, it will
- * usually give an answer that's a multiple of 4 provided the input is
- * also a multiple of 4.
- */
 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
 {
-	int res = mtu;
-
-	res -= x->props.header_len;
-
-	for (;;) {
-		int m = res;
-
-		if (m < 68)
-			return 68;
-
-		spin_lock_bh(&x->lock);
-		if (x->km.state == XFRM_STATE_VALID &&
-		    x->type && x->type->get_max_size)
-			m = x->type->get_max_size(x, m);
-		else
-			m += x->props.header_len;
-		spin_unlock_bh(&x->lock);
-
-		if (m <= mtu)
-			break;
-		res -= (m - mtu);
-	}
+	int res;
 
+	spin_lock_bh(&x->lock);
+	if (x->km.state == XFRM_STATE_VALID &&
+	    x->type && x->type->get_mtu)
+		res = x->type->get_mtu(x, mtu);
+	else
+		res = mtu;
+	spin_unlock_bh(&x->lock);
 	return res;
 }
 
-- 
cgit v1.2.3


From ac758e3c55c529714354fc268892ca4d23ca1e99 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 9 Apr 2007 11:47:58 -0700
Subject: [XFRM]: beet: fix worst case header_len calculation

esp_init_state doesn't account for the beet pseudo header in the header_len
calculation, which may result in undersized skbs hitting xfrm4_beet_output,
causing unnecessary reallocations in ip_finish_output2.

The skbs should still always have enough room to avoid causing
skb_under_panic in skb_push since we have at least 16 bytes available
from LL_RESERVED_SPACE in xfrm_state_check_space.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/esp4.c            | 3 ++-
 net/ipv4/xfrm4_mode_beet.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index a315d5d2276..47c95e8ef04 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -294,7 +294,6 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu)
 		break;
 	case XFRM_MODE_BEET:
 		/* The worst case. */
-		mtu -= IPV4_BEET_PHMAXLEN;
 		mtu += min_t(u32, IPV4_BEET_PHMAXLEN, rem);
 		break;
 	}
@@ -409,6 +408,8 @@ static int esp_init_state(struct xfrm_state *x)
 	x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
 	if (x->props.mode == XFRM_MODE_TUNNEL)
 		x->props.header_len += sizeof(struct iphdr);
+	else if (x->props.mode == XFRM_MODE_BEET)
+		x->props.header_len += IPV4_BEET_PHMAXLEN;
 	if (x->encap) {
 		struct xfrm_encap_tmpl *encap = x->encap;
 
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
index 0f7c3e30e4e..a73e710740c 100644
--- a/net/ipv4/xfrm4_mode_beet.c
+++ b/net/ipv4/xfrm4_mode_beet.c
@@ -40,7 +40,7 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 	if (unlikely(optlen))
 		hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
 
-	skb_push(skb, x->props.header_len + hdrlen);
+	skb_push(skb, x->props.header_len - IPV4_BEET_PHMAXLEN + hdrlen);
 	skb_reset_network_header(skb);
 	top_iph = ip_hdr(skb);
 	skb->transport_header += sizeof(*iph) - hdrlen;
-- 
cgit v1.2.3


From 663ead3bb8d5b561e70fc3bb3861c9220b5a77eb Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 9 Apr 2007 11:59:07 -0700
Subject: [NET]: Use csum_start offset instead of skb_transport_header

The skb transport pointer is currently used to specify the start
of the checksum region for transmit checksum offload.  Unfortunately,
the same pointer is also used during receive side processing.

This creates a problem when we want to retransmit a received
packet with partial checksums since the skb transport pointer
would be overwritten.

This patch solves this problem by creating a new 16-bit csum_start
offset value to replace the skb transport header for the purpose
of checksums.  This offset is calculated from skb->head so that
it does not have to change when skb->data changes.

No extra space is required since csum_offset itself fits within
a 16-bit word so we can use the other 16 bits for csum_start.

For backwards compatibility, just before we push a packet with
partial checksums off into the device driver, we set the skb
transport header to what it would have been under the old scheme.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  9 +++++++--
 net/core/dev.c         | 25 +++++++++++++++----------
 net/core/skbuff.c      |  2 +-
 net/ipv4/tcp_ipv4.c    |  2 ++
 net/ipv4/udp.c         |  1 +
 net/ipv6/tcp_ipv6.c    |  2 ++
 6 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9b2957d203c..910560e8556 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -205,7 +205,9 @@ typedef unsigned char *sk_buff_data_t;
  *	@len: Length of actual data
  *	@data_len: Data length
  *	@mac_len: Length of link layer header
- *	@csum: Checksum
+ *	@csum: Checksum (must include start/offset pair)
+ *	@csum_start: Offset from skb->head where checksumming should start
+ *	@csum_offset: Offset from csum_start where checksum should be stored
  *	@local_df: allow local fragmentation
  *	@cloned: Head may be cloned (check refcnt to be sure)
  *	@nohdr: Payload reference only, must not modify header
@@ -261,7 +263,10 @@ struct sk_buff {
 				mac_len;
 	union {
 		__wsum		csum;
-		__u32		csum_offset;
+		struct {
+			__u16	csum_start;
+			__u16	csum_offset;
+		};
 	};
 	__u32			priority;
 	__u8			local_df:1,
diff --git a/net/core/dev.c b/net/core/dev.c
index fec8cf27f75..d23972f56fc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1155,7 +1155,7 @@ EXPORT_SYMBOL(netif_device_attach);
 int skb_checksum_help(struct sk_buff *skb)
 {
 	__wsum csum;
-	int ret = 0, offset = skb_transport_offset(skb);
+	int ret = 0, offset;
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		goto out_set_summed;
@@ -1171,15 +1171,16 @@ int skb_checksum_help(struct sk_buff *skb)
 			goto out;
 	}
 
+	offset = skb->csum_start - skb_headroom(skb);
 	BUG_ON(offset > (int)skb->len);
 	csum = skb_checksum(skb, offset, skb->len-offset, 0);
 
-	offset = skb->tail - skb->transport_header;
+	offset = skb_headlen(skb) - offset;
 	BUG_ON(offset <= 0);
 	BUG_ON(skb->csum_offset + 2 > offset);
 
-	*(__sum16 *)(skb_transport_header(skb) +
-		     skb->csum_offset) = csum_fold(csum);
+	*(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
+		csum_fold(csum);
 out_set_summed:
 	skb->ip_summed = CHECKSUM_NONE;
 out:
@@ -1431,12 +1432,16 @@ int dev_queue_xmit(struct sk_buff *skb)
 	/* If packet is not checksummed and device does not support
 	 * checksumming for this protocol, complete checksumming here.
 	 */
-	if (skb->ip_summed == CHECKSUM_PARTIAL &&
-	    (!(dev->features & NETIF_F_GEN_CSUM) &&
-	     (!(dev->features & NETIF_F_IP_CSUM) ||
-	      skb->protocol != htons(ETH_P_IP))))
-		if (skb_checksum_help(skb))
-			goto out_kfree_skb;
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		skb_set_transport_header(skb, skb->csum_start -
+					      skb_headroom(skb));
+
+		if (!(dev->features & NETIF_F_GEN_CSUM) &&
+		    (!(dev->features & NETIF_F_IP_CSUM) ||
+		     skb->protocol != htons(ETH_P_IP)))
+			if (skb_checksum_help(skb))
+				goto out_kfree_skb;
+	}
 
 gso:
 	spin_lock_prefetch(&dev->queue_lock);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4965df29768..52a4fdd4f31 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1358,7 +1358,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 	long csstart;
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		csstart = skb_transport_offset(skb);
+		csstart = skb->csum_start - skb_headroom(skb);
 	else
 		csstart = skb_headlen(skb);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e11eaf4cc26..a091a99ad26 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -504,6 +504,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		th->check = ~tcp_v4_check(len, inet->saddr,
 					  inet->daddr, 0);
+		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct tcphdr, check);
 	} else {
 		th->check = tcp_v4_check(len, inet->saddr, inet->daddr,
@@ -526,6 +527,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
 
 	th->check = 0;
 	th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
+	skb->csum_start = skb_transport_header(skb) - skb->head;
 	skb->csum_offset = offsetof(struct tcphdr, check);
 	skb->ip_summed = CHECKSUM_PARTIAL;
 	return 0;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 71b0b60ba53..5ad7a26e309 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -427,6 +427,7 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
 		/*
 		 * Only one fragment on the socket.
 		 */
+		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct udphdr, check);
 		uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
 	} else {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4a55da079f5..7e824b97126 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -950,6 +950,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
+		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct tcphdr, check);
 	} else {
 		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
@@ -972,6 +973,7 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
 	th->check = 0;
 	th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
 				     IPPROTO_TCP, 0);
+	skb->csum_start = skb_transport_header(skb) - skb->head;
 	skb->csum_offset = offsetof(struct tcphdr, check);
 	skb->ip_summed = CHECKSUM_PARTIAL;
 	return 0;
-- 
cgit v1.2.3


From 628592ccdbbb5bb751217cf02e2e7abb500d7ffe Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 23 Apr 2007 17:06:40 -0700
Subject: [NETDRV]: Perform missing csum_offset conversions

When csum_offset was introduced we did a conversion from csum to
csum_offset where applicable.  A couple of drivers were missed in
this process.

It was harmless to begin with since the two fields coincided.  Now
that we've made them different with the addition of csum_start, the
missed drivers must be converted or they can't send packets out at
all that require checksum offload.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/atl1/atl1_main.c   | 2 +-
 drivers/net/e1000/e1000_main.c | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index d60c2217332..4b1d4d153ec 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -1328,7 +1328,7 @@ static int atl1_tx_csum(struct atl1_adapter *adapter, struct sk_buff *skb,
 
 	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		cso = skb_transport_offset(skb);
-		css = cso + skb->csum;
+		css = cso + skb->csum_offset;
 		if (unlikely(cso & 0x1)) {
 			printk(KERN_DEBUG "%s: payload offset != even number\n",
 				atl1_driver_name);
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 610216ec491..48e2ade704d 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2959,7 +2959,8 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
 
 		context_desc->lower_setup.ip_config = 0;
 		context_desc->upper_setup.tcp_fields.tucss = css;
-		context_desc->upper_setup.tcp_fields.tucso = css + skb->csum;
+		context_desc->upper_setup.tcp_fields.tucso =
+			css + skb->csum_offset;
 		context_desc->upper_setup.tcp_fields.tucse = 0;
 		context_desc->tcp_seg_setup.data = 0;
 		context_desc->cmd_and_length = cpu_to_le32(E1000_TXD_CMD_DEXT);
-- 
cgit v1.2.3


From 604763722c655c7e3f31ecf6f7b4dafcd26a7a15 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 9 Apr 2007 11:59:39 -0700
Subject: [NET]: Treat CHECKSUM_PARTIAL as CHECKSUM_UNNECESSARY

When a transmitted packet is looped back directly, CHECKSUM_PARTIAL
maps to the semantics of CHECKSUM_UNNECESSARY.  Therefore we should
treat it as such in the stack.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h     | 16 +++++++++++-----
 include/net/tcp.h          |  2 +-
 include/net/udp.h          |  2 +-
 net/core/netpoll.c         |  2 +-
 net/ipv4/ipvs/ip_vs_core.c |  6 ++----
 net/ipv4/tcp_input.c       |  6 +++---
 net/ipv4/tcp_ipv4.c        |  3 +--
 net/ipv4/udp.c             |  4 ++--
 net/ipv6/raw.c             |  4 ++--
 net/ipv6/tcp_ipv6.c        |  3 +--
 net/ipv6/udp.c             |  4 ++--
 net/sctp/input.c           |  3 +--
 net/sunrpc/socklib.c       |  2 +-
 13 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 910560e8556..c413afbe0b9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -32,10 +32,11 @@
 #define HAVE_ALLOC_SKB		/* For the drivers to know */
 #define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
 
+/* Don't change this without changing skb_csum_unnecessary! */
 #define CHECKSUM_NONE 0
-#define CHECKSUM_PARTIAL 1
-#define CHECKSUM_UNNECESSARY 2
-#define CHECKSUM_COMPLETE 3
+#define CHECKSUM_UNNECESSARY 1
+#define CHECKSUM_COMPLETE 2
+#define CHECKSUM_PARTIAL 3
 
 #define SKB_DATA_ALIGN(X)	(((X) + (SMP_CACHE_BYTES - 1)) & \
 				 ~(SMP_CACHE_BYTES - 1))
@@ -1572,6 +1573,11 @@ static inline void __net_timestamp(struct sk_buff *skb)
 extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
 extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
 
+static inline int skb_csum_unnecessary(const struct sk_buff *skb)
+{
+	return skb->ip_summed & CHECKSUM_UNNECESSARY;
+}
+
 /**
  *	skb_checksum_complete - Calculate checksum of an entire packet
  *	@skb: packet to process
@@ -1590,8 +1596,8 @@ extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
  */
 static inline unsigned int skb_checksum_complete(struct sk_buff *skb)
 {
-	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
-		__skb_checksum_complete(skb);
+	return skb_csum_unnecessary(skb) ?
+	       0 : __skb_checksum_complete(skb);
 }
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index af9273204cf..07f724e02f8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -818,7 +818,7 @@ static inline __sum16 __tcp_checksum_complete(struct sk_buff *skb)
 
 static inline int tcp_checksum_complete(struct sk_buff *skb)
 {
-	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
+	return !skb_csum_unnecessary(skb) &&
 		__tcp_checksum_complete(skb);
 }
 
diff --git a/include/net/udp.h b/include/net/udp.h
index 4906ed7113e..98755ebaf16 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -77,7 +77,7 @@ static inline __sum16 __udp_lib_checksum_complete(struct sk_buff *skb)
 
 static inline int udp_lib_checksum_complete(struct sk_buff *skb)
 {
-	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
+	return !skb_csum_unnecessary(skb) &&
 		__udp_lib_checksum_complete(skb);
 }
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 1fb30c3528b..b316435b0e2 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -86,7 +86,7 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
 {
 	__wsum psum;
 
-	if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY)
+	if (uh->check == 0 || skb_csum_unnecessary(skb))
 		return 0;
 
 	psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 62cfbed317b..f005a2f929f 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -681,8 +681,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
 	}
 
 	/* Ensure the checksum is correct */
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
-	    ip_vs_checksum_complete(skb, ihl)) {
+	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
 		/* Failed checksum! */
 		IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
 			  NIPQUAD(iph->saddr));
@@ -921,8 +920,7 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
 	verdict = NF_DROP;
 
 	/* Ensure the checksum is correct */
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
-	    ip_vs_checksum_complete(skb, ihl)) {
+	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
 		/* Failed checksum! */
 		IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n",
 			  NIPQUAD(iph->saddr));
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9c3b4c7a50a..d1604f59d77 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4009,7 +4009,7 @@ static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
 	int err;
 
 	local_bh_enable();
-	if (skb->ip_summed==CHECKSUM_UNNECESSARY)
+	if (skb_csum_unnecessary(skb))
 		err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
 	else
 		err = skb_copy_and_csum_datagram_iovec(skb, hlen,
@@ -4041,7 +4041,7 @@ static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb
 
 static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
 {
-	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
+	return !skb_csum_unnecessary(skb) &&
 		__tcp_checksum_complete_user(sk, skb);
 }
 
@@ -4059,7 +4059,7 @@ static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen
 	if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
 		tp->ucopy.dma_chan = get_softnet_dma();
 
-	if (tp->ucopy.dma_chan && skb->ip_summed == CHECKSUM_UNNECESSARY) {
+	if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
 
 		dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
 			skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a091a99ad26..5a3e7f839fc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1638,8 +1638,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	 * Packet length and doff are validated by header prediction,
 	 * provided case of th->doff==0 is eliminated.
 	 * So, we defer the checks. */
-	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
-	     tcp_v4_checksum_init(skb)))
+	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
 		goto bad_packet;
 
 	th = tcp_hdr(skb);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5ad7a26e309..cec0f2cc49b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -848,7 +848,7 @@ try_again:
 			goto csum_copy_err;
 	}
 
-	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+	if (skb_csum_unnecessary(skb))
 		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
 					      msg->msg_iov, copied       );
 	else {
@@ -1190,7 +1190,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
 				      proto, skb->csum))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+	if (!skb_csum_unnecessary(skb))
 		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
 					       skb->len, proto, 0);
 	/* Probably, we should checksum udp header (it should be in cache
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 2b3be68b70a..f65fcd7704c 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -368,7 +368,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
 				     skb->len, inet->num, skb->csum))
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+	if (!skb_csum_unnecessary(skb))
 		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 							 &ipv6_hdr(skb)->daddr,
 							 skb->len,
@@ -421,7 +421,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 		msg->msg_flags |= MSG_TRUNC;
 	}
 
-	if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
+	if (skb_csum_unnecessary(skb)) {
 		err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 	} else if (msg->msg_flags&MSG_TRUNC) {
 		if (__skb_checksum_complete(skb))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7e824b97126..2b668a6ae69 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1707,8 +1707,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb)
 	if (!pskb_may_pull(skb, th->doff*4))
 		goto discard_it;
 
-	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
-	     tcp_v6_checksum_init(skb)))
+	if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
 		goto bad_packet;
 
 	th = tcp_hdr(skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1e3dfb20b1c..b083c09e3d2 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -153,7 +153,7 @@ try_again:
 			goto csum_copy_err;
 	}
 
-	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+	if (skb_csum_unnecessary(skb))
 		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
 					      msg->msg_iov, copied       );
 	else {
@@ -397,7 +397,7 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
 			     skb->len, proto, skb->csum))
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
+	if (!skb_csum_unnecessary(skb))
 		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 							 &ipv6_hdr(skb)->daddr,
 							 skb->len, proto, 0));
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 18b97eedc1f..885109fb3dd 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -140,8 +140,7 @@ int sctp_rcv(struct sk_buff *skb)
 	__skb_pull(skb, skb_transport_offset(skb));
 	if (skb->len < sizeof(struct sctphdr))
 		goto discard_it;
-	if ((skb->ip_summed != CHECKSUM_UNNECESSARY) &&
-	    (sctp_rcv_checksum(skb) < 0))
+	if (!skb_csum_unnecessary(skb) && sctp_rcv_checksum(skb) < 0)
 		goto discard_it;
 
 	skb_pull(skb, sizeof(struct sctphdr));
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 634885b0c04..1d377d1ab7f 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -154,7 +154,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
 	desc.offset = sizeof(struct udphdr);
 	desc.count = skb->len - desc.offset;
 
-	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+	if (skb_csum_unnecessary(skb))
 		goto no_checksum;
 
 	desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
-- 
cgit v1.2.3


From 6229e362dd49b9e8387126bd4483ab0574d23e9c Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Wed, 21 Mar 2007 13:38:47 -0700
Subject: bridge: eliminate call by reference

Change the bridging hook to be simple function with return value
rather than modifying the skb argument. This could generate better
code and is cleaner.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
---
 include/linux/if_bridge.h |  3 ++-
 net/bridge/br_input.c     | 20 +++++++++-----------
 net/bridge/br_private.h   |  3 ++-
 net/core/dev.c            | 31 +++++++++++++++++++------------
 4 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index fd1b6eb94a5..4ff211d9876 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -105,7 +105,8 @@ struct __fdb_entry
 #include <linux/netdevice.h>
 
 extern void brioctl_set(int (*ioctl_hook)(unsigned int, void __user *));
-extern int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
+extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
+					       struct sk_buff *skb);
 extern int (*br_should_route_hook)(struct sk_buff **pskb);
 
 #endif
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index a260679afad..2f5c379d9ff 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -121,13 +121,11 @@ static inline int is_link_local(const unsigned char *dest)
 
 /*
  * Called via br_handle_frame_hook.
- * Return 0 if *pskb should be processed furthur
- *	  1 if *pskb is handled
+ * Return NULL if skb is handled
  * note: already called with rcu_read_lock (preempt_disabled)
  */
-int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
+struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 {
-	struct sk_buff *skb = *pskb;
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
@@ -135,15 +133,15 @@ int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
 
 	if (unlikely(is_link_local(dest))) {
 		skb->pkt_type = PACKET_HOST;
-		return NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
-			       NULL, br_handle_local_finish) != 0;
+
+		return (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
+				NULL, br_handle_local_finish) == 0) ? skb : NULL;
 	}
 
 	if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) {
 		if (br_should_route_hook) {
-			if (br_should_route_hook(pskb))
-				return 0;
-			skb = *pskb;
+			if (br_should_route_hook(&skb))
+				return skb;
 			dest = eth_hdr(skb)->h_dest;
 		}
 
@@ -152,10 +150,10 @@ int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb)
 
 		NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
 			br_handle_frame_finish);
-		return 1;
+		return NULL;
 	}
 
 err:
 	kfree_skb(skb);
-	return 1;
+	return NULL;
 }
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 2b73de6c0b4..fab8ce0ce88 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -182,7 +182,8 @@ extern void br_features_recompute(struct net_bridge *br);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
-extern int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb);
+extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
+				       struct sk_buff *skb);
 
 /* br_ioctl.c */
 extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
diff --git a/net/core/dev.c b/net/core/dev.c
index d23972f56fc..7f31d0f8842 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1687,31 +1687,37 @@ static inline int deliver_skb(struct sk_buff *skb,
 }
 
 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
+/* These hooks defined here for ATM */
 struct net_bridge;
 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
 						unsigned char *addr);
-void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
+void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
 
-static __inline__ int handle_bridge(struct sk_buff **pskb,
-				    struct packet_type **pt_prev, int *ret,
-				    struct net_device *orig_dev)
+/*
+ * If bridge module is loaded call bridging hook.
+ *  returns NULL if packet was consumed.
+ */
+struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
+					struct sk_buff *skb) __read_mostly;
+static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
+					    struct packet_type **pt_prev, int *ret,
+					    struct net_device *orig_dev)
 {
 	struct net_bridge_port *port;
 
-	if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
-	    (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
-		return 0;
+	if (skb->pkt_type == PACKET_LOOPBACK ||
+	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
+		return skb;
 
 	if (*pt_prev) {
-		*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
+		*ret = deliver_skb(skb, *pt_prev, orig_dev);
 		*pt_prev = NULL;
 	}
 
-	return br_handle_frame_hook(port, pskb);
+	return br_handle_frame_hook(port, skb);
 }
 #else
-#define handle_bridge(skb, pt_prev, ret, orig_dev)	(0)
+#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
 #endif
 
 #ifdef CONFIG_NET_CLS_ACT
@@ -1818,7 +1824,8 @@ int netif_receive_skb(struct sk_buff *skb)
 ncls:
 #endif
 
-	if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
+	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
+	if (!skb)
 		goto out;
 
 	type = skb->protocol;
-- 
cgit v1.2.3


From 467aea0ddfd1f0f1158c57cbef0e8941dd63374c Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Wed, 21 Mar 2007 13:42:06 -0700
Subject: bridge: don't route packets while learning

While in the STP learning state, don't route packets; wait until
forwarding delay has expired. The purpose of the forwarding delay
is to detect loops in the network, and if a brouter started up
and started forwarding, it could cause a flood.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
---
 net/bridge/br_input.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 2f5c379d9ff..8a552763209 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -129,7 +129,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
-		goto err;
+		goto drop;
 
 	if (unlikely(is_link_local(dest))) {
 		skb->pkt_type = PACKET_HOST;
@@ -138,22 +138,25 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 				NULL, br_handle_local_finish) == 0) ? skb : NULL;
 	}
 
-	if (p->state == BR_STATE_FORWARDING || p->state == BR_STATE_LEARNING) {
+	switch (p->state) {
+	case BR_STATE_FORWARDING:
+
 		if (br_should_route_hook) {
 			if (br_should_route_hook(&skb))
 				return skb;
 			dest = eth_hdr(skb)->h_dest;
 		}
-
+		/* fall through */
+	case BR_STATE_LEARNING:
 		if (!compare_ether_addr(p->br->dev->dev_addr, dest))
 			skb->pkt_type = PACKET_HOST;
 
 		NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
 			br_handle_frame_finish);
-		return NULL;
+		break;
+	default:
+drop:
+		kfree_skb(skb);
 	}
-
-err:
-	kfree_skb(skb);
 	return NULL;
 }
-- 
cgit v1.2.3


From 3f890923182aeebc572f3818dd51c9014827e0ec Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Wed, 21 Mar 2007 13:42:33 -0700
Subject: bridge: simpler hash with salt

Instead of hashing the whole Ethernet address, it should be faster
to just use the last 4 bytes. Add a random salt value to the hash
to make it more difficult to construct worst case DoS hash chains.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
---
 net/bridge/br_fdb.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 8d566c13cc7..dd5a5d5fb28 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -20,19 +20,24 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/jhash.h>
+#include <linux/random.h>
 #include <asm/atomic.h>
+#include <asm/unaligned.h>
 #include "br_private.h"
 
 static struct kmem_cache *br_fdb_cache __read_mostly;
 static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 		      const unsigned char *addr);
 
+static u32 fdb_salt __read_mostly;
+
 void __init br_fdb_init(void)
 {
 	br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
 					 sizeof(struct net_bridge_fdb_entry),
 					 0,
 					 SLAB_HWCACHE_ALIGN, NULL, NULL);
+	get_random_bytes(&fdb_salt, sizeof(fdb_salt));
 }
 
 void __exit br_fdb_fini(void)
@@ -44,24 +49,26 @@ void __exit br_fdb_fini(void)
 /* if topology_changing then use forward_delay (default 15 sec)
  * otherwise keep longer (default 5 minutes)
  */
-static __inline__ unsigned long hold_time(const struct net_bridge *br)
+static inline unsigned long hold_time(const struct net_bridge *br)
 {
 	return br->topology_change ? br->forward_delay : br->ageing_time;
 }
 
-static __inline__ int has_expired(const struct net_bridge *br,
+static inline int has_expired(const struct net_bridge *br,
 				  const struct net_bridge_fdb_entry *fdb)
 {
 	return !fdb->is_static
 		&& time_before_eq(fdb->ageing_timer + hold_time(br), jiffies);
 }
 
-static __inline__ int br_mac_hash(const unsigned char *mac)
+static inline int br_mac_hash(const unsigned char *mac)
 {
-	return jhash(mac, ETH_ALEN, 0) & (BR_HASH_SIZE - 1);
+	/* use 1 byte of OUI cnd 3 bytes of NIC */
+	u32 key = get_unaligned((u32 *)(mac + 2));
+	return jhash_1word(key, fdb_salt) & (BR_HASH_SIZE - 1);
 }
 
-static __inline__ void fdb_delete(struct net_bridge_fdb_entry *f)
+static inline void fdb_delete(struct net_bridge_fdb_entry *f)
 {
 	hlist_del_rcu(&f->hlist);
 	br_fdb_put(f);
-- 
cgit v1.2.3


From 9cf637473c8535b5abe27fee79254c2d552e042a Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 9 Apr 2007 12:57:54 -0700
Subject: bridge: add sysfs hook to flush forwarding table

The RSTP daemon needs to be able to flush all dynamic forwarding
entries in the case of topology change.

This is a temporary interface. It will change to a netlink interface
before RSTP daemon is officially released.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
---
 net/bridge/br_fdb.c      | 19 +++++++++++++++++++
 net/bridge/br_private.h  |  1 +
 net/bridge/br_sysfs_br.c | 14 ++++++++++++++
 net/bridge/br_sysfs_if.c |  8 ++++++++
 4 files changed, 42 insertions(+)

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index dd5a5d5fb28..22645e3edf2 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -135,7 +135,26 @@ void br_fdb_cleanup(unsigned long _data)
 	mod_timer(&br->gc_timer, jiffies + HZ/10);
 }
 
+/* Completely flush all dynamic entries in forwarding database.*/
+void br_fdb_flush(struct net_bridge *br)
+{
+	int i;
 
+	spin_lock_bh(&br->hash_lock);
+	for (i = 0; i < BR_HASH_SIZE; i++) {
+		struct net_bridge_fdb_entry *f;
+		struct hlist_node *h, *n;
+		hlist_for_each_entry_safe(f, h, n, &br->hash[i], hlist) {
+			if (!f->is_static)
+				fdb_delete(f);
+		}
+	}
+	spin_unlock_bh(&br->hash_lock);
+}
+
+/* Flush all entries refering to a specific port.
+ * if do_all is set also flush static entries
+ */
 void br_fdb_delete_by_port(struct net_bridge *br,
 			   const struct net_bridge_port *p,
 			   int do_all)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index fab8ce0ce88..3adacdf3406 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -135,6 +135,7 @@ extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev);
 /* br_fdb.c */
 extern void br_fdb_init(void);
 extern void br_fdb_fini(void);
+extern void br_fdb_flush(struct net_bridge *br);
 extern void br_fdb_changeaddr(struct net_bridge_port *p,
 			      const unsigned char *newaddr);
 extern void br_fdb_cleanup(unsigned long arg);
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 01a22ad0cc7..6cc5cfe665c 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -309,6 +309,19 @@ static ssize_t store_group_addr(struct device *d,
 static DEVICE_ATTR(group_addr, S_IRUGO | S_IWUSR,
 		   show_group_addr, store_group_addr);
 
+static ssize_t store_flush(struct device *d,
+			   struct device_attribute *attr,
+			   const char *buf, size_t len)
+{
+	struct net_bridge *br = to_bridge(d);
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	br_fdb_flush(br);
+	return len;
+}
+static DEVICE_ATTR(flush, S_IWUSR, NULL, store_flush);
 
 static struct attribute *bridge_attrs[] = {
 	&dev_attr_forward_delay.attr,
@@ -328,6 +341,7 @@ static struct attribute *bridge_attrs[] = {
 	&dev_attr_topology_change_timer.attr,
 	&dev_attr_gc_timer.attr,
 	&dev_attr_group_addr.attr,
+	&dev_attr_flush.attr,
 	NULL
 };
 
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 0bc2aef8f9f..2da22927d8d 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -137,6 +137,13 @@ static ssize_t show_hold_timer(struct net_bridge_port *p,
 }
 static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);
 
+static ssize_t store_flush(struct net_bridge_port *p, unsigned long v)
+{
+	br_fdb_delete_by_port(p->br, p, 0); // Don't delete local entry
+	return 0;
+}
+static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
+
 static struct brport_attribute *brport_attrs[] = {
 	&brport_attr_path_cost,
 	&brport_attr_priority,
@@ -152,6 +159,7 @@ static struct brport_attribute *brport_attrs[] = {
 	&brport_attr_message_age_timer,
 	&brport_attr_forward_delay_timer,
 	&brport_attr_hold_timer,
+	&brport_attr_flush,
 	NULL
 };
 
-- 
cgit v1.2.3


From 9cde070874b822d4677f4f01fe146991785813b1 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Wed, 21 Mar 2007 14:22:44 -0700
Subject: bridge: add support for user mode STP

This patchset based on work by Aji_Srinivas@emc.com provides allows
spanning tree to be controled from userspace.  Like hotplug, it
uses call_usermodehelper when spanning tree is enabled so there
is no visible API change. If call to start usermode STP fails
it falls back to existing kernel STP.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
---
 net/bridge/br_if.c       |  2 +-
 net/bridge/br_ioctl.c    |  5 +++--
 net/bridge/br_netlink.c  |  2 +-
 net/bridge/br_private.h  | 15 ++++++++++---
 net/bridge/br_stp.c      | 10 ++++++---
 net/bridge/br_stp_bpdu.c | 19 ++++++++++------
 net/bridge/br_stp_if.c   | 56 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_sysfs_br.c |  4 +++-
 8 files changed, 96 insertions(+), 17 deletions(-)

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f3a2e29be40..cf10b8f2a1c 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -203,7 +203,7 @@ static struct net_device *new_bridge_dev(const char *name)
 	memcpy(br->group_addr, br_group_address, ETH_ALEN);
 
 	br->feature_mask = dev->features;
-	br->stp_enabled = 0;
+	br->stp_enabled = BR_NO_STP;
 	br->designated_root = br->bridge_id;
 	br->root_path_cost = 0;
 	br->root_port = 0;
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 147015fe5c7..eda0fbfc923 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -137,7 +137,8 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 		b.topology_change = br->topology_change;
 		b.topology_change_detected = br->topology_change_detected;
 		b.root_port = br->root_port;
-		b.stp_enabled = br->stp_enabled;
+
+		b.stp_enabled = (br->stp_enabled != BR_NO_STP);
 		b.ageing_time = jiffies_to_clock_t(br->ageing_time);
 		b.hello_timer_value = br_timer_value(&br->hello_timer);
 		b.tcn_timer_value = br_timer_value(&br->tcn_timer);
@@ -251,7 +252,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
 
-		br->stp_enabled = args[1]?1:0;
+		br_stp_set_enabled(br, args[1]);
 		return 0;
 
 	case BRCTL_SET_BRIDGE_PRIORITY:
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index a14ac51753e..5e84ade129c 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -165,7 +165,7 @@ static int br_rtm_setlink(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
 		return -EINVAL;
 
 	/* if kernel STP is running, don't allow changes */
-	if (p->br->stp_enabled)
+	if (p->br->stp_enabled == BR_KERNEL_STP)
 		return -EBUSY;
 
 	if (!netif_running(dev) ||
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 3adacdf3406..974feccd28b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -26,7 +26,10 @@
 #define BR_PORT_BITS	10
 #define BR_MAX_PORTS	(1<<BR_PORT_BITS)
 
-#define BR_VERSION	"2.2"
+#define BR_VERSION	"2.3"
+
+/* Path to usermode spanning tree program */
+#define BR_STP_PROG	"/sbin/bridge-stp"
 
 typedef struct bridge_id bridge_id;
 typedef struct mac_addr mac_addr;
@@ -107,7 +110,13 @@ struct net_bridge
 
 	u8				group_addr[ETH_ALEN];
 	u16				root_port;
-	unsigned char			stp_enabled;
+
+	enum {
+		BR_NO_STP, 		/* no spanning tree */
+		BR_KERNEL_STP,		/* old STP in kernel */
+		BR_USER_STP,		/* new RSTP in userspace */
+	} stp_enabled;
+
 	unsigned char			topology_change;
 	unsigned char			topology_change_detected;
 
@@ -127,7 +136,6 @@ static inline int br_is_root_bridge(const struct net_bridge *br)
 	return !memcmp(&br->bridge_id, &br->designated_root, 8);
 }
 
-
 /* br_device.c */
 extern void br_dev_setup(struct net_device *dev);
 extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev);
@@ -209,6 +217,7 @@ extern void br_become_designated_port(struct net_bridge_port *p);
 /* br_stp_if.c */
 extern void br_stp_enable_bridge(struct net_bridge *br);
 extern void br_stp_disable_bridge(struct net_bridge *br);
+extern void br_stp_set_enabled(struct net_bridge *br, unsigned long val);
 extern void br_stp_enable_port(struct net_bridge_port *p);
 extern void br_stp_disable_port(struct net_bridge_port *p);
 extern void br_stp_recalculate_bridge_id(struct net_bridge *br);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index f9ff4d57b0d..ebb0861e9bd 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -370,11 +370,11 @@ static void br_make_blocking(struct net_bridge_port *p)
 static void br_make_forwarding(struct net_bridge_port *p)
 {
 	if (p->state == BR_STATE_BLOCKING) {
-		if (p->br->stp_enabled) {
+		if (p->br->stp_enabled == BR_KERNEL_STP)
 			p->state = BR_STATE_LISTENING;
-		} else {
+		else
 			p->state = BR_STATE_LEARNING;
-		}
+
 		br_log_state(p);
 		mod_timer(&p->forward_delay_timer, jiffies + p->br->forward_delay);	}
 }
@@ -384,6 +384,10 @@ void br_port_state_selection(struct net_bridge *br)
 {
 	struct net_bridge_port *p;
 
+	/* Don't change port states if userspace is handling STP */
+	if (br->stp_enabled == BR_USER_STP)
+		return;
+
 	list_for_each_entry(p, &br->port_list, list) {
 		if (p->state != BR_STATE_DISABLED) {
 			if (p->port_no == br->root_port) {
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index b9fb0dc4ab1..60112bce669 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -33,9 +33,6 @@ static void br_send_bpdu(struct net_bridge_port *p,
 {
 	struct sk_buff *skb;
 
-	if (!p->br->stp_enabled)
-		return;
-
 	skb = dev_alloc_skb(length+LLC_RESERVE);
 	if (!skb)
 		return;
@@ -75,6 +72,9 @@ void br_send_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu)
 {
 	unsigned char buf[35];
 
+	if (p->br->stp_enabled != BR_KERNEL_STP)
+		return;
+
 	buf[0] = 0;
 	buf[1] = 0;
 	buf[2] = 0;
@@ -117,6 +117,9 @@ void br_send_tcn_bpdu(struct net_bridge_port *p)
 {
 	unsigned char buf[4];
 
+	if (p->br->stp_enabled != BR_KERNEL_STP)
+		return;
+
 	buf[0] = 0;
 	buf[1] = 0;
 	buf[2] = 0;
@@ -157,9 +160,13 @@ int br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
 	br = p->br;
 	spin_lock(&br->lock);
 
-	if (p->state == BR_STATE_DISABLED
-	    || !br->stp_enabled
-	    || !(br->dev->flags & IFF_UP))
+	if (br->stp_enabled != BR_KERNEL_STP)
+		goto out;
+
+	if (!(br->dev->flags & IFF_UP))
+		goto out;
+
+	if (p->state == BR_STATE_DISABLED)
 		goto out;
 
 	if (compare_ether_addr(dest, br->group_addr) != 0)
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index a285897a2fb..1c1806d7c48 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -123,6 +123,62 @@ void br_stp_disable_port(struct net_bridge_port *p)
 		br_become_root_bridge(br);
 }
 
+static void br_stp_start(struct net_bridge *br)
+{
+	int r;
+	char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL };
+	char *envp[] = { NULL };
+
+	r = call_usermodehelper(BR_STP_PROG, argv, envp, 1);
+	if (r == 0) {
+		br->stp_enabled = BR_USER_STP;
+		printk(KERN_INFO "%s: userspace STP started\n", br->dev->name);
+	} else {
+		br->stp_enabled = BR_KERNEL_STP;
+		printk(KERN_INFO "%s: starting userspace STP failed, "
+				"staring kernel STP\n", br->dev->name);
+
+		/* To start timers on any ports left in blocking */
+		spin_lock_bh(&br->lock);
+		br_port_state_selection(br);
+		spin_unlock_bh(&br->lock);
+	}
+}
+
+static void br_stp_stop(struct net_bridge *br)
+{
+	int r;
+	char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL };
+	char *envp[] = { NULL };
+
+	if (br->stp_enabled == BR_USER_STP) {
+		r = call_usermodehelper(BR_STP_PROG, argv, envp, 1);
+		printk(KERN_INFO "%s: userspace STP stopped, return code %d\n",
+			br->dev->name, r);
+
+
+		/* To start timers on any ports left in blocking */
+		spin_lock_bh(&br->lock);
+		br_port_state_selection(br);
+		spin_unlock_bh(&br->lock);
+	}
+
+	br->stp_enabled = BR_NO_STP;
+}
+
+void br_stp_set_enabled(struct net_bridge *br, unsigned long val)
+{
+	ASSERT_RTNL();
+
+	if (val) {
+		if (br->stp_enabled == BR_NO_STP)
+			br_stp_start(br);
+	} else {
+		if (br->stp_enabled != BR_NO_STP)
+			br_stp_stop(br);
+	}
+}
+
 /* called under bridge lock */
 void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
 {
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 6cc5cfe665c..7ec0b76cdd2 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -149,7 +149,9 @@ static ssize_t show_stp_state(struct device *d,
 
 static void set_stp_state(struct net_bridge *br, unsigned long val)
 {
-	br->stp_enabled = val;
+	spin_unlock_bh(&br->lock);
+	br_stp_set_enabled(br, val);
+	spin_lock_bh(&br->lock);
 }
 
 static ssize_t store_stp_state(struct device *d,
-- 
cgit v1.2.3


From b86c45035c439cfa6ef5b2e4bf080b24bd8765f1 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Thu, 22 Mar 2007 14:08:46 -0700
Subject: bridge: change when netlink events go to STP

Need to tell STP daemon about more events, like any time a
device is added even when it is down.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
---
 net/bridge/br_if.c     |  4 ++++
 net/bridge/br_notify.c | 13 +++++++++----
 net/bridge/br_stp_if.c |  3 ---
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index cf10b8f2a1c..690573bbf01 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -152,6 +152,8 @@ static void del_nbp(struct net_bridge_port *p)
 	br_stp_disable_port(p);
 	spin_unlock_bh(&br->lock);
 
+	br_ifinfo_notify(RTM_DELLINK, p);
+
 	br_fdb_delete_by_port(br, p, 1);
 
 	list_del_rcu(&p->list);
@@ -434,6 +436,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 		br_stp_enable_port(p);
 	spin_unlock_bh(&br->lock);
 
+	br_ifinfo_notify(RTM_NEWLINK, p);
+
 	dev_set_mtu(br->dev, br_min_mtu(br));
 
 	kobject_uevent(&p->kobj, KOBJ_ADD);
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 37357ed2149..c8451d3a070 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -50,7 +50,6 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 	case NETDEV_CHANGEADDR:
 		spin_lock_bh(&br->lock);
 		br_fdb_changeaddr(p, dev->dev_addr);
-		br_ifinfo_notify(RTM_NEWLINK, p);
 		br_stp_recalculate_bridge_id(br);
 		spin_unlock_bh(&br->lock);
 		break;
@@ -74,10 +73,11 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 		break;
 
 	case NETDEV_UP:
-		spin_lock_bh(&br->lock);
-		if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP))
+		if (netif_carrier_ok(dev) && (br->dev->flags & IFF_UP)) {
+			spin_lock_bh(&br->lock);
 			br_stp_enable_port(p);
-		spin_unlock_bh(&br->lock);
+			spin_unlock_bh(&br->lock);
+		}
 		break;
 
 	case NETDEV_UNREGISTER:
@@ -85,5 +85,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 		break;
 	}
 
+	/* Events that may cause spanning tree to refresh */
+	if (event == NETDEV_CHANGEADDR || event == NETDEV_UP ||
+	    event == NETDEV_CHANGE || event == NETDEV_DOWN)
+		br_ifinfo_notify(RTM_NEWLINK, p);
+
 	return NOTIFY_DONE;
 }
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 1c1806d7c48..3e246b37020 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -87,7 +87,6 @@ void br_stp_disable_bridge(struct net_bridge *br)
 void br_stp_enable_port(struct net_bridge_port *p)
 {
 	br_init_port(p);
-	br_ifinfo_notify(RTM_NEWLINK, p);
 	br_port_state_selection(p->br);
 }
 
@@ -101,8 +100,6 @@ void br_stp_disable_port(struct net_bridge_port *p)
 	printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
 	       br->dev->name, p->port_no, p->dev->name, "disabled");
 
-	br_ifinfo_notify(RTM_DELLINK, p);
-
 	wasroot = br_is_root_bridge(br);
 	br_become_designated_port(p);
 	p->state = BR_STATE_DISABLED;
-- 
cgit v1.2.3


From ffe1d49cc300f3dff990093aa952a2fbb371c1b6 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 9 Apr 2007 11:49:58 -0700
Subject: bridge: allow changing hardware address to any valid address

For case of bridging pseudo devices, the get created/destroyed (Xen)
need to allow setting address to any valid value.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
---
 net/bridge/br_device.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index b22ada529cc..39b87dad35b 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -83,27 +83,22 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-/* Allow setting mac address of pseudo-bridge to be same as
- * any of the bound interfaces
- */
+/* Allow setting mac address to any valid ethernet address. */
 static int br_set_mac_address(struct net_device *dev, void *p)
 {
 	struct net_bridge *br = netdev_priv(dev);
 	struct sockaddr *addr = p;
 	struct net_bridge_port *port;
-	int err = -EADDRNOTAVAIL;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EINVAL;
 
 	spin_lock_bh(&br->lock);
-	list_for_each_entry(port, &br->port_list, list) {
-		if (!compare_ether_addr(port->dev->dev_addr, addr->sa_data)) {
-			br_stp_change_bridge_id(br, addr->sa_data);
-			err = 0;
-			break;
-		}
-	}
+	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+	br_stp_change_bridge_id(br, addr->sa_data);
 	spin_unlock_bh(&br->lock);
 
-	return err;
+	return 0;
 }
 
 static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
-- 
cgit v1.2.3


From 87a596e0b8bc344bd6bfebe83b56d11fb79ee23a Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Sat, 7 Apr 2007 18:57:07 +0900
Subject: bridge: check kmem_cache_create() error

This patch checks kmem_cache_create() error and aborts loading module
on failure.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
---
 net/bridge/br.c         | 4 +++-
 net/bridge/br_fdb.c     | 6 +++++-
 net/bridge/br_private.h | 2 +-
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/net/bridge/br.c b/net/bridge/br.c
index 601c37d61c0..848b8fa8bed 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -37,7 +37,9 @@ static int __init br_init(void)
 		return -EADDRINUSE;
 	}
 
-	br_fdb_init();
+	err = br_fdb_init();
+	if (err)
+		goto err_out1;
 
 	err = br_netfilter_init();
 	if (err)
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 22645e3edf2..91b017016d5 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -31,13 +31,17 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 
 static u32 fdb_salt __read_mostly;
 
-void __init br_fdb_init(void)
+int __init br_fdb_init(void)
 {
 	br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
 					 sizeof(struct net_bridge_fdb_entry),
 					 0,
 					 SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!br_fdb_cache)
+		return -ENOMEM;
+
 	get_random_bytes(&fdb_salt, sizeof(fdb_salt));
+	return 0;
 }
 
 void __exit br_fdb_fini(void)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 974feccd28b..21bf3a9a03f 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -141,7 +141,7 @@ extern void br_dev_setup(struct net_device *dev);
 extern int br_dev_xmit(struct sk_buff *skb, struct net_device *dev);
 
 /* br_fdb.c */
-extern void br_fdb_init(void);
+extern int br_fdb_init(void);
 extern void br_fdb_fini(void);
 extern void br_fdb_flush(struct net_bridge *br);
 extern void br_fdb_changeaddr(struct net_bridge_port *p,
-- 
cgit v1.2.3


From 33036807b32d5ed1f4fdfe2d5e6bab2bb260b9f7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 10 Apr 2007 13:25:40 -0700
Subject: [NET]: loopback driver can use loopback_dev integrated
 net_device_stats

Rusty added a new 'stats' field to struct net_device.

loopback driver can use it instead of declaring another struct
net_device_stats This saves some memory.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/loopback.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 6df673a058c..6ba6ed2b480 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -164,11 +164,9 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
-static struct net_device_stats loopback_stats;
-
 static struct net_device_stats *get_stats(struct net_device *dev)
 {
-	struct net_device_stats *stats = &loopback_stats;
+	struct net_device_stats *stats = &dev->stats;
 	unsigned long bytes = 0;
 	unsigned long packets = 0;
 	int i;
@@ -208,7 +206,6 @@ static const struct ethtool_ops loopback_ethtool_ops = {
 struct net_device loopback_dev = {
 	.name	 		= "lo",
 	.get_stats		= &get_stats,
-	.priv			= &loopback_stats,
 	.mtu			= (16 * 1024) + 20 + 20 + 12,
 	.hard_start_xmit	= loopback_xmit,
 	.hard_header		= eth_header,
-- 
cgit v1.2.3


From efd1e8d569b3d35a3a636683c2a9ebffec9c1fcf Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 10 Apr 2007 18:30:09 -0700
Subject: [SK_BUFF]: Fix missing offset adjustment in skb_copy_expand

skb_copy_expand changes the headroom, so it needs to adjust the header
offsets by the difference between the old and the new value.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 52a4fdd4f31..c7a1b24b737 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -736,7 +736,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	 */
 	struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
 				      gfp_mask);
+	int oldheadroom = skb_headroom(skb);
 	int head_copy_len, head_copy_off;
+	int off = 0;
 
 	if (!n)
 		return NULL;
@@ -746,7 +748,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	/* Set the tail pointer and length */
 	skb_put(n, skb->len);
 
-	head_copy_len = skb_headroom(skb);
+	head_copy_len = oldheadroom;
 	head_copy_off = 0;
 	if (newheadroom <= head_copy_len)
 		head_copy_len = newheadroom;
@@ -760,6 +762,13 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 
 	copy_skb_header(n, skb);
 
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	off                  = newheadroom - oldheadroom;
+#endif
+	n->transport_header += off;
+	n->network_header   += off;
+	n->mac_header	    += off;
+
 	return n;
 }
 
-- 
cgit v1.2.3


From cb8c181f288a1157bc717cc7a02412d4d1dc19bc Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 10 Apr 2007 22:10:39 -0700
Subject: [S390]: Fix build on 31-bit.

Allow s390 to properly override the generic
__div64_32() implementation by:

1) Using obj-y for div64.o in s390's makefile instead
   of lib-y

2) Adding the weak attribute to the generic implementation.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/s390/lib/Makefile | 2 +-
 arch/s390/lib/div64.c  | 2 --
 lib/div64.c            | 2 +-
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 7a44fed21b3..59aea65ce99 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -5,6 +5,6 @@
 EXTRA_AFLAGS := -traditional
 
 lib-y += delay.o string.o uaccess_std.o uaccess_pt.o qrnnd.o
-lib-$(CONFIG_32BIT) += div64.o
+obj-$(CONFIG_32BIT) += div64.o
 lib-$(CONFIG_64BIT) += uaccess_mvcos.o
 lib-$(CONFIG_SMP) += spinlock.o
diff --git a/arch/s390/lib/div64.c b/arch/s390/lib/div64.c
index 0481f3424a1..a5f8300bf3e 100644
--- a/arch/s390/lib/div64.c
+++ b/arch/s390/lib/div64.c
@@ -147,5 +147,3 @@ uint32_t __div64_32(uint64_t *n, uint32_t base)
 }
 
 #endif /* MARCH_G5 */
-
-EXPORT_SYMBOL(__div64_32);
diff --git a/lib/div64.c b/lib/div64.c
index 74f0c8cb403..b71cf93c529 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -23,7 +23,7 @@
 /* Not needed on 64bit architectures */
 #if BITS_PER_LONG == 32
 
-uint32_t __div64_32(uint64_t *n, uint32_t base)
+uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base)
 {
 	uint64_t rem = *n;
 	uint64_t b = base;
-- 
cgit v1.2.3


From b2449fdc30ccac550344df5e60d38bb8427b109c Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Fri, 20 Apr 2007 13:02:55 -0700
Subject: [DCCP]: Fix bug in the calculation of very low sending rates

This fixes an error in the calculation of t_ipi when X converges towards
very low sending rates (between 1 and 64 bytes per second).

Although this case may not sound likely, it can be reproduced by connecting,
hitting enter (1 byte sent) and waiting for some time, during which the
nofeedback timer halves the sending rate until finally it reaches the region
1..64 bytes/sec. Computing X is handled correctly (tested separately); but by
dividing X _before_ entering the calculation of t_ipi, X becomes zero as
a result.  This in turn triggers a BUG condition caught in scaled_div().

Fixed by replacing with equivalent statement and explicit typecast for good
measure.

Calculation verified and effect of patch tested - reduced never below 1 byte
per 64 seconds afterwards, i.e. not allowing divide-by-zero.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index c3abd7695d5..d7d9ce73724 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -102,8 +102,8 @@ static inline u64 rfc3390_initial_rate(struct sock *sk)
 static inline void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hctx)
 {
 	/* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
-	hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s,
-					   hctx->ccid3hctx_x >> 6);
+	hctx->ccid3hctx_t_ipi = scaled_div32(((u64)hctx->ccid3hctx_s) << 6,
+					     hctx->ccid3hctx_x);
 
 	/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
 	hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2,
-- 
cgit v1.2.3


From f73f7097c986aab159491dcded7fc918e76e9ec3 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Fri, 20 Apr 2007 13:56:47 -0700
Subject: [DCCP]: Debug statements for Elapsed Time option

This prints the value of the parsed Elapsed Time when received via a
Timestamp Echo option [RFC 4342, 13.3].

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/options.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/dccp/options.c b/net/dccp/options.c
index 14b62122732..34d536d5f1a 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -172,21 +172,25 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
 			opt_recv->dccpor_timestamp_echo = ntohl(*(__be32 *)value);
 
 			dccp_pr_debug("%s rx opt: TIMESTAMP_ECHO=%u, len=%d, "
-				      "ackno=%llu, ",  dccp_role(sk),
+				      "ackno=%llu", dccp_role(sk),
 				      opt_recv->dccpor_timestamp_echo,
 				      len + 2,
 				      (unsigned long long)
 				      DCCP_SKB_CB(skb)->dccpd_ack_seq);
 
 
-			if (len == 4)
+			if (len == 4) {
+				dccp_pr_debug_cat("\n");
 				break;
+			}
 
 			if (len == 6)
 				elapsed_time = ntohs(*(__be16 *)(value + 4));
 			else
 				elapsed_time = ntohl(*(__be32 *)(value + 4));
 
+			dccp_pr_debug_cat(", ELAPSED_TIME=%d\n", elapsed_time);
+
 			/* Give precedence to the biggest ELAPSED_TIME */
 			if (elapsed_time > opt_recv->dccpor_elapsed_time)
 				opt_recv->dccpor_elapsed_time = elapsed_time;
-- 
cgit v1.2.3


From 91d73c15cb165195bc8c3d6a35e30df454b1485b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Fri, 20 Apr 2007 13:57:21 -0700
Subject: [DCCP]: Complete documentation of dccp_sock

This fills in missing documentation for dccp_sock fields.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dccp.h | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index e668cf531ba..fda2148d8c8 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -465,21 +465,20 @@ struct dccp_ackvec;
  * @dccps_service_list - second .. last service code on passive socket
  * @dccps_timestamp_time - time of latest TIMESTAMP option
  * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option
- * @dccps_l_ack_ratio -
- * @dccps_r_ack_ratio -
+ * @dccps_l_ack_ratio - feature-local Ack Ratio
+ * @dccps_r_ack_ratio - feature-remote Ack Ratio
  * @dccps_pcslen - sender   partial checksum coverage (via sockopt)
  * @dccps_pcrlen - receiver partial checksum coverage (via sockopt)
  * @dccps_ndp_count - number of Non Data Packets since last data packet
- * @dccps_mss_cache -
- * @dccps_minisock -
+ * @dccps_mss_cache - current value of MSS (path MTU minus header sizes)
+ * @dccps_minisock - associated minisock (accessed via dccp_msk)
  * @dccps_hc_rx_ackvec - rx half connection ack vector
- * @dccps_hc_rx_ccid -
- * @dccps_hc_tx_ccid -
- * @dccps_options_received -
- * @dccps_epoch -
- * @dccps_role - Role of this sock, one of %dccp_role
- * @dccps_hc_rx_insert_options -
- * @dccps_hc_tx_insert_options -
+ * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
+ * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection)
+ * @dccps_options_received - parsed set of retrieved options
+ * @dccps_role - role of this sock, one of %dccp_role
+ * @dccps_hc_rx_insert_options - receiver wants to add options when acking
+ * @dccps_hc_tx_insert_options - sender wants to add options when sending
  * @dccps_xmit_timer - timer for when CCID is not ready to send
  * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs)
  */
-- 
cgit v1.2.3


From 516299d2f5b6f9703b9b388faf91898dc636a678 Mon Sep 17 00:00:00 2001
From: Michael Milner <milner@blissisland.ca>
Date: Thu, 12 Apr 2007 22:14:23 -0700
Subject: [NETFILTER]: bridge-nf: filter bridged IPv4/IPv6 encapsulated in
 pppoe traffic

The attached patch by Michael Milner adds support for using iptables and
ip6tables on bridged traffic encapsulated in ppoe frames, similar to
what's already supported for vlan.

Signed-off-by: Michael Milner <milner@blissisland.ca>
Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |  7 +++-
 include/linux/if_pppox.h               |  3 ++
 include/linux/netfilter_bridge.h       | 11 ++++-
 include/linux/sysctl.h                 |  1 +
 net/bridge/br_netfilter.c              | 77 ++++++++++++++++++++++++++++++++--
 5 files changed, 92 insertions(+), 7 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 054c515bd72..af6a63ab902 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1015,7 +1015,12 @@ bridge-nf-call-ip6tables - BOOLEAN
 	Default: 1
 
 bridge-nf-filter-vlan-tagged - BOOLEAN
-	1 : pass bridged vlan-tagged ARP/IP traffic to arptables/iptables.
+	1 : pass bridged vlan-tagged ARP/IP/IPv6 traffic to {arp,ip,ip6}tables.
+	0 : disable this.
+	Default: 1
+
+bridge-nf-filter-pppoe-tagged - BOOLEAN
+	1 : pass bridged pppoe-tagged IP/IPv6 traffic to {ip,ip6}tables.
 	0 : disable this.
 	Default: 1
 
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 29d6579ff1a..6f987be60fe 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -111,6 +111,9 @@ struct pppoe_hdr {
 	struct pppoe_tag tag[0];
 } __attribute__ ((packed));
 
+/* Length of entire PPPoE + PPP header */
+#define PPPOE_SES_HLEN	8
+
 #ifdef __KERNEL__
 #include <linux/skbuff.h>
 
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 55689f39f77..19060030bac 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -7,6 +7,7 @@
 #include <linux/netfilter.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
+#include <linux/if_pppox.h>
 
 /* Bridge Hooks */
 /* After promisc drops, checksum checks. */
@@ -58,8 +59,14 @@ static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
  * enough room for the encapsulating header (if there is one). */
 static inline int nf_bridge_pad(const struct sk_buff *skb)
 {
- 	return (skb->nf_bridge && skb->protocol == htons(ETH_P_8021Q))
-		? VLAN_HLEN : 0;
+	int padding = 0;
+
+	if (skb->nf_bridge && skb->protocol == htons(ETH_P_8021Q))
+		padding = VLAN_HLEN;
+	else if (skb->nf_bridge && skb->protocol == htons(ETH_P_PPP_SES))
+		padding = PPPOE_SES_HLEN;
+
+	return padding;
 }
 
 struct bridge_skb_cb {
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index df2d9ed20a4..47f1c53332c 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -792,6 +792,7 @@ enum {
 	NET_BRIDGE_NF_CALL_IPTABLES = 2,
 	NET_BRIDGE_NF_CALL_IP6TABLES = 3,
 	NET_BRIDGE_NF_FILTER_VLAN_TAGGED = 4,
+	NET_BRIDGE_NF_FILTER_PPPOE_TAGGED = 5,
 };
 
 /* CTL_FS names: */
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index fd70d041e51..9b2986b182b 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -29,6 +29,8 @@
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
+#include <linux/if_pppox.h>
+#include <linux/ppp_defs.h>
 #include <linux/netfilter_bridge.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv6.h>
@@ -57,8 +59,10 @@ static int brnf_call_iptables __read_mostly = 1;
 static int brnf_call_ip6tables __read_mostly = 1;
 static int brnf_call_arptables __read_mostly = 1;
 static int brnf_filter_vlan_tagged __read_mostly = 1;
+static int brnf_filter_pppoe_tagged __read_mostly = 1;
 #else
 #define brnf_filter_vlan_tagged 1
+#define brnf_filter_pppoe_tagged 1
 #endif
 
 static inline __be16 vlan_proto(const struct sk_buff *skb)
@@ -81,6 +85,22 @@ static inline __be16 vlan_proto(const struct sk_buff *skb)
 	 vlan_proto(skb) == htons(ETH_P_ARP) &&	\
 	 brnf_filter_vlan_tagged)
 
+static inline __be16 pppoe_proto(const struct sk_buff *skb)
+{
+	return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
+			    sizeof(struct pppoe_hdr)));
+}
+
+#define IS_PPPOE_IP(skb) \
+	(skb->protocol == htons(ETH_P_PPP_SES) && \
+	 pppoe_proto(skb) == htons(PPP_IP) && \
+	 brnf_filter_pppoe_tagged)
+
+#define IS_PPPOE_IPV6(skb) \
+	(skb->protocol == htons(ETH_P_PPP_SES) && \
+	 pppoe_proto(skb) == htons(PPP_IPV6) && \
+	 brnf_filter_pppoe_tagged)
+
 /* We need these fake structures to make netfilter happy --
  * lots of places assume that skb->dst != NULL, which isn't
  * all that unreasonable.
@@ -128,6 +148,8 @@ static inline void nf_bridge_save_header(struct sk_buff *skb)
 
 	if (skb->protocol == htons(ETH_P_8021Q))
 		header_size += VLAN_HLEN;
+	else if (skb->protocol == htons(ETH_P_PPP_SES))
+		header_size += PPPOE_SES_HLEN;
 
 	skb_copy_from_linear_data_offset(skb, -header_size,
 					 skb->nf_bridge->data, header_size);
@@ -144,6 +166,8 @@ int nf_bridge_copy_header(struct sk_buff *skb)
 
 	if (skb->protocol == htons(ETH_P_8021Q))
 		header_size += VLAN_HLEN;
+	else if (skb->protocol == htons(ETH_P_PPP_SES))
+		header_size += PPPOE_SES_HLEN;
 
 	err = skb_cow(skb, header_size);
 	if (err)
@@ -154,6 +178,8 @@ int nf_bridge_copy_header(struct sk_buff *skb)
 
 	if (skb->protocol == htons(ETH_P_8021Q))
 		__skb_push(skb, VLAN_HLEN);
+	else if (skb->protocol == htons(ETH_P_PPP_SES))
+		__skb_push(skb, PPPOE_SES_HLEN);
 	return 0;
 }
 
@@ -177,6 +203,9 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_push(skb, VLAN_HLEN);
 		skb->network_header -= VLAN_HLEN;
+	} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+		skb_push(skb, PPPOE_SES_HLEN);
+		skb->network_header -= PPPOE_SES_HLEN;
 	}
 	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
 		       br_handle_frame_finish, 1);
@@ -258,6 +287,9 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
 		if (skb->protocol == htons(ETH_P_8021Q)) {
 			skb_pull(skb, VLAN_HLEN);
 			skb->network_header += VLAN_HLEN;
+		} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+			skb_pull(skb, PPPOE_SES_HLEN);
+			skb->network_header += PPPOE_SES_HLEN;
 		}
 		skb->dst->output(skb);
 	}
@@ -328,6 +360,10 @@ bridged_dnat:
 				    htons(ETH_P_8021Q)) {
 					skb_push(skb, VLAN_HLEN);
 					skb->network_header -= VLAN_HLEN;
+				} else if(skb->protocol ==
+				    htons(ETH_P_PPP_SES)) {
+					skb_push(skb, PPPOE_SES_HLEN);
+					skb->network_header -= PPPOE_SES_HLEN;
 				}
 				NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING,
 					       skb, skb->dev, NULL,
@@ -347,6 +383,9 @@ bridged_dnat:
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_push(skb, VLAN_HLEN);
 		skb->network_header -= VLAN_HLEN;
+	} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+		skb_push(skb, PPPOE_SES_HLEN);
+		skb->network_header -= PPPOE_SES_HLEN;
 	}
 	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
 		       br_handle_frame_finish, 1);
@@ -489,7 +528,8 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 	__u32 len;
 	struct sk_buff *skb = *pskb;
 
-	if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb)) {
+	if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
+	    IS_PPPOE_IPV6(skb)) {
 #ifdef CONFIG_SYSCTL
 		if (!brnf_call_ip6tables)
 			return NF_ACCEPT;
@@ -500,6 +540,9 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 		if (skb->protocol == htons(ETH_P_8021Q)) {
 			skb_pull_rcsum(skb, VLAN_HLEN);
 			skb->network_header += VLAN_HLEN;
+		} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+			skb_pull_rcsum(skb, PPPOE_SES_HLEN);
+			skb->network_header += PPPOE_SES_HLEN;
 		}
 		return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
 	}
@@ -508,7 +551,8 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 		return NF_ACCEPT;
 #endif
 
-	if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb))
+	if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) &&
+	    !IS_PPPOE_IP(skb))
 		return NF_ACCEPT;
 
 	if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL)
@@ -517,6 +561,9 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_pull_rcsum(skb, VLAN_HLEN);
 		skb->network_header += VLAN_HLEN;
+	} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+		skb_pull_rcsum(skb, PPPOE_SES_HLEN);
+		skb->network_header += PPPOE_SES_HLEN;
 	}
 
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
@@ -598,6 +645,9 @@ static int br_nf_forward_finish(struct sk_buff *skb)
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_push(skb, VLAN_HLEN);
 		skb->network_header -= VLAN_HLEN;
+	} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+		skb_push(skb, PPPOE_SES_HLEN);
+		skb->network_header -= PPPOE_SES_HLEN;
 	}
 	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in,
 		       skb->dev, br_forward_finish, 1);
@@ -626,7 +676,8 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
 	if (!parent)
 		return NF_DROP;
 
-	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb))
+	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
+	    IS_PPPOE_IP(skb))
 		pf = PF_INET;
 	else
 		pf = PF_INET6;
@@ -634,6 +685,9 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_pull(*pskb, VLAN_HLEN);
 		(*pskb)->network_header += VLAN_HLEN;
+	} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+		skb_pull(*pskb, PPPOE_SES_HLEN);
+		(*pskb)->network_header += PPPOE_SES_HLEN;
 	}
 
 	nf_bridge = skb->nf_bridge;
@@ -726,6 +780,9 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_push(skb, VLAN_HLEN);
 		skb->network_header -= VLAN_HLEN;
+	} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+		skb_push(skb, PPPOE_SES_HLEN);
+		skb->network_header -= PPPOE_SES_HLEN;
 	}
 
 	NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
@@ -771,7 +828,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
 	if (!realoutdev)
 		return NF_DROP;
 
-	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb))
+	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
+	    IS_PPPOE_IP(skb))
 		pf = PF_INET;
 	else
 		pf = PF_INET6;
@@ -793,6 +851,9 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
 	if (skb->protocol == htons(ETH_P_8021Q)) {
 		skb_pull(skb, VLAN_HLEN);
 		skb->network_header += VLAN_HLEN;
+	} else if (skb->protocol == htons(ETH_P_PPP_SES)) {
+		skb_pull(skb, PPPOE_SES_HLEN);
+		skb->network_header += PPPOE_SES_HLEN;
 	}
 
 	nf_bridge_save_header(skb);
@@ -930,6 +991,14 @@ static ctl_table brnf_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &brnf_sysctl_call_tables,
 	},
+	{
+		.ctl_name	= NET_BRIDGE_NF_FILTER_PPPOE_TAGGED,
+		.procname	= "bridge-nf-filter-pppoe-tagged",
+		.data		= &brnf_filter_pppoe_tagged,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &brnf_sysctl_call_tables,
+	},
 	{ .ctl_name = 0 }
 };
 
-- 
cgit v1.2.3


From c15bf6e699f4c366f2d1e19ac5d7add21c6b5a19 Mon Sep 17 00:00:00 2001
From: Bart De Schuymer <bdschuym@pandora.be>
Date: Thu, 12 Apr 2007 22:15:06 -0700
Subject: [NETFILTER]: ebt_arp: add gratuitous arp filtering

The attached patch adds gratuitous arp filtering, more precisely: it
allows checking that the IPv4 source address matches the IPv4
destination address inside the ARP header. It also adds a check for the
hardware address type when matching MAC addresses (nothing critical,
just for better consistency).

Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Acked-by: Carl-Daniel Hailfinger <c-d.hailfinger.devel.2006@gmx.net>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_bridge/ebt_arp.h |  4 ++-
 net/bridge/netfilter/ebt_arp.c           | 48 +++++++++++++++-----------------
 2 files changed, 25 insertions(+), 27 deletions(-)

diff --git a/include/linux/netfilter_bridge/ebt_arp.h b/include/linux/netfilter_bridge/ebt_arp.h
index 97e4dbde1f8..cbf4843b6b0 100644
--- a/include/linux/netfilter_bridge/ebt_arp.h
+++ b/include/linux/netfilter_bridge/ebt_arp.h
@@ -8,8 +8,10 @@
 #define EBT_ARP_DST_IP 0x10
 #define EBT_ARP_SRC_MAC 0x20
 #define EBT_ARP_DST_MAC 0x40
+#define EBT_ARP_GRAT 0x80
 #define EBT_ARP_MASK (EBT_ARP_OPCODE | EBT_ARP_HTYPE | EBT_ARP_PTYPE | \
-   EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)
+   EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC | \
+   EBT_ARP_GRAT)
 #define EBT_ARP_MATCH "arp"
 
 struct ebt_arp_info
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index 9c599800a90..1a46952a56d 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -35,40 +35,36 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in
 		return EBT_NOMATCH;
 
 	if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP)) {
-		__be32 _addr, *ap;
+		__be32 saddr, daddr, *sap, *dap;
 
-		/* IPv4 addresses are always 4 bytes */
-		if (ah->ar_pln != sizeof(__be32))
+		if (ah->ar_pln != sizeof(__be32) || ah->ar_pro != htons(ETH_P_IP))
+			return EBT_NOMATCH;
+		sap = skb_header_pointer(skb, sizeof(struct arphdr) +
+					ah->ar_hln, sizeof(saddr),
+					&saddr);
+		if (sap == NULL)
+			return EBT_NOMATCH;
+		dap = skb_header_pointer(skb, sizeof(struct arphdr) +
+					2*ah->ar_hln+sizeof(saddr),
+					sizeof(daddr), &daddr);
+		if (dap == NULL)
+			return EBT_NOMATCH;
+		if (info->bitmask & EBT_ARP_SRC_IP &&
+		    FWINV(info->saddr != (*sap & info->smsk), EBT_ARP_SRC_IP))
+			return EBT_NOMATCH;
+		if (info->bitmask & EBT_ARP_DST_IP &&
+		    FWINV(info->daddr != (*dap & info->dmsk), EBT_ARP_DST_IP))
+			return EBT_NOMATCH;
+		if (info->bitmask & EBT_ARP_GRAT &&
+		    FWINV(*dap != *sap, EBT_ARP_GRAT))
 			return EBT_NOMATCH;
-		if (info->bitmask & EBT_ARP_SRC_IP) {
-			ap = skb_header_pointer(skb, sizeof(struct arphdr) +
-						ah->ar_hln, sizeof(_addr),
-						&_addr);
-			if (ap == NULL)
-				return EBT_NOMATCH;
-			if (FWINV(info->saddr != (*ap & info->smsk),
-			   EBT_ARP_SRC_IP))
-				return EBT_NOMATCH;
-		}
-
-		if (info->bitmask & EBT_ARP_DST_IP) {
-			ap = skb_header_pointer(skb, sizeof(struct arphdr) +
-						2*ah->ar_hln+sizeof(__be32),
-						sizeof(_addr), &_addr);
-			if (ap == NULL)
-				return EBT_NOMATCH;
-			if (FWINV(info->daddr != (*ap & info->dmsk),
-			   EBT_ARP_DST_IP))
-				return EBT_NOMATCH;
-		}
 	}
 
 	if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) {
 		unsigned char _mac[ETH_ALEN], *mp;
 		uint8_t verdict, i;
 
-		/* MAC addresses are 6 bytes */
-		if (ah->ar_hln != ETH_ALEN)
+		if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER))
 			return EBT_NOMATCH;
 		if (info->bitmask & EBT_ARP_SRC_MAC) {
 			mp = skb_header_pointer(skb, sizeof(struct arphdr),
-- 
cgit v1.2.3


From fe6092ea0019cbba5263a915c9ce9f2bf383209e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 12 Apr 2007 22:15:50 -0700
Subject: [NETFILTER]: nf_nat: use HW checksumming when possible

When mangling packets forwarded to a HW checksumming capable device,
offload recalculation of the checksum instead of doing it in software.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_nat_helper.c | 49 ++++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 8a40fbe842b..15b6e5ce3a0 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -153,6 +153,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 			 const char *rep_buffer,
 			 unsigned int rep_len)
 {
+	struct rtable *rt = (struct rtable *)(*pskb)->dst;
 	struct iphdr *iph;
 	struct tcphdr *tcph;
 	int oldlen, datalen;
@@ -176,11 +177,22 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 
 	datalen = (*pskb)->len - iph->ihl*4;
 	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
-		tcph->check = 0;
-		tcph->check = tcp_v4_check(datalen,
-					   iph->saddr, iph->daddr,
-					   csum_partial((char *)tcph,
-							datalen, 0));
+		if (!(rt->rt_flags & RTCF_LOCAL) &&
+		    (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
+			(*pskb)->ip_summed = CHECKSUM_PARTIAL;
+			(*pskb)->csum_start = skb_headroom(*pskb) +
+					      skb_network_offset(*pskb) +
+					      iph->ihl * 4;
+			(*pskb)->csum_offset = offsetof(struct tcphdr, check);
+			tcph->check = ~tcp_v4_check(datalen,
+						    iph->saddr, iph->daddr, 0);
+		} else {
+			tcph->check = 0;
+			tcph->check = tcp_v4_check(datalen,
+						   iph->saddr, iph->daddr,
+						   csum_partial((char *)tcph,
+								datalen, 0));
+		}
 	} else
 		nf_proto_csum_replace2(&tcph->check, *pskb,
 				       htons(oldlen), htons(datalen), 1);
@@ -217,6 +229,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
 			 const char *rep_buffer,
 			 unsigned int rep_len)
 {
+	struct rtable *rt = (struct rtable *)(*pskb)->dst;
 	struct iphdr *iph;
 	struct udphdr *udph;
 	int datalen, oldlen;
@@ -251,13 +264,25 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
 		return 1;
 
 	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
-		udph->check = 0;
-		udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
-						datalen, IPPROTO_UDP,
-						csum_partial((char *)udph,
-							     datalen, 0));
-		if (!udph->check)
-			udph->check = CSUM_MANGLED_0;
+		if (!(rt->rt_flags & RTCF_LOCAL) &&
+		    (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
+			(*pskb)->ip_summed = CHECKSUM_PARTIAL;
+			(*pskb)->csum_start = skb_headroom(*pskb) +
+					      skb_network_offset(*pskb) +
+					      iph->ihl * 4;
+			(*pskb)->csum_offset = offsetof(struct udphdr, check);
+			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+							 datalen, IPPROTO_UDP,
+							 0);
+		} else {
+			udph->check = 0;
+			udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+							datalen, IPPROTO_UDP,
+							csum_partial((char *)udph,
+								     datalen, 0));
+			if (!udph->check)
+				udph->check = CSUM_MANGLED_0;
+		}
 	} else
 		nf_proto_csum_replace2(&udph->check, *pskb,
 				       htons(oldlen), htons(datalen), 1);
-- 
cgit v1.2.3


From 3b5018d6766186474366f26cc87fba81407b9089 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 12 Apr 2007 22:16:18 -0700
Subject: [NETFILTER]: {eb,ip6,ip}t_LOG: remove remains of LOG target
 overloading

All LOG targets always use their internal logging function nowadays, so
remove the incorrect error message and handle real errors (!= -EEXIST)
by failing to load.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/netfilter/ebt_log.c | 12 ++++--------
 net/ipv4/netfilter/ipt_LOG.c   | 12 ++++--------
 net/ipv6/netfilter/ip6t_LOG.c  | 12 ++++--------
 3 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 45712aec6a0..031bfa4a51f 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -196,14 +196,10 @@ static int __init ebt_log_init(void)
 	ret = ebt_register_watcher(&log);
 	if (ret < 0)
 		return ret;
-	if (nf_log_register(PF_BRIDGE, &ebt_log_logger) < 0) {
-		printk(KERN_WARNING "ebt_log: not logging via system console "
-		       "since somebody else already registered for PF_INET\n");
-		/* we cannot make module load fail here, since otherwise
-		 * ebtables userspace would abort */
-	}
-
-	return 0;
+	ret = nf_log_register(PF_BRIDGE, &ebt_log_logger);
+	if (ret < 0 && ret != -EEXIST)
+		ebt_unregister_watcher(&log);
+	return ret;
 }
 
 static void __exit ebt_log_fini(void)
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 2fa36618c51..a42c5cd968b 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -477,14 +477,10 @@ static int __init ipt_log_init(void)
 	ret = xt_register_target(&ipt_log_reg);
 	if (ret < 0)
 		return ret;
-	if (nf_log_register(PF_INET, &ipt_log_logger) < 0) {
-		printk(KERN_WARNING "ipt_LOG: not logging via system console "
-		       "since somebody else already registered for PF_INET\n");
-		/* we cannot make module load fail here, since otherwise
-		 * iptables userspace would abort */
-	}
-
-	return 0;
+	ret = nf_log_register(PF_INET, &ipt_log_logger);
+	if (ret < 0 && ret != -EEXIST)
+		xt_unregister_target(&ipt_log_reg);
+	return ret;
 }
 
 static void __exit ipt_log_fini(void)
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index b465e24e90b..5bb9cd34935 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -490,14 +490,10 @@ static int __init ip6t_log_init(void)
 	ret = xt_register_target(&ip6t_log_reg);
 	if (ret < 0)
 		return ret;
-	if (nf_log_register(PF_INET6, &ip6t_logger) < 0) {
-		printk(KERN_WARNING "ip6t_LOG: not logging via system console "
-		       "since somebody else already registered for PF_INET6\n");
-		/* we cannot make module load fail here, since otherwise
-		 * ip6tables userspace would abort */
-	}
-
-	return 0;
+	ret = nf_log_register(PF_INET6, &ip6t_logger);
+	if (ret < 0 && ret != -EEXIST)
+		xt_unregister_target(&ip6t_log_reg);
+	return ret;
 }
 
 static void __exit ip6t_log_fini(void)
-- 
cgit v1.2.3


From d3a2c3ca8e7d908824701db978b936d115aea506 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 12 Apr 2007 22:16:38 -0700
Subject: [NETFILTER]: nfnetlink_log: remove fallback to group 0

Don't fallback to group 0 if no instance can be found for the given group.
This potentially confuses the listener and is not what the user configured.
Also remove the ring buffer spamming that happens when rules are set up
before the logging daemon is started.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nfnetlink_log.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index d2c6aab6bb7..e32e30e7a17 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -584,12 +584,7 @@ nfulnl_log_packet(unsigned int pf,
 
 	inst = instance_lookup_get(li->u.ulog.group);
 	if (!inst)
-		inst = instance_lookup_get(0);
-	if (!inst) {
-		PRINTR("nfnetlink_log: trying to log packet, "
-			"but no instance for group %u\n", li->u.ulog.group);
 		return;
-	}
 
 	plen = 0;
 	if (prefix)
-- 
cgit v1.2.3


From b076deb8498e26c9aa2f44046fe5e9936ae2fb5a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 12 Apr 2007 22:17:05 -0700
Subject: [NETFILTER]: ipt_ULOG: add compat conversion functions

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/ipt_ULOG.c | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index d26bbd2daaa..a2bcba70af5 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -349,12 +349,52 @@ static int ipt_ulog_checkentry(const char *tablename,
 	return 1;
 }
 
+#ifdef CONFIG_COMPAT
+struct compat_ipt_ulog_info {
+	compat_uint_t	nl_group;
+	compat_size_t	copy_range;
+	compat_size_t	qthreshold;
+	char		prefix[ULOG_PREFIX_LEN];
+};
+
+static void compat_from_user(void *dst, void *src)
+{
+	struct compat_ipt_ulog_info *cl = src;
+	struct ipt_ulog_info l = {
+		.nl_group	= cl->nl_group,
+		.copy_range	= cl->copy_range,
+		.qthreshold	= cl->qthreshold,
+	};
+
+	memcpy(l.prefix, cl->prefix, sizeof(l.prefix));
+	memcpy(dst, &l, sizeof(l));
+}
+
+static int compat_to_user(void __user *dst, void *src)
+{
+	struct ipt_ulog_info *l = src;
+	struct compat_ipt_ulog_info cl = {
+		.nl_group	= l->nl_group,
+		.copy_range	= l->copy_range,
+		.qthreshold	= l->qthreshold,
+	};
+
+	memcpy(cl.prefix, l->prefix, sizeof(cl.prefix));
+	return copy_to_user(dst, &cl, sizeof(cl)) ? -EFAULT : 0;
+}
+#endif /* CONFIG_COMPAT */
+
 static struct xt_target ipt_ulog_reg = {
 	.name		= "ULOG",
 	.family		= AF_INET,
 	.target		= ipt_ulog_target,
 	.targetsize	= sizeof(struct ipt_ulog_info),
 	.checkentry	= ipt_ulog_checkentry,
+#ifdef CONFIG_COMPAT
+	.compatsize	= sizeof(struct compat_ipt_ulog_info),
+	.compat_from_user = compat_from_user,
+	.compat_to_user	= compat_to_user,
+#endif
 	.me		= THIS_MODULE,
 };
 
-- 
cgit v1.2.3


From af65bdfce98d7965fbe93a48b8128444a2eea024 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 20 Apr 2007 14:14:21 -0700
Subject: [NETLINK]: Switch cb_lock spinlock to mutex and allow to override it

Switch cb_lock to mutex and allow netlink kernel users to override it
with a subsystem specific mutex for consistent locking in dump callbacks.
All netlink_dump_start users have been audited not to rely on any
side-effects of the previously used spinlock.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/connector/connector.c       |  2 +-
 drivers/scsi/scsi_netlink.c         |  3 ++-
 drivers/scsi/scsi_transport_iscsi.c |  2 +-
 fs/ecryptfs/netlink.c               |  2 +-
 include/linux/netlink.h             |  5 ++++-
 kernel/audit.c                      |  2 +-
 lib/kobject_uevent.c                |  2 +-
 net/bridge/netfilter/ebt_ulog.c     |  2 +-
 net/core/rtnetlink.c                |  2 +-
 net/decnet/netfilter/dn_rtmsg.c     |  2 +-
 net/ipv4/fib_frontend.c             |  3 ++-
 net/ipv4/inet_diag.c                |  2 +-
 net/ipv4/netfilter/ip_queue.c       |  2 +-
 net/ipv4/netfilter/ipt_ULOG.c       |  2 +-
 net/ipv6/netfilter/ip6_queue.c      |  2 +-
 net/netfilter/nfnetlink.c           |  2 +-
 net/netlink/af_netlink.c            | 38 ++++++++++++++++++++++---------------
 net/netlink/genetlink.c             |  2 +-
 net/xfrm/xfrm_user.c                |  2 +-
 security/selinux/netlink.c          |  2 +-
 20 files changed, 47 insertions(+), 34 deletions(-)

diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 7f9c4fb7e5b..a7b9e9bb3e8 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -448,7 +448,7 @@ static int __devinit cn_init(void)
 
 	dev->nls = netlink_kernel_create(NETLINK_CONNECTOR,
 					 CN_NETLINK_USERS + 0xf,
-					 dev->input, THIS_MODULE);
+					 dev->input, NULL, THIS_MODULE);
 	if (!dev->nls)
 		return -EIO;
 
diff --git a/drivers/scsi/scsi_netlink.c b/drivers/scsi/scsi_netlink.c
index 45646a28524..4bf9aa547c7 100644
--- a/drivers/scsi/scsi_netlink.c
+++ b/drivers/scsi/scsi_netlink.c
@@ -168,7 +168,8 @@ scsi_netlink_init(void)
 	}
 
 	scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT,
-				SCSI_NL_GRP_CNT, scsi_nl_rcv, THIS_MODULE);
+				SCSI_NL_GRP_CNT, scsi_nl_rcv, NULL,
+				THIS_MODULE);
 	if (!scsi_nl_sock) {
 		printk(KERN_ERR "%s: register of recieve handler failed\n",
 				__FUNCTION__);
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 10590cd7e9e..aabaa0576ab 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1435,7 +1435,7 @@ static __init int iscsi_transport_init(void)
 	if (err)
 		goto unregister_conn_class;
 
-	nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx,
+	nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, NULL,
 			THIS_MODULE);
 	if (!nls) {
 		err = -ENOBUFS;
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c
index 8405d216a5f..fe9186312d7 100644
--- a/fs/ecryptfs/netlink.c
+++ b/fs/ecryptfs/netlink.c
@@ -229,7 +229,7 @@ int ecryptfs_init_netlink(void)
 
 	ecryptfs_nl_sock = netlink_kernel_create(NETLINK_ECRYPTFS, 0,
 						 ecryptfs_receive_nl_message,
-						 THIS_MODULE);
+						 NULL, THIS_MODULE);
 	if (!ecryptfs_nl_sock) {
 		rc = -EIO;
 		ecryptfs_printk(KERN_ERR, "Failed to create netlink socket\n");
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 0d11f6a7389..f41688f5663 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -157,7 +157,10 @@ struct netlink_skb_parms
 #define NETLINK_CREDS(skb)	(&NETLINK_CB((skb)).creds)
 
 
-extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct module *module);
+extern struct sock *netlink_kernel_create(int unit, unsigned int groups,
+					  void (*input)(struct sock *sk, int len),
+					  struct mutex *cb_mutex,
+					  struct module *module);
 extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
 extern int netlink_has_listeners(struct sock *sk, unsigned int group);
 extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock);
diff --git a/kernel/audit.c b/kernel/audit.c
index 80a7457dadb..4e9d2082968 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -795,7 +795,7 @@ static int __init audit_init(void)
 	printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
 	       audit_default ? "enabled" : "disabled");
 	audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive,
-					   THIS_MODULE);
+					   NULL, THIS_MODULE);
 	if (!audit_sock)
 		audit_panic("cannot initialize netlink socket");
 	else
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 84272ed77f0..82fc1794b69 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -293,7 +293,7 @@ EXPORT_SYMBOL_GPL(add_uevent_var);
 static int __init kobject_uevent_init(void)
 {
 	uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL,
-					    THIS_MODULE);
+					    NULL, THIS_MODULE);
 
 	if (!uevent_sock) {
 		printk(KERN_ERR
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 8b84cd40279..9411db62591 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -302,7 +302,7 @@ static int __init ebt_ulog_init(void)
 	}
 
 	ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS,
-					  NULL, THIS_MODULE);
+					  NULL, NULL, THIS_MODULE);
 	if (!ebtulognl)
 		ret = -ENOMEM;
 	else if ((ret = ebt_register_watcher(&ulog)))
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5266df33705..648a7b6d15d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -972,7 +972,7 @@ void __init rtnetlink_init(void)
 		panic("rtnetlink_init: cannot allocate rta_buf\n");
 
 	rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
-				     THIS_MODULE);
+				     NULL, THIS_MODULE);
 	if (rtnl == NULL)
 		panic("rtnetlink_init: cannot initialize rtnetlink\n");
 	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 2ee47bab693..696234688cf 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -138,7 +138,7 @@ static int __init dn_rtmsg_init(void)
 	int rv = 0;
 
 	dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX,
-				      dnrmg_receive_user_sk, THIS_MODULE);
+				      dnrmg_receive_user_sk, NULL, THIS_MODULE);
 	if (dnrmg == NULL) {
 		printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
 		return -ENOMEM;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 5bf718a3e49..953dd458c23 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -827,7 +827,8 @@ static void nl_fib_input(struct sock *sk, int len)
 
 static void nl_fib_lookup_init(void)
 {
-      netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE);
+      netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL,
+      			    THIS_MODULE);
 }
 
 static void fib_disable_ip(struct net_device *dev, int force)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 0148f0e34ce..dbeacd8b0f9 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -893,7 +893,7 @@ static int __init inet_diag_init(void)
 		goto out;
 
 	idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv,
-					THIS_MODULE);
+					NULL, THIS_MODULE);
 	if (idiagnl == NULL)
 		goto out_free_table;
 	err = 0;
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 0d72693869e..702d94db19b 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -668,7 +668,7 @@ static int __init ip_queue_init(void)
 
 	netlink_register_notifier(&ipq_nl_notifier);
 	ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk,
-				      THIS_MODULE);
+				      NULL, THIS_MODULE);
 	if (ipqnl == NULL) {
 		printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
 		goto cleanup_netlink_notifier;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index a2bcba70af5..23b607b33b3 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -420,7 +420,7 @@ static int __init ipt_ulog_init(void)
 		setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
 
 	nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
-					THIS_MODULE);
+					NULL, THIS_MODULE);
 	if (!nflognl)
 		return -ENOMEM;
 
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index bfae9fdc466..0004db38af6 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -657,7 +657,7 @@ static int __init ip6_queue_init(void)
 	struct proc_dir_entry *proc;
 
 	netlink_register_notifier(&ipq_nl_notifier);
-	ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk,
+	ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, NULL,
 				      THIS_MODULE);
 	if (ipqnl == NULL) {
 		printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index b0da853eabe..8797e6953ef 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -265,7 +265,7 @@ static int __init nfnetlink_init(void)
 	printk("Netfilter messages via NETLINK v%s.\n", nfversion);
 
 	nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX,
-				     nfnetlink_rcv, THIS_MODULE);
+				     nfnetlink_rcv, NULL, THIS_MODULE);
 	if (!nfnl) {
 		printk(KERN_ERR "cannot initialize nfnetlink!\n");
 		return -1;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2cbf1682f63..ec16c9b7b3b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -56,6 +56,7 @@
 #include <linux/types.h>
 #include <linux/audit.h>
 #include <linux/selinux.h>
+#include <linux/mutex.h>
 
 #include <net/sock.h>
 #include <net/scm.h>
@@ -76,7 +77,8 @@ struct netlink_sock {
 	unsigned long		state;
 	wait_queue_head_t	wait;
 	struct netlink_callback	*cb;
-	spinlock_t		cb_lock;
+	struct mutex		*cb_mutex;
+	struct mutex		cb_def_mutex;
 	void			(*data_ready)(struct sock *sk, int bytes);
 	struct module		*module;
 };
@@ -108,6 +110,7 @@ struct netlink_table {
 	unsigned long *listeners;
 	unsigned int nl_nonroot;
 	unsigned int groups;
+	struct mutex *cb_mutex;
 	struct module *module;
 	int registered;
 };
@@ -370,7 +373,8 @@ static struct proto netlink_proto = {
 	.obj_size = sizeof(struct netlink_sock),
 };
 
-static int __netlink_create(struct socket *sock, int protocol)
+static int __netlink_create(struct socket *sock, struct mutex *cb_mutex,
+			    int protocol)
 {
 	struct sock *sk;
 	struct netlink_sock *nlk;
@@ -384,7 +388,8 @@ static int __netlink_create(struct socket *sock, int protocol)
 	sock_init_data(sock, sk);
 
 	nlk = nlk_sk(sk);
-	spin_lock_init(&nlk->cb_lock);
+	nlk->cb_mutex = cb_mutex ? : &nlk->cb_def_mutex;
+	mutex_init(nlk->cb_mutex);
 	init_waitqueue_head(&nlk->wait);
 
 	sk->sk_destruct = netlink_sock_destruct;
@@ -395,6 +400,7 @@ static int __netlink_create(struct socket *sock, int protocol)
 static int netlink_create(struct socket *sock, int protocol)
 {
 	struct module *module = NULL;
+	struct mutex *cb_mutex;
 	struct netlink_sock *nlk;
 	int err = 0;
 
@@ -417,9 +423,10 @@ static int netlink_create(struct socket *sock, int protocol)
 	if (nl_table[protocol].registered &&
 	    try_module_get(nl_table[protocol].module))
 		module = nl_table[protocol].module;
+	cb_mutex = nl_table[protocol].cb_mutex;
 	netlink_unlock_table();
 
-	if ((err = __netlink_create(sock, protocol)) < 0)
+	if ((err = __netlink_create(sock, cb_mutex, protocol)) < 0)
 		goto out_module;
 
 	nlk = nlk_sk(sock->sk);
@@ -444,14 +451,14 @@ static int netlink_release(struct socket *sock)
 	sock_orphan(sk);
 	nlk = nlk_sk(sk);
 
-	spin_lock(&nlk->cb_lock);
+	mutex_lock(nlk->cb_mutex);
 	if (nlk->cb) {
 		if (nlk->cb->done)
 			nlk->cb->done(nlk->cb);
 		netlink_destroy_callback(nlk->cb);
 		nlk->cb = NULL;
 	}
-	spin_unlock(&nlk->cb_lock);
+	mutex_unlock(nlk->cb_mutex);
 
 	/* OK. Socket is unlinked, and, therefore,
 	   no new packets will arrive */
@@ -1266,7 +1273,7 @@ static void netlink_data_ready(struct sock *sk, int len)
 struct sock *
 netlink_kernel_create(int unit, unsigned int groups,
 		      void (*input)(struct sock *sk, int len),
-		      struct module *module)
+		      struct mutex *cb_mutex, struct module *module)
 {
 	struct socket *sock;
 	struct sock *sk;
@@ -1281,7 +1288,7 @@ netlink_kernel_create(int unit, unsigned int groups,
 	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
 		return NULL;
 
-	if (__netlink_create(sock, unit) < 0)
+	if (__netlink_create(sock, cb_mutex, unit) < 0)
 		goto out_sock_release;
 
 	if (groups < 32)
@@ -1305,6 +1312,7 @@ netlink_kernel_create(int unit, unsigned int groups,
 	netlink_table_grab();
 	nl_table[unit].groups = groups;
 	nl_table[unit].listeners = listeners;
+	nl_table[unit].cb_mutex = cb_mutex;
 	nl_table[unit].module = module;
 	nl_table[unit].registered = 1;
 	netlink_table_ungrab();
@@ -1347,7 +1355,7 @@ static int netlink_dump(struct sock *sk)
 	if (!skb)
 		goto errout;
 
-	spin_lock(&nlk->cb_lock);
+	mutex_lock(nlk->cb_mutex);
 
 	cb = nlk->cb;
 	if (cb == NULL) {
@@ -1358,7 +1366,7 @@ static int netlink_dump(struct sock *sk)
 	len = cb->dump(skb, cb);
 
 	if (len > 0) {
-		spin_unlock(&nlk->cb_lock);
+		mutex_unlock(nlk->cb_mutex);
 		skb_queue_tail(&sk->sk_receive_queue, skb);
 		sk->sk_data_ready(sk, len);
 		return 0;
@@ -1376,13 +1384,13 @@ static int netlink_dump(struct sock *sk)
 	if (cb->done)
 		cb->done(cb);
 	nlk->cb = NULL;
-	spin_unlock(&nlk->cb_lock);
+	mutex_unlock(nlk->cb_mutex);
 
 	netlink_destroy_callback(cb);
 	return 0;
 
 errout_skb:
-	spin_unlock(&nlk->cb_lock);
+	mutex_unlock(nlk->cb_mutex);
 	kfree_skb(skb);
 errout:
 	return err;
@@ -1414,15 +1422,15 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	}
 	nlk = nlk_sk(sk);
 	/* A dump or destruction is in progress... */
-	spin_lock(&nlk->cb_lock);
+	mutex_lock(nlk->cb_mutex);
 	if (nlk->cb || sock_flag(sk, SOCK_DEAD)) {
-		spin_unlock(&nlk->cb_lock);
+		mutex_unlock(nlk->cb_mutex);
 		netlink_destroy_callback(cb);
 		sock_put(sk);
 		return -EBUSY;
 	}
 	nlk->cb = cb;
-	spin_unlock(&nlk->cb_lock);
+	mutex_unlock(nlk->cb_mutex);
 
 	netlink_dump(sk);
 	sock_put(sk);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index fac2e7a6dbe..6e31234a419 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -558,7 +558,7 @@ static int __init genl_init(void)
 
 	netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV);
 	genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID,
-					  genl_rcv, THIS_MODULE);
+					  genl_rcv, NULL, THIS_MODULE);
 	if (genl_sock == NULL)
 		panic("GENL: Cannot initialize generic netlink\n");
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 2ff968373f1..88659edc9b1 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2444,7 +2444,7 @@ static int __init xfrm_user_init(void)
 	printk(KERN_INFO "Initializing XFRM netlink socket\n");
 
 	nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX,
-				     xfrm_netlink_rcv, THIS_MODULE);
+				     xfrm_netlink_rcv, NULL, THIS_MODULE);
 	if (nlsk == NULL)
 		return -ENOMEM;
 	rcu_assign_pointer(xfrm_nl, nlsk);
diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c
index 33f2e064a68..f49046de63a 100644
--- a/security/selinux/netlink.c
+++ b/security/selinux/netlink.c
@@ -104,7 +104,7 @@ void selnl_notify_policyload(u32 seqno)
 
 static int __init selnl_init(void)
 {
-	selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL,
+	selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL, NULL,
 	                              THIS_MODULE);
 	if (selnl == NULL)
 		panic("SELinux:  Cannot create netlink socket.");
-- 
cgit v1.2.3


From 1c2d670f3660e9103fdcdca702f6dbf8ea7d6afb Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 16 Apr 2007 16:59:10 -0700
Subject: [RTNETLINK]: Hold rtnl_mutex during netlink dump callbacks

Hold rtnl_mutex during the entire netlink dump operation. This allows
to simplify locking in the dump callbacks, since they can now rely on
that no concurrent changes happen.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 648a7b6d15d..62f5c7f98d1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -859,6 +859,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	int min_len;
 	int family;
 	int type;
+	int err;
 
 	type = nlh->nlmsg_type;
 	if (type > RTM_MAX)
@@ -887,7 +888,10 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (dumpit == NULL)
 			return -EOPNOTSUPP;
 
-		return netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
+		__rtnl_unlock();
+		err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
+		rtnl_lock();
+		return err;
 	}
 
 	memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
@@ -972,7 +976,7 @@ void __init rtnetlink_init(void)
 		panic("rtnetlink_init: cannot allocate rta_buf\n");
 
 	rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
-				     NULL, THIS_MODULE);
+				     &rtnl_mutex, THIS_MODULE);
 	if (rtnl == NULL)
 		panic("rtnetlink_init: cannot initialize rtnetlink\n");
 	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
-- 
cgit v1.2.3


From 6313c1e0992feaee56bc09b85042b3186041fa3c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 16 Apr 2007 17:00:53 -0700
Subject: [RTNETLINK]: Remove unnecessary locking in dump callbacks

Since we're now holding the rtnl during the entire dump operation, we can
remove additional locking for rtnl protected data. This patch does that
for all simple cases (dev_base_lock for dev_base walking, RCU protection
for FIB rule dumping).

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netlink.c |  2 --
 net/core/fib_rules.c    |  4 +---
 net/core/rtnetlink.c    |  2 --
 net/decnet/dn_dev.c     |  3 ---
 net/ipv4/devinet.c      | 12 ++----------
 net/ipv6/addrconf.c     |  2 --
 6 files changed, 3 insertions(+), 22 deletions(-)

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 5e84ade129c..35facc0c11c 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -109,7 +109,6 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	struct net_device *dev;
 	int idx;
 
-	read_lock(&dev_base_lock);
 	for (dev = dev_base, idx = 0; dev; dev = dev->next) {
 		/* not a bridge port */
 		if (dev->br_port == NULL || idx < cb->args[0])
@@ -122,7 +121,6 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 skip:
 		++idx;
 	}
-	read_unlock(&dev_base_lock);
 
 	cb->args[0] = idx;
 
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index cb2dae19531..8c5474e1668 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -495,8 +495,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
 	int idx = 0;
 	struct fib_rule *rule;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(rule, ops->rules_list, list) {
+	list_for_each_entry(rule, ops->rules_list, list) {
 		if (idx < cb->args[1])
 			goto skip;
 
@@ -507,7 +506,6 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
 skip:
 		idx++;
 	}
-	rcu_read_unlock();
 	cb->args[1] = idx;
 	rules_ops_put(ops);
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 62f5c7f98d1..bc95fab0b0c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -543,7 +543,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	int s_idx = cb->args[0];
 	struct net_device *dev;
 
-	read_lock(&dev_base_lock);
 	for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
 		if (idx < s_idx)
 			continue;
@@ -552,7 +551,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 				     cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0)
 			break;
 	}
-	read_unlock(&dev_base_lock);
 	cb->args[0] = idx;
 
 	return skb->len;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 61be2caddc5..5c2a9951b63 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -799,7 +799,6 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 	skip_ndevs = cb->args[0];
 	skip_naddr = cb->args[1];
 
-	read_lock(&dev_base_lock);
 	for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
 		if (idx < skip_ndevs)
 			continue;
@@ -824,8 +823,6 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 		}
 	}
 done:
-	read_unlock(&dev_base_lock);
-
 	cb->args[0] = idx;
 	cb->args[1] = dn_idx;
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 9bdc79564cc..088888db8b3 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1182,17 +1182,13 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 	int s_ip_idx, s_idx = cb->args[0];
 
 	s_ip_idx = ip_idx = cb->args[1];
-	read_lock(&dev_base_lock);
 	for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
 		if (idx < s_idx)
 			continue;
 		if (idx > s_idx)
 			s_ip_idx = 0;
-		rcu_read_lock();
-		if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
-			rcu_read_unlock();
+		if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
 			continue;
-		}
 
 		for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
 		     ifa = ifa->ifa_next, ip_idx++) {
@@ -1200,16 +1196,12 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 				continue;
 			if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
 					     cb->nlh->nlmsg_seq,
-					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
-				rcu_read_unlock();
+					     RTM_NEWADDR, NLM_F_MULTI) <= 0)
 				goto done;
-			}
 		}
-		rcu_read_unlock();
 	}
 
 done:
-	read_unlock(&dev_base_lock);
 	cb->args[0] = idx;
 	cb->args[1] = ip_idx;
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index eecba1886b4..1486f76f787 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3224,7 +3224,6 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 
 	s_idx = cb->args[0];
 	s_ip_idx = ip_idx = cb->args[1];
-	read_lock(&dev_base_lock);
 
 	for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
 		if (idx < s_idx)
@@ -3286,7 +3285,6 @@ done:
 		read_unlock_bh(&idev->lock);
 		in6_dev_put(idev);
 	}
-	read_unlock(&dev_base_lock);
 	cb->args[0] = idx;
 	cb->args[1] = ip_idx;
 	return skb->len;
-- 
cgit v1.2.3


From ffa4d7216e848fbfdcb8e6f0bb66abeaa1888964 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 25 Apr 2007 14:01:17 -0700
Subject: [NETLINK]: don't reinitialize callback mutex

Don't reinitialize the callback mutex the netlink_kernel_create caller
handed in, it is supposed to already be initialized and could already
be held by someone.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index ec16c9b7b3b..64d4b27f25a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -388,8 +388,12 @@ static int __netlink_create(struct socket *sock, struct mutex *cb_mutex,
 	sock_init_data(sock, sk);
 
 	nlk = nlk_sk(sk);
-	nlk->cb_mutex = cb_mutex ? : &nlk->cb_def_mutex;
-	mutex_init(nlk->cb_mutex);
+	if (cb_mutex)
+		nlk->cb_mutex = cb_mutex;
+	else {
+		nlk->cb_mutex = &nlk->cb_def_mutex;
+		mutex_init(nlk->cb_mutex);
+	}
 	init_waitqueue_head(&nlk->wait);
 
 	sk->sk_destruct = netlink_sock_destruct;
-- 
cgit v1.2.3


From 0463d4ae25771aaf3379bb6b2392f6edf23c2828 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 16 Apr 2007 17:02:10 -0700
Subject: [NET_SCHED]: Eliminate qdisc_tree_lock

Since we're now holding the rtnl during the entire dump operation, we
can remove qdisc_tree_lock, whose only purpose is to protect dump
callbacks from concurrent changes to the qdisc tree.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h |  2 --
 net/sched/cls_api.c     |  2 --
 net/sched/sch_api.c     | 22 +++-------------------
 net/sched/sch_generic.c | 29 +++++++----------------------
 4 files changed, 10 insertions(+), 45 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index b2cc9a8ed4e..5754d53d9ef 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -13,8 +13,6 @@ struct qdisc_walker
 	int	(*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *);
 };
 
-extern rwlock_t qdisc_tree_lock;
-
 #define QDISC_ALIGNTO		32
 #define QDISC_ALIGN(len)	(((len) + QDISC_ALIGNTO-1) & ~(QDISC_ALIGNTO-1))
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ca3da5013b7..ebf94edf047 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -400,7 +400,6 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
 		return skb->len;
 
-	read_lock(&qdisc_tree_lock);
 	if (!tcm->tcm_parent)
 		q = dev->qdisc_sleeping;
 	else
@@ -457,7 +456,6 @@ errout:
 	if (cl)
 		cops->put(q, cl);
 out:
-	read_unlock(&qdisc_tree_lock);
 	dev_put(dev);
 	return skb->len;
 }
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 2e863bdaa9a..0ce6914f598 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -191,7 +191,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
    (root qdisc, all its children, children of children etc.)
  */
 
-static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
+struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 {
 	struct Qdisc *q;
 
@@ -202,16 +202,6 @@ static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
 	return NULL;
 }
 
-struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
-{
-	struct Qdisc *q;
-
-	read_lock(&qdisc_tree_lock);
-	q = __qdisc_lookup(dev, handle);
-	read_unlock(&qdisc_tree_lock);
-	return q;
-}
-
 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
 {
 	unsigned long cl;
@@ -405,7 +395,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 	if (n == 0)
 		return;
 	while ((parentid = sch->parent)) {
-		sch = __qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
+		sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
 		cops = sch->ops->cl_ops;
 		if (cops->qlen_notify) {
 			cl = cops->get(sch, parentid);
@@ -905,7 +895,6 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 			continue;
 		if (idx > s_idx)
 			s_q_idx = 0;
-		read_lock(&qdisc_tree_lock);
 		q_idx = 0;
 		list_for_each_entry(q, &dev->qdisc_list, list) {
 			if (q_idx < s_q_idx) {
@@ -913,13 +902,10 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 				continue;
 			}
 			if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
-					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
-				read_unlock(&qdisc_tree_lock);
+					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
 				goto done;
-			}
 			q_idx++;
 		}
-		read_unlock(&qdisc_tree_lock);
 	}
 
 done:
@@ -1142,7 +1128,6 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 	s_t = cb->args[0];
 	t = 0;
 
-	read_lock(&qdisc_tree_lock);
 	list_for_each_entry(q, &dev->qdisc_list, list) {
 		if (t < s_t || !q->ops->cl_ops ||
 		    (tcm->tcm_parent &&
@@ -1164,7 +1149,6 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 			break;
 		t++;
 	}
-	read_unlock(&qdisc_tree_lock);
 
 	cb->args[0] = t;
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 52eb3439d7c..1894eb72f6c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -36,34 +36,23 @@
 
 /* Main transmission queue. */
 
-/* Main qdisc structure lock.
-
-   However, modifications
-   to data, participating in scheduling must be additionally
-   protected with dev->queue_lock spinlock.
-
-   The idea is the following:
-   - enqueue, dequeue are serialized via top level device
-     spinlock dev->queue_lock.
-   - tree walking is protected by read_lock(qdisc_tree_lock)
-     and this lock is used only in process context.
-   - updates to tree are made only under rtnl semaphore,
-     hence this lock may be made without local bh disabling.
-
-   qdisc_tree_lock must be grabbed BEFORE dev->queue_lock!
+/* Modifications to data participating in scheduling must be protected with
+ * dev->queue_lock spinlock.
+ *
+ * The idea is the following:
+ * - enqueue, dequeue are serialized via top level device
+ *   spinlock dev->queue_lock.
+ * - updates to tree and tree walking are only done under the rtnl mutex.
  */
-DEFINE_RWLOCK(qdisc_tree_lock);
 
 void qdisc_lock_tree(struct net_device *dev)
 {
-	write_lock(&qdisc_tree_lock);
 	spin_lock_bh(&dev->queue_lock);
 }
 
 void qdisc_unlock_tree(struct net_device *dev)
 {
 	spin_unlock_bh(&dev->queue_lock);
-	write_unlock(&qdisc_tree_lock);
 }
 
 /*
@@ -528,15 +517,11 @@ void dev_activate(struct net_device *dev)
 				printk(KERN_INFO "%s: activation failed\n", dev->name);
 				return;
 			}
-			write_lock(&qdisc_tree_lock);
 			list_add_tail(&qdisc->list, &dev->qdisc_list);
-			write_unlock(&qdisc_tree_lock);
 		} else {
 			qdisc =  &noqueue_qdisc;
 		}
-		write_lock(&qdisc_tree_lock);
 		dev->qdisc_sleeping = qdisc;
-		write_unlock(&qdisc_tree_lock);
 	}
 
 	if (!netif_carrier_ok(dev))
-- 
cgit v1.2.3


From fd44de7cc1d430caef91ad9aecec9ff000fe86f8 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 16 Apr 2007 17:07:08 -0700
Subject: [NET_SCHED]: ingress: switch back to using ingress_lock

Switch ingress queueing back to use ingress_lock. qdisc_lock_tree now locks
both the ingress and egress qdiscs on the device. All changes to data that
might be used on both ingress and egress needs to be protected by using
qdisc_lock_tree instead of manually taking dev->queue_lock. Additionally
the qdisc stats_lock needs to be initialized to ingress_lock for ingress
qdiscs.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c          |  4 ++--
 net/sched/cls_route.c   |  4 ++--
 net/sched/sch_api.c     | 26 +++++++++++++++-----------
 net/sched/sch_generic.c |  6 +++++-
 net/sched/sch_ingress.c |  9 ++-------
 5 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 7f31d0f8842..c8f5ea9aea8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1747,10 +1747,10 @@ static int ing_filter(struct sk_buff *skb)
 
 		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
 
-		spin_lock(&dev->queue_lock);
+		spin_lock(&dev->ingress_lock);
 		if ((q = dev->qdisc_ingress) != NULL)
 			result = q->enqueue(skb, q);
-		spin_unlock(&dev->queue_lock);
+		spin_unlock(&dev->ingress_lock);
 
 	}
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index e92d716c915..cc941d0ee3a 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -89,9 +89,9 @@ static __inline__ int route4_fastmap_hash(u32 id, int iif)
 static inline
 void route4_reset_fastmap(struct net_device *dev, struct route4_head *head, u32 id)
 {
-	spin_lock_bh(&dev->queue_lock);
+	qdisc_lock_tree(dev);
 	memset(head->fastmap, 0, sizeof(head->fastmap));
-	spin_unlock_bh(&dev->queue_lock);
+	qdisc_unlock_tree(dev);
 }
 
 static inline void
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 0ce6914f598..8699e7006d8 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -500,12 +500,16 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
 
 	if (handle == TC_H_INGRESS) {
 		sch->flags |= TCQ_F_INGRESS;
+		sch->stats_lock = &dev->ingress_lock;
 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
-	} else if (handle == 0) {
-		handle = qdisc_alloc_handle(dev);
-		err = -ENOMEM;
-		if (handle == 0)
-			goto err_out3;
+	} else {
+		sch->stats_lock = &dev->queue_lock;
+		if (handle == 0) {
+			handle = qdisc_alloc_handle(dev);
+			err = -ENOMEM;
+			if (handle == 0)
+				goto err_out3;
+		}
 	}
 
 	sch->handle = handle;
@@ -654,9 +658,9 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 			return err;
 		if (q) {
 			qdisc_notify(skb, n, clid, q, NULL);
-			spin_lock_bh(&dev->queue_lock);
+			qdisc_lock_tree(dev);
 			qdisc_destroy(q);
-			spin_unlock_bh(&dev->queue_lock);
+			qdisc_unlock_tree(dev);
 		}
 	} else {
 		qdisc_notify(skb, n, clid, NULL, q);
@@ -789,17 +793,17 @@ graft:
 		err = qdisc_graft(dev, p, clid, q, &old_q);
 		if (err) {
 			if (q) {
-				spin_lock_bh(&dev->queue_lock);
+				qdisc_lock_tree(dev);
 				qdisc_destroy(q);
-				spin_unlock_bh(&dev->queue_lock);
+				qdisc_unlock_tree(dev);
 			}
 			return err;
 		}
 		qdisc_notify(skb, n, clid, old_q, q);
 		if (old_q) {
-			spin_lock_bh(&dev->queue_lock);
+			qdisc_lock_tree(dev);
 			qdisc_destroy(old_q);
-			spin_unlock_bh(&dev->queue_lock);
+			qdisc_unlock_tree(dev);
 		}
 	}
 	return 0;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 1894eb72f6c..3385ee59254 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -42,16 +42,20 @@
  * The idea is the following:
  * - enqueue, dequeue are serialized via top level device
  *   spinlock dev->queue_lock.
+ * - ingress filtering is serialized via top level device
+ *   spinlock dev->ingress_lock.
  * - updates to tree and tree walking are only done under the rtnl mutex.
  */
 
 void qdisc_lock_tree(struct net_device *dev)
 {
 	spin_lock_bh(&dev->queue_lock);
+	spin_lock(&dev->ingress_lock);
 }
 
 void qdisc_unlock_tree(struct net_device *dev)
 {
+	spin_unlock(&dev->ingress_lock);
 	spin_unlock_bh(&dev->queue_lock);
 }
 
@@ -431,7 +435,6 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
 	sch->dequeue = ops->dequeue;
 	sch->dev = dev;
 	dev_hold(dev);
-	sch->stats_lock = &dev->queue_lock;
 	atomic_set(&sch->refcnt, 1);
 
 	return sch;
@@ -447,6 +450,7 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops,
 	sch = qdisc_alloc(dev, ops);
 	if (IS_ERR(sch))
 		goto errout;
+	sch->stats_lock = &dev->queue_lock;
 	sch->parent = parentid;
 
 	if (!ops->init || ops->init(sch, NULL) == 0)
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 1fb60aba1e6..ad22dc6af22 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -248,16 +248,11 @@ ing_hook(unsigned int hook, struct sk_buff **pskb,
 		skb->dev ? (*pskb)->dev->name : "(no dev)",
 		skb->len);
 
-/*
-revisit later: Use a private since lock dev->queue_lock is also
-used on the egress (might slow things for an iota)
-*/
-
 	if (dev->qdisc_ingress) {
-		spin_lock(&dev->queue_lock);
+		spin_lock(&dev->ingress_lock);
 		if ((q = dev->qdisc_ingress) != NULL)
 			fwres = q->enqueue(skb, q);
-		spin_unlock(&dev->queue_lock);
+		spin_unlock(&dev->ingress_lock);
 	}
 
 	return fwres;
-- 
cgit v1.2.3


From b881ef7603230550aa0150b22af94089f07ab00d Mon Sep 17 00:00:00 2001
From: John Heffner <jheffner@psc.edu>
Date: Fri, 20 Apr 2007 15:52:39 -0700
Subject: [IPV6]: MTU discovery check in ip6_fragment()

Adds a check in ip6_fragment() mirroring ip_fragment() for packets
that we can't fragment, and sends an ICMP Packet Too Big message
in response.

Signed-off-by: John Heffner <jheffner@psc.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4cfdad4e835..5a5b7d4ad31 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -567,6 +567,19 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	nexthdr = *prevhdr;
 
 	mtu = dst_mtu(&rt->u.dst);
+
+	/* We must not fragment if the socket is set to force MTU discovery
+	 * or if the skb it not generated by a local socket.  (This last
+	 * check should be redundant, but it's free.)
+	 */
+	if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
+		skb->dev = skb->dst->dev;
+		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
+		kfree_skb(skb);
+		return -EMSGSIZE;
+	}
+
 	if (np && np->frag_size < mtu) {
 		if (np->frag_size)
 			mtu = np->frag_size;
-- 
cgit v1.2.3


From 628a5c561890a9a9a74dea017873530584aab06e Mon Sep 17 00:00:00 2001
From: John Heffner <jheffner@psc.edu>
Date: Fri, 20 Apr 2007 15:53:27 -0700
Subject: [INET]: Add IP(V6)_PMTUDISC_RPOBE

Add IP(V6)_PMTUDISC_PROBE value for IP(V6)_MTU_DISCOVER.  This option forces
us not to fragment, but does not make use of the kernel path MTU discovery.
That is, it allows for user-mode MTU probing (or, packetization-layer path
MTU discovery).  This is particularly useful for diagnostic utilities, like
traceroute/tracepath.

Signed-off-by: John Heffner <jheffner@psc.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in.h       |  1 +
 include/linux/in6.h      |  1 +
 net/ipv4/ip_output.c     | 20 +++++++++++++++-----
 net/ipv4/ip_sockglue.c   |  2 +-
 net/ipv6/ip6_output.c    | 15 ++++++++++++---
 net/ipv6/ipv6_sockglue.c |  2 +-
 6 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/include/linux/in.h b/include/linux/in.h
index 1912e7c0bc2..3975cbf52f2 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -83,6 +83,7 @@ struct in_addr {
 #define IP_PMTUDISC_DONT		0	/* Never send DF frames */
 #define IP_PMTUDISC_WANT		1	/* Use per route hints	*/
 #define IP_PMTUDISC_DO			2	/* Always DF		*/
+#define IP_PMTUDISC_PROBE		3       /* Ignore dst pmtu      */
 
 #define IP_MULTICAST_IF			32
 #define IP_MULTICAST_TTL 		33
diff --git a/include/linux/in6.h b/include/linux/in6.h
index 4e8350ae886..d559fac4a26 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -179,6 +179,7 @@ struct in6_flowlabel_req
 #define IPV6_PMTUDISC_DONT		0
 #define IPV6_PMTUDISC_WANT		1
 #define IPV6_PMTUDISC_DO		2
+#define IPV6_PMTUDISC_PROBE		3
 
 /* Flowlabel */
 #define IPV6_FLOWLABEL_MGR	32
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 34606eff8a0..534650cad3a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -189,6 +189,14 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 	return -EINVAL;
 }
 
+static inline int ip_skb_dst_mtu(struct sk_buff *skb)
+{
+	struct inet_sock *inet = skb->sk ? inet_sk(skb->sk) : NULL;
+
+	return (inet && inet->pmtudisc == IP_PMTUDISC_PROBE) ?
+	       skb->dst->dev->mtu : dst_mtu(skb->dst);
+}
+
 static inline int ip_finish_output(struct sk_buff *skb)
 {
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
@@ -198,7 +206,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
 		return dst_output(skb);
 	}
 #endif
-	if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
+	if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
 		return ip_fragment(skb, ip_finish_output2);
 	else
 		return ip_finish_output2(skb);
@@ -422,7 +430,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
 		IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-			  htonl(dst_mtu(&rt->u.dst)));
+			  htonl(ip_skb_dst_mtu(skb)));
 		kfree_skb(skb);
 		return -EMSGSIZE;
 	}
@@ -787,7 +795,9 @@ int ip_append_data(struct sock *sk,
 			inet->cork.addr = ipc->addr;
 		}
 		dst_hold(&rt->u.dst);
-		inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
+		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
+					    rt->u.dst.dev->mtu :
+					    dst_mtu(rt->u.dst.path);
 		inet->cork.rt = rt;
 		inet->cork.length = 0;
 		sk->sk_sndmsg_page = NULL;
@@ -1203,13 +1213,13 @@ int ip_push_pending_frames(struct sock *sk)
 	 * to fragment the frame generated here. No matter, what transforms
 	 * how transforms change size of the packet, it will come out.
 	 */
-	if (inet->pmtudisc != IP_PMTUDISC_DO)
+	if (inet->pmtudisc < IP_PMTUDISC_DO)
 		skb->local_df = 1;
 
 	/* DF bit is set when we want to see DF on outgoing frames.
 	 * If local_df is set too, we still allow to fragment this frame
 	 * locally. */
-	if (inet->pmtudisc == IP_PMTUDISC_DO ||
+	if (inet->pmtudisc >= IP_PMTUDISC_DO ||
 	    (skb->len <= dst_mtu(&rt->u.dst) &&
 	     ip_dont_fragment(sk, &rt->u.dst)))
 		df = htons(IP_DF);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index c199d231173..4d544573f48 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -542,7 +542,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 		inet->hdrincl = val ? 1 : 0;
 		break;
 	case IP_MTU_DISCOVER:
-		if (val<0 || val>2)
+		if (val<0 || val>3)
 			goto e_inval;
 		inet->pmtudisc = val;
 		break;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5a5b7d4ad31..f508171bab7 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -137,9 +137,17 @@ static int ip6_output2(struct sk_buff *skb)
 	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
 }
 
+static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
+{
+	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
+
+	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
+	       skb->dst->dev->mtu : dst_mtu(skb->dst);
+}
+
 int ip6_output(struct sk_buff *skb)
 {
-	if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) ||
+	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 				dst_allfrag(skb->dst))
 		return ip6_fragment(skb, ip6_output2);
 	else
@@ -566,7 +574,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	hlen = ip6_find_1stfragopt(skb, &prevhdr);
 	nexthdr = *prevhdr;
 
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = ip6_skb_dst_mtu(skb);
 
 	/* We must not fragment if the socket is set to force MTU discovery
 	 * or if the skb it not generated by a local socket.  (This last
@@ -1063,7 +1071,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		inet->cork.fl = *fl;
 		np->cork.hop_limit = hlimit;
 		np->cork.tclass = tclass;
-		mtu = dst_mtu(rt->u.dst.path);
+		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
+		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
 		if (np->frag_size < mtu) {
 			if (np->frag_size)
 				mtu = np->frag_size;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index da930fa089c..aa3d07c52a8 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -694,7 +694,7 @@ done:
 		retv = ip6_ra_control(sk, val, NULL);
 		break;
 	case IPV6_MTU_DISCOVER:
-		if (val<0 || val>2)
+		if (val<0 || val>3)
 			goto e_inval;
 		np->pmtudisc = val;
 		retv = 0;
-- 
cgit v1.2.3


From bf99f1bde3b3009af74874f3465f6861431fbb66 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 20 Apr 2007 15:56:20 -0700
Subject: [IPV6] SNMP: Netlink interface.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h |  1 +
 include/net/ipv6.h      |  1 +
 net/ipv6/addrconf.c     | 22 +++++++++++++++++-----
 net/ipv6/proc.c         | 32 ++++++++++++++++++++++++++++++++
 4 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 35ed3b5467f..604c2434f71 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -126,6 +126,7 @@ enum
 	IFLA_INET6_STATS,	/* statistics			*/
 	IFLA_INET6_MCAST,	/* MC things. What of them?	*/
 	IFLA_INET6_CACHEINFO,	/* time values and max reasm size */
+	IFLA_INET6_ICMP6STATS,	/* statistics (icmpv6)		*/
 	__IFLA_INET6_MAX
 };
 
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 00328b71a08..4408def379b 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -172,6 +172,7 @@ int snmp6_alloc_dev(struct inet6_dev *idev);
 int snmp6_free_dev(struct inet6_dev *idev);
 int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign);
 void snmp6_mib_free(void *ptr[2]);
+void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes);
 
 struct ip6_ra_chain
 {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1486f76f787..9ba9e92d193 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3433,6 +3433,8 @@ static inline size_t inet6_if_nlmsg_size(void)
 			nla_total_size(4) /* IFLA_INET6_FLAGS */
 			+ nla_total_size(sizeof(struct ifla_cacheinfo))
 			+ nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
+			+ nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
+			+ nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
 		 );
 }
 
@@ -3440,7 +3442,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 			     u32 pid, u32 seq, int event, unsigned int flags)
 {
 	struct net_device *dev = idev->dev;
-	struct nlattr *conf;
+	struct nlattr *nla;
 	struct ifinfomsg *hdr;
 	struct nlmsghdr *nlh;
 	void *protoinfo;
@@ -3480,12 +3482,22 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 	ci.retrans_time = idev->nd_parms->retrans_time;
 	NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
 
-	conf = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
-	if (conf == NULL)
+	nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32));
+	if (nla == NULL)
 		goto nla_put_failure;
-	ipv6_store_devconf(&idev->cnf, nla_data(conf), nla_len(conf));
+	ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla));
 
-	/* XXX - Statistics/MC not implemented */
+	/* XXX - MC not implemented */
+
+	nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
+	if (nla == NULL)
+		goto nla_put_failure;
+	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
+
+	nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
+	if (nla == NULL)
+		goto nla_put_failure;
+	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
 
 	nla_nest_end(skb, protoinfo);
 	return nlmsg_end(skb, nlh);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index fa3fb509f18..0dc55150151 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -207,6 +207,31 @@ static const struct file_operations snmp6_seq_fops = {
 	.release = single_release,
 };
 
+static inline void
+__snmp6_fill_stats(u64 *stats, void **mib, int items, int bytes)
+{
+	int i;
+	int pad = bytes - sizeof(u64) * items;
+	BUG_ON(pad < 0);
+	stats[0] = items;
+	for (i = 1; i < items; i++)
+		stats[i] = (u64)fold_field(mib, i);
+	memset(&stats[items], 0, pad);
+}
+
+void
+snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes)
+{
+	switch(attrtype) {
+	case IFLA_INET6_STATS:
+		__snmp6_fill_stats(stats, (void **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
+		break;
+	case IFLA_INET6_ICMP6STATS:
+		__snmp6_fill_stats(stats, (void **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
+		break;
+	}
+}
+
 int snmp6_register_dev(struct inet6_dev *idev)
 {
 	struct proc_dir_entry *p;
@@ -283,6 +308,13 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
 {
 	return 0;
 }
+
+void
+snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes)
+{
+	memset(stats, 0, sizeof(bytes));
+}
+
 #endif	/* CONFIG_PROC_FS */
 
 int snmp6_alloc_dev(struct inet6_dev *idev)
-- 
cgit v1.2.3


From 49ed67a9eee3c756263feed4474e4fcf5c8eaed2 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 20 Apr 2007 15:56:48 -0700
Subject: [IPV6] SNMP: Move some statistic bits to net/ipv6/proc.c.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 33 ---------------------------------
 net/ipv6/proc.c     | 30 ++++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 33 deletions(-)

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index df31cdd33cd..825d03e87ae 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -712,39 +712,6 @@ int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
 
 EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
 
-int
-snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
-{
-	if (ptr == NULL)
-		return -EINVAL;
-
-	ptr[0] = __alloc_percpu(mibsize);
-	if (!ptr[0])
-		goto err0;
-
-	ptr[1] = __alloc_percpu(mibsize);
-	if (!ptr[1])
-		goto err1;
-
-	return 0;
-
-err1:
-	free_percpu(ptr[0]);
-	ptr[0] = NULL;
-err0:
-	return -ENOMEM;
-}
-
-void
-snmp6_mib_free(void *ptr[2])
-{
-	if (ptr == NULL)
-		return;
-	free_percpu(ptr[0]);
-	free_percpu(ptr[1]);
-	ptr[0] = ptr[1] = NULL;
-}
-
 static int __init init_ipv6_mibs(void)
 {
 	if (snmp6_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 0dc55150151..5c3ce1c687c 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -346,4 +346,34 @@ int snmp6_free_dev(struct inet6_dev *idev)
 	return 0;
 }
 
+int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
+{
+	if (ptr == NULL)
+		return -EINVAL;
+
+	ptr[0] = __alloc_percpu(mibsize);
+	if (!ptr[0])
+		goto err0;
+
+	ptr[1] = __alloc_percpu(mibsize);
+	if (!ptr[1])
+		goto err1;
+
+	return 0;
+
+err1:
+	free_percpu(ptr[0]);
+	ptr[0] = NULL;
+err0:
+	return -ENOMEM;
+}
+
+void snmp6_mib_free(void *ptr[2])
+{
+	if (ptr == NULL)
+		return;
+	free_percpu(ptr[0]);
+	free_percpu(ptr[1]);
+	ptr[0] = ptr[1] = NULL;
+}
 
-- 
cgit v1.2.3


From 334901700f9f58993ebd7f6136d3f9062460d34d Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 20 Apr 2007 15:57:15 -0700
Subject: [IPV4] SNMP: Move some statistic bits to net/ipv4/proc.c.

This also fixes memory leak in error path.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h   |  3 +++
 net/ipv4/af_inet.c | 59 ++++++++++++++++++++++++++++++++++++------------------
 net/ipv4/proc.c    | 25 +++++++++++++++++++++++
 3 files changed, 67 insertions(+), 20 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 75f226d26e0..f41ce07f670 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -166,6 +166,9 @@ DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
 #define NET_ADD_STATS_BH(field, adnd)	SNMP_ADD_STATS_BH(net_statistics, field, adnd)
 #define NET_ADD_STATS_USER(field, adnd)	SNMP_ADD_STATS_USER(net_statistics, field, adnd)
 
+extern int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign);
+extern void snmp_mib_free(void *ptr[2]);
+
 extern int sysctl_local_port_range[2];
 extern int sysctl_ip_default_ttl;
 extern int sysctl_ip_nonlocal_bind;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 45ced52c03d..a33ca7e7e8f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1245,28 +1245,47 @@ static struct net_protocol icmp_protocol = {
 
 static int __init init_ipv4_mibs(void)
 {
-	net_statistics[0] = alloc_percpu(struct linux_mib);
-	net_statistics[1] = alloc_percpu(struct linux_mib);
-	ip_statistics[0] = alloc_percpu(struct ipstats_mib);
-	ip_statistics[1] = alloc_percpu(struct ipstats_mib);
-	icmp_statistics[0] = alloc_percpu(struct icmp_mib);
-	icmp_statistics[1] = alloc_percpu(struct icmp_mib);
-	tcp_statistics[0] = alloc_percpu(struct tcp_mib);
-	tcp_statistics[1] = alloc_percpu(struct tcp_mib);
-	udp_statistics[0] = alloc_percpu(struct udp_mib);
-	udp_statistics[1] = alloc_percpu(struct udp_mib);
-	udplite_statistics[0] = alloc_percpu(struct udp_mib);
-	udplite_statistics[1] = alloc_percpu(struct udp_mib);
-	if (!
-	    (net_statistics[0] && net_statistics[1] && ip_statistics[0]
-	     && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
-	     && udp_statistics[0] && udp_statistics[1]
-	     && udplite_statistics[0] && udplite_statistics[1]             ) )
-		return -ENOMEM;
-
-	(void) tcp_mib_init();
+	if (snmp_mib_init((void **)net_statistics,
+			  sizeof(struct linux_mib),
+			  __alignof__(struct linux_mib)) < 0)
+		goto err_net_mib;
+	if (snmp_mib_init((void **)ip_statistics,
+			  sizeof(struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
+		goto err_ip_mib;
+	if (snmp_mib_init((void **)icmp_statistics,
+			  sizeof(struct icmp_mib),
+			  __alignof__(struct icmp_mib)) < 0)
+		goto err_icmp_mib;
+	if (snmp_mib_init((void **)tcp_statistics,
+			  sizeof(struct tcp_mib),
+			  __alignof__(struct tcp_mib)) < 0)
+		goto err_tcp_mib;
+	if (snmp_mib_init((void **)udp_statistics,
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
+		goto err_udp_mib;
+	if (snmp_mib_init((void **)udplite_statistics,
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
+		goto err_udplite_mib;
+
+	tcp_mib_init();
 
 	return 0;
+
+err_udplite_mib:
+	snmp_mib_free((void **)udp_statistics);
+err_udp_mib:
+	snmp_mib_free((void **)tcp_statistics);
+err_tcp_mib:
+	snmp_mib_free((void **)icmp_statistics);
+err_icmp_mib:
+	snmp_mib_free((void **)ip_statistics);
+err_ip_mib:
+	snmp_mib_free((void **)net_statistics);
+err_net_mib:
+	return -ENOMEM;
 }
 
 static int ipv4_proc_init(void);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ae68a691e8c..97952d54ae8 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -391,3 +391,28 @@ out_netstat:
 	goto out;
 }
 
+int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
+{
+	BUG_ON(ptr == NULL);
+	ptr[0] = __alloc_percpu(mibsize);
+	if (!ptr[0])
+		goto err0;
+	ptr[1] = __alloc_percpu(mibsize);
+	if (!ptr[1])
+		goto err1;
+	return 0;
+err1:
+	free_percpu(ptr[0]);
+	ptr[0] = NULL;
+err0:
+	return -ENOMEM;
+}
+
+void snmp_mib_free(void *ptr[2])
+{
+	BUG_ON(ptr == NULL);
+	free_percpu(ptr[0]);
+	free_percpu(ptr[1]);
+	ptr[0] = ptr[1] = NULL;
+}
+
-- 
cgit v1.2.3


From 1370b5a59b941ac3873b5e8614d496e9f481d670 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 20 Apr 2007 15:57:45 -0700
Subject: [IPV6] SNMP: Export statistics via netlink without CONFIG_PROC_FS.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/proc.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 5c3ce1c687c..c847cef626a 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -141,6 +141,7 @@ static struct snmp_mib snmp6_udplite6_list[] = {
 	SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
 	SNMP_MIB_SENTINEL
 };
+#endif	/* CONFIG_PROC_FS */
 
 static unsigned long
 fold_field(void *mib[], int offt)
@@ -155,6 +156,7 @@ fold_field(void *mib[], int offt)
 	return res;
 }
 
+#ifdef CONFIG_PROC_FS
 static inline void
 snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist)
 {
@@ -206,6 +208,7 @@ static const struct file_operations snmp6_seq_fops = {
 	.llseek	 = seq_lseek,
 	.release = single_release,
 };
+#endif	/* CONFIG_PROC_FS */
 
 static inline void
 __snmp6_fill_stats(u64 *stats, void **mib, int items, int bytes)
@@ -232,6 +235,7 @@ snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes)
 	}
 }
 
+#ifdef CONFIG_PROC_FS
 int snmp6_register_dev(struct inet6_dev *idev)
 {
 	struct proc_dir_entry *p;
@@ -309,12 +313,6 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
 	return 0;
 }
 
-void
-snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes)
-{
-	memset(stats, 0, sizeof(bytes));
-}
-
 #endif	/* CONFIG_PROC_FS */
 
 int snmp6_alloc_dev(struct inet6_dev *idev)
-- 
cgit v1.2.3


From 80feaacb8a6400a9540a961b6743c69a5896b937 Mon Sep 17 00:00:00 2001
From: "Peter P. Waskiewicz Jr" <peter.p.waskiewicz.jr@intel.com>
Date: Fri, 20 Apr 2007 16:05:39 -0700
Subject: [AF_PACKET]: Add option to return orig_dev to userspace.

Add a packet socket option to allow the orig_dev index to be returned
to userspace when passing traffic through a decapsulated device, such
as the bonding driver.

This is very useful for layer 2 traffic being able to report which
physical device actually received the traffic, instead of having the
encapsulating device hide that information.

The new option is called PACKET_ORIGDEV.

Signed-off-by: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_packet.h |  1 +
 net/packet/af_packet.c    | 32 +++++++++++++++++++++++++++++---
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index f3de05c3067..ad09609227f 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -42,6 +42,7 @@ struct sockaddr_ll
 #define PACKET_STATISTICS		6
 #define PACKET_COPY_THRESH		7
 #define PACKET_AUXDATA			8
+#define PACKET_ORIGDEV			9
 
 struct tpacket_stats
 {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 36388b2f32f..02e401cd683 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -201,7 +201,8 @@ struct packet_sock {
 	struct packet_type	prot_hook;
 	spinlock_t		bind_lock;
 	unsigned int		running:1,	/* prot_hook is attached*/
-				auxdata:1;
+				auxdata:1,
+				origdev:1;
 	int			ifindex;	/* bound device		*/
 	__be16			num;
 #ifdef CONFIG_PACKET_MULTICAST
@@ -528,7 +529,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 	sll->sll_hatype = dev->type;
 	sll->sll_protocol = skb->protocol;
 	sll->sll_pkttype = skb->pkt_type;
-	sll->sll_ifindex = dev->ifindex;
+	if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
+		sll->sll_ifindex = orig_dev->ifindex;
+	else
+		sll->sll_ifindex = dev->ifindex;
 	sll->sll_halen = 0;
 
 	if (dev->hard_header_parse)
@@ -673,7 +677,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	sll->sll_hatype = dev->type;
 	sll->sll_protocol = skb->protocol;
 	sll->sll_pkttype = skb->pkt_type;
-	sll->sll_ifindex = dev->ifindex;
+	if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
+		sll->sll_ifindex = orig_dev->ifindex;
+	else
+		sll->sll_ifindex = dev->ifindex;
 
 	h->tp_status = status;
 	smp_mb();
@@ -1413,6 +1420,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		po->auxdata = !!val;
 		return 0;
 	}
+	case PACKET_ORIGDEV:
+	{
+		int val;
+
+		if (optlen < sizeof(val))
+			return -EINVAL;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+
+		po->origdev = !!val;
+		return 0;
+	}
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -1454,6 +1473,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 			len = sizeof(int);
 		val = po->auxdata;
 
+		data = &val;
+		break;
+	case PACKET_ORIGDEV:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		val = po->origdev;
+
 		data = &val;
 		break;
 	default:
-- 
cgit v1.2.3


From 75606dc69adcfff433bca0ff747538d8495da0ab Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Fri, 20 Apr 2007 16:06:45 -0700
Subject: [AX25/NETROM/ROSE]: Convert to use modern wait queue API

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ax25/af_ax25.c     | 77 ++++++++++++++++++++---------------------
 net/netrom/af_netrom.c | 93 +++++++++++++++++++++++++-------------------------
 net/rose/af_rose.c     | 54 ++++++++++++++---------------
 3 files changed, 109 insertions(+), 115 deletions(-)

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 5f28887822e..6ded95272a5 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1127,22 +1127,22 @@ static int __must_check ax25_connect(struct socket *sock,
 		switch (sk->sk_state) {
 		case TCP_SYN_SENT: /* still trying */
 			err = -EINPROGRESS;
-			goto out;
+			goto out_release;
 
 		case TCP_ESTABLISHED: /* connection established */
 			sock->state = SS_CONNECTED;
-			goto out;
+			goto out_release;
 
 		case TCP_CLOSE: /* connection refused */
 			sock->state = SS_UNCONNECTED;
 			err = -ECONNREFUSED;
-			goto out;
+			goto out_release;
 		}
 	}
 
 	if (sk->sk_state == TCP_ESTABLISHED && sk->sk_type == SOCK_SEQPACKET) {
 		err = -EISCONN;	/* No reconnect on a seqpacket socket */
-		goto out;
+		goto out_release;
 	}
 
 	sk->sk_state   = TCP_CLOSE;
@@ -1159,12 +1159,12 @@ static int __must_check ax25_connect(struct socket *sock,
 		/* Valid number of digipeaters ? */
 		if (fsa->fsa_ax25.sax25_ndigis < 1 || fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS) {
 			err = -EINVAL;
-			goto out;
+			goto out_release;
 		}
 
 		if ((digi = kmalloc(sizeof(ax25_digi), GFP_KERNEL)) == NULL) {
 			err = -ENOBUFS;
-			goto out;
+			goto out_release;
 		}
 
 		digi->ndigi      = fsa->fsa_ax25.sax25_ndigis;
@@ -1194,7 +1194,7 @@ static int __must_check ax25_connect(struct socket *sock,
 			current->comm);
 		if ((err = ax25_rt_autobind(ax25, &fsa->fsa_ax25.sax25_call)) < 0) {
 			kfree(digi);
-			goto out;
+			goto out_release;
 		}
 
 		ax25_fillin_cb(ax25, ax25->ax25_dev);
@@ -1203,7 +1203,7 @@ static int __must_check ax25_connect(struct socket *sock,
 		if (ax25->ax25_dev == NULL) {
 			kfree(digi);
 			err = -EHOSTUNREACH;
-			goto out;
+			goto out_release;
 		}
 	}
 
@@ -1213,7 +1213,7 @@ static int __must_check ax25_connect(struct socket *sock,
 		kfree(digi);
 		err = -EADDRINUSE;		/* Already such a connection */
 		ax25_cb_put(ax25t);
-		goto out;
+		goto out_release;
 	}
 
 	ax25->dest_addr = fsa->fsa_ax25.sax25_call;
@@ -1223,7 +1223,7 @@ static int __must_check ax25_connect(struct socket *sock,
 	if (sk->sk_type != SOCK_SEQPACKET) {
 		sock->state = SS_CONNECTED;
 		sk->sk_state   = TCP_ESTABLISHED;
-		goto out;
+		goto out_release;
 	}
 
 	/* Move to connecting socket, ax.25 lapb WAIT_UA.. */
@@ -1255,55 +1255,53 @@ static int __must_check ax25_connect(struct socket *sock,
 	/* Now the loop */
 	if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) {
 		err = -EINPROGRESS;
-		goto out;
+		goto out_release;
 	}
 
 	if (sk->sk_state == TCP_SYN_SENT) {
-		struct task_struct *tsk = current;
-		DECLARE_WAITQUEUE(wait, tsk);
+		DEFINE_WAIT(wait);
 
-		add_wait_queue(sk->sk_sleep, &wait);
 		for (;;) {
+			prepare_to_wait(sk->sk_sleep, &wait,
+			                TASK_INTERRUPTIBLE);
 			if (sk->sk_state != TCP_SYN_SENT)
 				break;
-			set_current_state(TASK_INTERRUPTIBLE);
-			release_sock(sk);
-			if (!signal_pending(tsk)) {
+			if (!signal_pending(current)) {
+				release_sock(sk);
 				schedule();
 				lock_sock(sk);
 				continue;
 			}
-			current->state = TASK_RUNNING;
-			remove_wait_queue(sk->sk_sleep, &wait);
-			return -ERESTARTSYS;
+			err = -ERESTARTSYS;
+			break;
 		}
-		current->state = TASK_RUNNING;
-		remove_wait_queue(sk->sk_sleep, &wait);
+		finish_wait(sk->sk_sleep, &wait);
+
+		if (err)
+			goto out_release;
 	}
 
 	if (sk->sk_state != TCP_ESTABLISHED) {
 		/* Not in ABM, not in WAIT_UA -> failed */
 		sock->state = SS_UNCONNECTED;
 		err = sock_error(sk);	/* Always set at this point */
-		goto out;
+		goto out_release;
 	}
 
 	sock->state = SS_CONNECTED;
 
-	err=0;
-out:
+	err = 0;
+out_release:
 	release_sock(sk);
 
 	return err;
 }
 
-
 static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
 {
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
 	struct sk_buff *skb;
 	struct sock *newsk;
+	DEFINE_WAIT(wait);
 	struct sock *sk;
 	int err = 0;
 
@@ -1328,30 +1326,29 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
 	 *	The read queue this time is holding sockets ready to use
 	 *	hooked into the SABM we saved
 	 */
-	add_wait_queue(sk->sk_sleep, &wait);
 	for (;;) {
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		if (skb)
 			break;
 
-		release_sock(sk);
-		current->state = TASK_INTERRUPTIBLE;
 		if (flags & O_NONBLOCK) {
-			current->state = TASK_RUNNING;
-			remove_wait_queue(sk->sk_sleep, &wait);
-			return -EWOULDBLOCK;
+			err = -EWOULDBLOCK;
+			break;
 		}
-		if (!signal_pending(tsk)) {
+		if (!signal_pending(current)) {
+			release_sock(sk);
 			schedule();
 			lock_sock(sk);
 			continue;
 		}
-		current->state = TASK_RUNNING;
-		remove_wait_queue(sk->sk_sleep, &wait);
-		return -ERESTARTSYS;
+		err = -ERESTARTSYS;
+		break;
 	}
-	current->state = TASK_RUNNING;
-	remove_wait_queue(sk->sk_sleep, &wait);
+	finish_wait(sk->sk_sleep, &wait);
+
+	if (err)
+		goto out;
 
 	newsk		 = skb->sk;
 	newsk->sk_socket = newsock;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 5dc7448925d..5d4a26c2aa0 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -625,42 +625,42 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
 	ax25_address *source = NULL;
 	ax25_uid_assoc *user;
 	struct net_device *dev;
+	int err = 0;
 
 	lock_sock(sk);
 	if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
 		sock->state = SS_CONNECTED;
-		release_sock(sk);
-		return 0;	/* Connect completed during a ERESTARTSYS event */
+		goto out_release;	/* Connect completed during a ERESTARTSYS event */
 	}
 
 	if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) {
 		sock->state = SS_UNCONNECTED;
-		release_sock(sk);
-		return -ECONNREFUSED;
+		err = -ECONNREFUSED;
+		goto out_release;
 	}
 
 	if (sk->sk_state == TCP_ESTABLISHED) {
-		release_sock(sk);
-		return -EISCONN;	/* No reconnect on a seqpacket socket */
+		err = -EISCONN;	/* No reconnect on a seqpacket socket */
+		goto out_release;
 	}
 
 	sk->sk_state   = TCP_CLOSE;
 	sock->state = SS_UNCONNECTED;
 
 	if (addr_len != sizeof(struct sockaddr_ax25) && addr_len != sizeof(struct full_sockaddr_ax25)) {
-		release_sock(sk);
-		return -EINVAL;
+		err = -EINVAL;
+		goto out_release;
 	}
 	if (addr->sax25_family != AF_NETROM) {
-		release_sock(sk);
-		return -EINVAL;
+		err = -EINVAL;
+		goto out_release;
 	}
 	if (sock_flag(sk, SOCK_ZAPPED)) {	/* Must bind first - autobinding in this may or may not work */
 		sock_reset_flag(sk, SOCK_ZAPPED);
 
 		if ((dev = nr_dev_first()) == NULL) {
-			release_sock(sk);
-			return -ENETUNREACH;
+			err = -ENETUNREACH;
+			goto out_release;
 		}
 		source = (ax25_address *)dev->dev_addr;
 
@@ -671,8 +671,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
 		} else {
 			if (ax25_uid_policy && !capable(CAP_NET_ADMIN)) {
 				dev_put(dev);
-				release_sock(sk);
-				return -EPERM;
+				err = -EPERM;
+				goto out_release;
 			}
 			nr->user_addr   = *source;
 		}
@@ -707,8 +707,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
 
 	/* Now the loop */
 	if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) {
-		release_sock(sk);
-		return -EINPROGRESS;
+		err = -EINPROGRESS;
+		goto out_release;
 	}
 
 	/*
@@ -716,46 +716,46 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
 	 * closed.
 	 */
 	if (sk->sk_state == TCP_SYN_SENT) {
-		struct task_struct *tsk = current;
-		DECLARE_WAITQUEUE(wait, tsk);
+		DEFINE_WAIT(wait);
 
-		add_wait_queue(sk->sk_sleep, &wait);
 		for (;;) {
-			set_current_state(TASK_INTERRUPTIBLE);
+			prepare_to_wait(sk->sk_sleep, &wait,
+			                TASK_INTERRUPTIBLE);
 			if (sk->sk_state != TCP_SYN_SENT)
 				break;
-			release_sock(sk);
-			if (!signal_pending(tsk)) {
+			if (!signal_pending(current)) {
+				release_sock(sk);
 				schedule();
 				lock_sock(sk);
 				continue;
 			}
-			current->state = TASK_RUNNING;
-			remove_wait_queue(sk->sk_sleep, &wait);
-			return -ERESTARTSYS;
+			err = -ERESTARTSYS;
+			break;
 		}
-		current->state = TASK_RUNNING;
-		remove_wait_queue(sk->sk_sleep, &wait);
+		finish_wait(sk->sk_sleep, &wait);
+		if (err)
+			goto out_release;
 	}
 
 	if (sk->sk_state != TCP_ESTABLISHED) {
 		sock->state = SS_UNCONNECTED;
-		release_sock(sk);
-		return sock_error(sk);	/* Always set at this point */
+		err = sock_error(sk);	/* Always set at this point */
+		goto out_release;
 	}
 
 	sock->state = SS_CONNECTED;
+
+out_release:
 	release_sock(sk);
 
-	return 0;
+	return err;
 }
 
 static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
 {
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
 	struct sk_buff *skb;
 	struct sock *newsk;
+	DEFINE_WAIT(wait);
 	struct sock *sk;
 	int err = 0;
 
@@ -765,42 +765,40 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
 	lock_sock(sk);
 	if (sk->sk_type != SOCK_SEQPACKET) {
 		err = -EOPNOTSUPP;
-		goto out;
+		goto out_release;
 	}
 
 	if (sk->sk_state != TCP_LISTEN) {
 		err = -EINVAL;
-		goto out;
+		goto out_release;
 	}
 
 	/*
 	 *	The write queue this time is holding sockets ready to use
 	 *	hooked into the SABM we saved
 	 */
-	add_wait_queue(sk->sk_sleep, &wait);
 	for (;;) {
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		if (skb)
 			break;
 
-		current->state = TASK_INTERRUPTIBLE;
-		release_sock(sk);
 		if (flags & O_NONBLOCK) {
-			current->state = TASK_RUNNING;
-			remove_wait_queue(sk->sk_sleep, &wait);
-			return -EWOULDBLOCK;
+			err = -EWOULDBLOCK;
+			break;
 		}
-		if (!signal_pending(tsk)) {
+		if (!signal_pending(current)) {
+			release_sock(sk);
 			schedule();
 			lock_sock(sk);
 			continue;
 		}
-		current->state = TASK_RUNNING;
-		remove_wait_queue(sk->sk_sleep, &wait);
-		return -ERESTARTSYS;
+		err = -ERESTARTSYS;
+		break;
 	}
-	current->state = TASK_RUNNING;
-	remove_wait_queue(sk->sk_sleep, &wait);
+	finish_wait(sk->sk_sleep, &wait);
+	if (err)
+		goto out_release;
 
 	newsk = skb->sk;
 	newsk->sk_socket = newsock;
@@ -811,8 +809,9 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
 	sk_acceptq_removed(sk);
 	newsock->sk = newsk;
 
-out:
+out_release:
 	release_sock(sk);
+
 	return err;
 }
 
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 806bf6f5dc6..d476c43d521 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -812,26 +812,26 @@ rose_try_next_neigh:
 	 * closed.
 	 */
 	if (sk->sk_state == TCP_SYN_SENT) {
-		struct task_struct *tsk = current;
-		DECLARE_WAITQUEUE(wait, tsk);
+		DEFINE_WAIT(wait);
 
-		add_wait_queue(sk->sk_sleep, &wait);
 		for (;;) {
-			set_current_state(TASK_INTERRUPTIBLE);
+			prepare_to_wait(sk->sk_sleep, &wait,
+			                TASK_INTERRUPTIBLE);
 			if (sk->sk_state != TCP_SYN_SENT)
 				break;
-			release_sock(sk);
-			if (!signal_pending(tsk)) {
+			if (!signal_pending(current)) {
+				release_sock(sk);
 				schedule();
 				lock_sock(sk);
 				continue;
 			}
-			current->state = TASK_RUNNING;
-			remove_wait_queue(sk->sk_sleep, &wait);
-			return -ERESTARTSYS;
+			err = -ERESTARTSYS;
+			break;
 		}
-		current->state = TASK_RUNNING;
-		remove_wait_queue(sk->sk_sleep, &wait);
+		finish_wait(sk->sk_sleep, &wait);
+
+		if (err)
+			goto out_release;
 	}
 
 	if (sk->sk_state != TCP_ESTABLISHED) {
@@ -856,10 +856,9 @@ out_release:
 
 static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
 {
-	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
 	struct sk_buff *skb;
 	struct sock *newsk;
+	DEFINE_WAIT(wait);
 	struct sock *sk;
 	int err = 0;
 
@@ -869,42 +868,41 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
 	lock_sock(sk);
 	if (sk->sk_type != SOCK_SEQPACKET) {
 		err = -EOPNOTSUPP;
-		goto out;
+		goto out_release;
 	}
 
 	if (sk->sk_state != TCP_LISTEN) {
 		err = -EINVAL;
-		goto out;
+		goto out_release;
 	}
 
 	/*
 	 *	The write queue this time is holding sockets ready to use
 	 *	hooked into the SABM we saved
 	 */
-	add_wait_queue(sk->sk_sleep, &wait);
 	for (;;) {
+		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		if (skb)
 			break;
 
-		current->state = TASK_INTERRUPTIBLE;
-		release_sock(sk);
 		if (flags & O_NONBLOCK) {
-			current->state = TASK_RUNNING;
-			remove_wait_queue(sk->sk_sleep, &wait);
-			return -EWOULDBLOCK;
+			err = -EWOULDBLOCK;
+			break;
 		}
-		if (!signal_pending(tsk)) {
+		if (!signal_pending(current)) {
+			release_sock(sk);
 			schedule();
 			lock_sock(sk);
 			continue;
 		}
-		current->state = TASK_RUNNING;
-		remove_wait_queue(sk->sk_sleep, &wait);
-		return -ERESTARTSYS;
+		err = -ERESTARTSYS;
+		break;
 	}
-	current->state = TASK_RUNNING;
-	remove_wait_queue(sk->sk_sleep, &wait);
+	finish_wait(sk->sk_sleep, &wait);
+	if (err)
+		goto out_release;
 
 	newsk = skb->sk;
 	newsk->sk_socket = newsock;
@@ -916,7 +914,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
 	sk->sk_ack_backlog--;
 	newsock->sk = newsk;
 
-out:
+out_release:
 	release_sock(sk);
 
 	return err;
-- 
cgit v1.2.3


From 3e6cf558b0098a15d8c360c4eaad3e4d719a555a Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Fri, 20 Apr 2007 16:39:17 -0700
Subject: [BRIDGE]: Fix warning in net-2.6.22

The following is leftover from earlier change in net-2.6.22.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 39b87dad35b..5e1892d8d87 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -88,7 +88,6 @@ static int br_set_mac_address(struct net_device *dev, void *p)
 {
 	struct net_bridge *br = netdev_priv(dev);
 	struct sockaddr *addr = p;
-	struct net_bridge_port *port;
 
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EINVAL;
-- 
cgit v1.2.3


From 0c6fcc8a8cfcc737d05b6be8b2c3e931ef99cfc2 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Fri, 20 Apr 2007 16:40:01 -0700
Subject: [NET] skbuff: skb_store_bits const is backwards

Getting warnings becuase skb_store_bits has skb as constant,
but the function overwrites it. Looks like const was on the
wrong side.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 ++--
 net/core/skbuff.c      | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c413afbe0b9..50f6f6a094c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1494,8 +1494,8 @@ extern __wsum	       skb_checksum(const struct sk_buff *skb, int offset,
 				    int len, __wsum csum);
 extern int	       skb_copy_bits(const struct sk_buff *skb, int offset,
 				     void *to, int len);
-extern int	       skb_store_bits(const struct sk_buff *skb, int offset,
-				      void *from, int len);
+extern int	       skb_store_bits(struct sk_buff *skb, int offset,
+				      const void *from, int len);
 extern __wsum	       skb_copy_and_csum_bits(const struct sk_buff *skb,
 					      int offset, u8 *to, int len,
 					      __wsum csum);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c7a1b24b737..6b50d58cce1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1129,7 +1129,7 @@ fault:
  *	traversing fragment lists and such.
  */
 
-int skb_store_bits(const struct sk_buff *skb, int offset, void *from, int len)
+int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
 {
 	int i, copy;
 	int start = skb_headlen(skb);
-- 
cgit v1.2.3


From bfafb26e11849fe99e03cc1902a91f7f65354e47 Mon Sep 17 00:00:00 2001
From: Florian Zumbiehl <florz@florz.de>
Date: Fri, 20 Apr 2007 16:56:31 -0700
Subject: [PPPoE]: miscellaneous smaller cleanups

below is a patch that just removes dead code/initializers without any
effect (first access is an assignment) that I stumbled accross while
reading the source.

Signed-off-by: Florian Zumbiehl <florz@florz.de>
Acked-by: Michal Ostrowski <mostrows@earthlink.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pppoe.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index e9fb616ff66..f761a9aae4c 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -207,7 +207,7 @@ static inline struct pppox_sock *get_item(unsigned long sid,
 
 static inline struct pppox_sock *get_item_by_addr(struct sockaddr_pppox *sp)
 {
-	struct net_device *dev = NULL;
+	struct net_device *dev;
 	int ifindex;
 
 	dev = dev_get_by_name(sp->sa_addr.pppoe.dev);
@@ -222,9 +222,6 @@ static inline int set_item(struct pppox_sock *po)
 {
 	int i;
 
-	if (!po)
-		return -EINVAL;
-
 	write_lock_bh(&pppoe_hash_lock);
 	i = __set_item(po);
 	write_unlock_bh(&pppoe_hash_lock);
@@ -344,7 +341,7 @@ static struct notifier_block pppoe_notifier = {
 static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb)
 {
 	struct pppox_sock *po = pppox_sk(sk);
-	struct pppox_sock *relay_po = NULL;
+	struct pppox_sock *relay_po;
 
 	if (sk->sk_state & PPPOX_BOUND) {
 		struct pppoe_hdr *ph = pppoe_hdr(skb);
@@ -514,7 +511,6 @@ static int pppoe_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
 	struct pppox_sock *po;
-	int error = 0;
 
 	if (!sk)
 		return 0;
@@ -543,7 +539,7 @@ static int pppoe_release(struct socket *sock)
 	skb_queue_purge(&sk->sk_receive_queue);
 	sock_put(sk);
 
-	return error;
+	return 0;
 }
 
 
@@ -762,10 +758,10 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd,
 static int pppoe_sendmsg(struct kiocb *iocb, struct socket *sock,
 		  struct msghdr *m, size_t total_len)
 {
-	struct sk_buff *skb = NULL;
+	struct sk_buff *skb;
 	struct sock *sk = sock->sk;
 	struct pppox_sock *po = pppox_sk(sk);
-	int error = 0;
+	int error;
 	struct pppoe_hdr hdr;
 	struct pppoe_hdr *ph;
 	struct net_device *dev;
@@ -930,7 +926,7 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock,
 		  struct msghdr *m, size_t total_len, int flags)
 {
 	struct sock *sk = sock->sk;
-	struct sk_buff *skb = NULL;
+	struct sk_buff *skb;
 	int error = 0;
 
 	if (sk->sk_state & PPPOX_BOUND) {
@@ -941,9 +937,8 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock,
 	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
 				flags & MSG_DONTWAIT, &error);
 
-	if (error < 0) {
+	if (error < 0)
 		goto end;
-	}
 
 	m->msg_namelen = 0;
 
@@ -986,7 +981,7 @@ out:
 
 static __inline__ struct pppox_sock *pppoe_get_idx(loff_t pos)
 {
-	struct pppox_sock *po = NULL;
+	struct pppox_sock *po;
 	int i = 0;
 
 	for (; i < PPPOE_HASH_SIZE; i++) {
-- 
cgit v1.2.3


From 74b885cf86def9bc836772e3c1788c00b72a35c9 Mon Sep 17 00:00:00 2001
From: Florian Zumbiehl <florz@florz.de>
Date: Fri, 20 Apr 2007 16:57:27 -0700
Subject: [PPPOE]: race between interface going down and connect()

below you find a patch that (hopefully) fixes a race between an interface
going down and a connect() to a peer on that interface. Before,
connect() would determine that an interface is up, then the interface
could go down and all entries referring to that interface in the
item_hash_table would be marked as ZOMBIEs and their references to
the device would be freed, and after that, connect() would put a new
entry into the hash table referring to the device that meanwhile is
down already - which also would cause unregister_netdevice() to wait
until the socket has been release()d.

This patch does not suffice if we are not allowed to accept connect()s
referring to a device that we already acked a NETDEV_GOING_DOWN for
(that is: all references are only guaranteed to be freed after
NETDEV_DOWN has been acknowledged, not necessarily after the
NETDEV_GOING_DOWN already). And if we are allowed to, we could avoid
looking through the hash table upon NETDEV_GOING_DOWN completely and
only do that once we get the NETDEV_DOWN ...

mostrows:
pppoe_flush_dev is called on NETDEV_GOING_DOWN and NETDEV_DOWN to deal with
this "late connect" issue.  Ideally one would hope to notify users at the
"NETDEV_GOING_DOWN" phase (just to pretend to be nice).  However, it is the
NETDEV_DOWN scan that takes all the responsibility for ensuring nobody is
hanging around at that time.

Signed-off-by: Florian Zumbiehl <florz@florz.de>
Acked-by: Michal Ostrowski <mostrows@earthlink.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pppoe.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index f761a9aae4c..bc4fc30bc85 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -218,17 +218,6 @@ static inline struct pppox_sock *get_item_by_addr(struct sockaddr_pppox *sp)
 	return get_item(sp->sa_addr.pppoe.sid, sp->sa_addr.pppoe.remote, ifindex);
 }
 
-static inline int set_item(struct pppox_sock *po)
-{
-	int i;
-
-	write_lock_bh(&pppoe_hash_lock);
-	i = __set_item(po);
-	write_unlock_bh(&pppoe_hash_lock);
-
-	return i;
-}
-
 static inline struct pppox_sock *delete_item(unsigned long sid, char *addr, int ifindex)
 {
 	struct pppox_sock *ret;
@@ -595,14 +584,18 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
 		po->pppoe_dev = dev;
 		po->pppoe_ifindex = dev->ifindex;
 
-		if (!(dev->flags & IFF_UP))
+		write_lock_bh(&pppoe_hash_lock);
+		if (!(dev->flags & IFF_UP)){
+			write_unlock_bh(&pppoe_hash_lock);
 			goto err_put;
+		}
 
 		memcpy(&po->pppoe_pa,
 		       &sp->sa_addr.pppoe,
 		       sizeof(struct pppoe_addr));
 
-		error = set_item(po);
+		error = __set_item(po);
+		write_unlock_bh(&pppoe_hash_lock);
 		if (error < 0)
 			goto err_put;
 
-- 
cgit v1.2.3


From 202a03acf9994076055df40ae093a5c5474ad0bd Mon Sep 17 00:00:00 2001
From: Florian Zumbiehl <florz@florz.de>
Date: Fri, 20 Apr 2007 16:58:14 -0700
Subject: [PPPOE]: memory leak when socket is release()d before PPPIOCGCHAN has
 been called on it

below you find a patch that fixes a memory leak when a PPPoE socket is
release()d after it has been connect()ed, but before the PPPIOCGCHAN ioctl
ever has been called on it.

This is somewhat of a security problem, too, since PPPoE sockets can be
created by any user, so any user can easily allocate all the machine's
RAM to non-swappable address space and thus DoS the system.

Is there any specific reason for PPPoE sockets being available to any
unprivileged process, BTW? After all, you need a packet socket for the
discovery stage anyway, so it's unlikely that any unprivileged process
will ever need to create a PPPoE socket, no? Allocating all session IDs
for a known AC is a kind of DoS, too, after all - with Juniper ERXes,
this is really easy, actually, since they don't ever assign session ids
above 8000 ...

Signed-off-by: Florian Zumbiehl <florz@florz.de>
Acked-by: Michal Ostrowski <mostrows@earthlink.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pppox.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/pppox.c b/drivers/net/pppox.c
index 9315046b3f5..3f8115db4d5 100644
--- a/drivers/net/pppox.c
+++ b/drivers/net/pppox.c
@@ -58,7 +58,7 @@ void pppox_unbind_sock(struct sock *sk)
 {
 	/* Clear connection to ppp device, if attached. */
 
-	if (sk->sk_state & (PPPOX_BOUND | PPPOX_ZOMBIE)) {
+	if (sk->sk_state & (PPPOX_BOUND | PPPOX_CONNECTED | PPPOX_ZOMBIE)) {
 		ppp_unregister_channel(&pppox_sk(sk)->chan);
 		sk->sk_state = PPPOX_DEAD;
 	}
-- 
cgit v1.2.3


From 42dc9cd54b7290f862874a2544e50395e5719985 Mon Sep 17 00:00:00 2001
From: Michal Ostrowski <mostrows@earthlink.net>
Date: Fri, 20 Apr 2007 16:59:24 -0700
Subject: [PPPOE]: Fix device tear-down notification.

pppoe_flush_dev() kicks all sockets bound to a device that is going down.
In doing so, locks must be taken in the right order consistently (sock lock,
followed by the pppoe_hash_lock).  However, the scan process is based on
us holding the sock lock.  So, when something is found in the scan we must
release the lock we're holding and grab the sock lock.

This patch fixes race conditions between this code and pppoe_release(),
both of which perform similar functions but would naturally prefer to grab
locks in opposing orders.  Both code paths are now going after these locks
in a consistent manner.

pppoe_hash_lock protects the contents of the "pppox_sock" objects that reside
inside the hash.  Thus, NULL'ing out the pppoe_dev field should be done
under the protection of this lock.

Signed-off-by: Michal Ostrowski <mostrows@earthlink.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pppoe.c | 87 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 50 insertions(+), 37 deletions(-)

diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index bc4fc30bc85..6f98834e6ac 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -241,54 +241,53 @@ static inline struct pppox_sock *delete_item(unsigned long sid, char *addr, int
 static void pppoe_flush_dev(struct net_device *dev)
 {
 	int hash;
-
 	BUG_ON(dev == NULL);
 
-	read_lock_bh(&pppoe_hash_lock);
+	write_lock_bh(&pppoe_hash_lock);
 	for (hash = 0; hash < PPPOE_HASH_SIZE; hash++) {
 		struct pppox_sock *po = item_hash_table[hash];
 
 		while (po != NULL) {
-			if (po->pppoe_dev == dev) {
-				struct sock *sk = sk_pppox(po);
-
-				sock_hold(sk);
-				po->pppoe_dev = NULL;
+			struct sock *sk = sk_pppox(po);
+			if (po->pppoe_dev != dev) {
+				po = po->next;
+				continue;
+			}
+			po->pppoe_dev = NULL;
+			dev_put(dev);
 
-				/* We hold a reference to SK, now drop the
-				 * hash table lock so that we may attempt
-				 * to lock the socket (which can sleep).
-				 */
-				read_unlock_bh(&pppoe_hash_lock);
 
-				lock_sock(sk);
+			/* We always grab the socket lock, followed by the
+			 * pppoe_hash_lock, in that order.  Since we should
+			 * hold the sock lock while doing any unbinding,
+			 * we need to release the lock we're holding.
+			 * Hold a reference to the sock so it doesn't disappear
+			 * as we're jumping between locks.
+			 */
 
-				if (sk->sk_state &
-				    (PPPOX_CONNECTED | PPPOX_BOUND)) {
-					pppox_unbind_sock(sk);
-					dev_put(dev);
-					sk->sk_state = PPPOX_ZOMBIE;
-					sk->sk_state_change(sk);
-				}
+			sock_hold(sk);
 
-				release_sock(sk);
+			write_unlock_bh(&pppoe_hash_lock);
+			lock_sock(sk);
 
-				sock_put(sk);
+			if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
+				pppox_unbind_sock(sk);
+				sk->sk_state = PPPOX_ZOMBIE;
+				sk->sk_state_change(sk);
+			}
 
-				read_lock_bh(&pppoe_hash_lock);
+			release_sock(sk);
+			sock_put(sk);
 
-				/* Now restart from the beginning of this
-				 * hash chain.  We always NULL out pppoe_dev
-				 * so we are guaranteed to make forward
-				 * progress.
-				 */
-				po = item_hash_table[hash];
-				continue;
-			}
-			po = po->next;
+			/* Restart scan at the beginning of this hash chain.
+			 * While the lock was dropped the chain contents may
+			 * have changed.
+			 */
+			write_lock_bh(&pppoe_hash_lock);
+			po = item_hash_table[hash];
 		}
 	}
-	read_unlock_bh(&pppoe_hash_lock);
+	write_unlock_bh(&pppoe_hash_lock);
 }
 
 static int pppoe_device_event(struct notifier_block *this,
@@ -504,28 +503,42 @@ static int pppoe_release(struct socket *sock)
 	if (!sk)
 		return 0;
 
-	if (sock_flag(sk, SOCK_DEAD))
+	lock_sock(sk);
+	if (sock_flag(sk, SOCK_DEAD)){
+		release_sock(sk);
 		return -EBADF;
+	}
 
 	pppox_unbind_sock(sk);
 
 	/* Signal the death of the socket. */
 	sk->sk_state = PPPOX_DEAD;
 
+
+	/* Write lock on hash lock protects the entire "po" struct from
+	 * concurrent updates via pppoe_flush_dev. The "po" struct should
+	 * be considered part of the hash table contents, thus protected
+	 * by the hash table lock */
+	write_lock_bh(&pppoe_hash_lock);
+
 	po = pppox_sk(sk);
 	if (po->pppoe_pa.sid) {
-		delete_item(po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex);
+		__delete_item(po->pppoe_pa.sid,
+			      po->pppoe_pa.remote, po->pppoe_ifindex);
 	}
 
-	if (po->pppoe_dev)
+	if (po->pppoe_dev) {
 		dev_put(po->pppoe_dev);
+		po->pppoe_dev = NULL;
+	}
 
-	po->pppoe_dev = NULL;
+	write_unlock_bh(&pppoe_hash_lock);
 
 	sock_orphan(sk);
 	sock->sk = NULL;
 
 	skb_queue_purge(&sk->sk_receive_queue);
+	release_sock(sk);
 	sock_put(sk);
 
 	return 0;
-- 
cgit v1.2.3


From 9be9a6b983314dd57e2c5ba548dee8b53d338ac3 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Fri, 20 Apr 2007 17:02:45 -0700
Subject: [NET]: Get rid of netdev_nit

It isn't any faster to test a boolean global variable than do a simple
check for empty list.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index c8f5ea9aea8..431998d9cee 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -225,12 +225,6 @@ extern void netdev_unregister_sysfs(struct net_device *);
 
 *******************************************************************************/
 
-/*
- *	For efficiency
- */
-
-static int netdev_nit;
-
 /*
  *	Add a protocol ID to the list. Now that the input handler is
  *	smarter we can dispense with all the messy stuff that used to be
@@ -265,10 +259,9 @@ void dev_add_pack(struct packet_type *pt)
 	int hash;
 
 	spin_lock_bh(&ptype_lock);
-	if (pt->type == htons(ETH_P_ALL)) {
-		netdev_nit++;
+	if (pt->type == htons(ETH_P_ALL))
 		list_add_rcu(&pt->list, &ptype_all);
-	} else {
+	else {
 		hash = ntohs(pt->type) & 15;
 		list_add_rcu(&pt->list, &ptype_base[hash]);
 	}
@@ -295,10 +288,9 @@ void __dev_remove_pack(struct packet_type *pt)
 
 	spin_lock_bh(&ptype_lock);
 
-	if (pt->type == htons(ETH_P_ALL)) {
-		netdev_nit--;
+	if (pt->type == htons(ETH_P_ALL))
 		head = &ptype_all;
-	} else
+	else
 		head = &ptype_base[ntohs(pt->type) & 15];
 
 	list_for_each_entry(pt1, head, list) {
@@ -1330,7 +1322,7 @@ static int dev_gso_segment(struct sk_buff *skb)
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	if (likely(!skb->next)) {
-		if (netdev_nit)
+		if (!list_empty(&ptype_all))
 			dev_queue_xmit_nit(skb, dev);
 
 		if (netif_needs_gso(dev, skb)) {
-- 
cgit v1.2.3


From c462238d6a6d8ee855bda10f9fff442971540ed2 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Fri, 20 Apr 2007 17:07:51 -0700
Subject: [TCP]: TCP Illinois congestion control (rev3)

This is an implementation of TCP Illinois invented by Shao Liu
at University of Illinois. It is a another variant of Reno which adapts
the alpha and beta parameters based on RTT. The basic idea is to increase
window less rapidly as delay approaches the maximum. See the papers
and talks to get a more complete description.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/Kconfig        |  13 +++
 net/ipv4/Makefile       |   1 +
 net/ipv4/tcp_illinois.c | 284 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 298 insertions(+)
 create mode 100644 net/ipv4/tcp_illinois.c

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index dc61e664162..e62aee0ec4c 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -588,6 +588,19 @@ config TCP_CONG_YEAH
 	For further details look here:
 	  http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
 
+config TCP_CONG_ILLINOIS
+	tristate "TCP Illinois"
+	depends on EXPERIMENTAL
+	default n
+	---help---
+	TCP-Illinois is a sender-side modificatio of TCP Reno for
+	high speed long delay links. It uses round-trip-time to
+	adjust the alpha and beta parameters to achieve a higher average
+	throughput and maintain fairness.
+
+	For further details see:
+	  http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
+
 choice
 	prompt "Default TCP congestion control"
 	default DEFAULT_CUBIC
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index eeb94d5cac9..4ff6c151d7f 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
 obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
 obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
+obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
new file mode 100644
index 00000000000..91a2a34604c
--- /dev/null
+++ b/net/ipv4/tcp_illinois.c
@@ -0,0 +1,284 @@
+/*
+ * TCP Illinois congestion control.
+ * Home page:
+ *	http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
+ *
+ * The algorithm is described in:
+ * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
+ *  for High-Speed Networks"
+ * http://www.ews.uiuc.edu/~shaoliu/papersandslides/liubassri06perf.pdf
+ *
+ * Implemented from description in paper and ns-2 simulation.
+ * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet_diag.h>
+#include <asm/div64.h>
+#include <net/tcp.h>
+
+#define ALPHA_SHIFT	7
+#define ALPHA_SCALE	(1u<<ALPHA_SHIFT)
+#define ALPHA_MIN	((3*ALPHA_SCALE)/10)	/* ~0.3 */
+#define ALPHA_MAX	(10*ALPHA_SCALE)	/* 10.0 */
+#define ALPHA_BASE	ALPHA_SCALE		/* 1.0 */
+
+#define BETA_SHIFT	6
+#define BETA_SCALE	(1u<<BETA_SHIFT)
+#define BETA_MIN	(BETA_SCALE/8)		/* 0.8 */
+#define BETA_MAX	(BETA_SCALE/2)
+#define BETA_BASE	BETA_MAX		/* 0.5 */
+
+#define THETA		5
+
+static int win_thresh __read_mostly = 15;
+module_param(win_thresh, int, 0644);
+MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing");
+
+#define MAX_RTT		0x7fffffff
+
+/* TCP Illinois Parameters */
+struct tcp_illinois {
+	u32	last_alpha;
+	u32	min_rtt;
+	u32	max_rtt;
+	u32	rtt_low;
+	u32	rtt_cnt;
+	u64	sum_rtt;
+};
+
+static void tcp_illinois_init(struct sock *sk)
+{
+	struct tcp_illinois *ca = inet_csk_ca(sk);
+
+	ca->last_alpha = ALPHA_BASE;
+	ca->min_rtt = 0x7fffffff;
+}
+
+/*
+ * Keep track of min, max and average RTT
+ */
+static void tcp_illinois_rtt_calc(struct sock *sk, u32 rtt)
+{
+	struct tcp_illinois *ca = inet_csk_ca(sk);
+
+	if (rtt < ca->min_rtt)
+		ca->min_rtt = rtt;
+	if (rtt > ca->max_rtt)
+		ca->max_rtt = rtt;
+
+	if (++ca->rtt_cnt == 1)
+		ca->sum_rtt = rtt;
+	else
+		ca->sum_rtt += rtt;
+}
+
+/* max queuing delay */
+static inline u32 max_delay(const struct tcp_illinois *ca)
+{
+	return ca->max_rtt - ca->min_rtt;
+}
+
+/* average queueing delay */
+static u32 avg_delay(struct tcp_illinois *ca)
+{
+	u64 avg_rtt = ca->sum_rtt;
+
+	do_div(avg_rtt, ca->rtt_cnt);
+
+	ca->sum_rtt = 0;
+	ca->rtt_cnt = 0;
+
+	return avg_rtt - ca->min_rtt;
+}
+
+/*
+ * Compute value of alpha used for additive increase.
+ * If small window then use 1.0, equivalent to Reno.
+ *
+ * For larger windows, adjust based on average delay.
+ * A. If average delay is at minimum (we are uncongested),
+ *    then use large alpha (10.0) to increase faster.
+ * B. If average delay is at maximum (getting congested)
+ *    then use small alpha (1.0)
+ *
+ * The result is a convex window growth curve.
+ */
+static u32 alpha(const struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_illinois *ca = inet_csk_ca(sk);
+	u32 dm = max_delay(ca);
+	u32 da = avg_delay(ca);
+	u32 d1, a;
+
+	if (tp->snd_cwnd < win_thresh)
+		return ALPHA_BASE;	/* same as Reno (1.0) */
+
+	d1 = dm / 100;
+	if (da <= d1) {
+		/* Don't let noise force agressive response */
+		if (ca->rtt_low < THETA) {
+			++ca->rtt_low;
+			return ca->last_alpha;
+		} else
+			return ALPHA_MAX;
+	}
+
+	ca->rtt_low = 0;
+
+	/*
+	 * Based on:
+	 *
+	 *      (dm - d1) amin amax
+	 * k1 = -------------------
+	 *         amax - amin
+	 *
+	 *       (dm - d1) amin
+	 * k2 = ----------------  - d1
+	 *        amax - amin
+	 *
+	 *             k1
+	 * alpha = ----------
+	 *          k2 + da
+	 */
+
+	dm -= d1;
+	da -= d1;
+
+	a = (dm * ALPHA_MAX) / (dm - (da  * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
+	ca->last_alpha = a;
+	return a;
+}
+
+/*
+ * Increase window in response to successful acknowledgment.
+ */
+static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
+				    u32 in_flight, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* RFC2861 only increase cwnd if fully utilized */
+	if (!tcp_is_cwnd_limited(sk, in_flight))
+		return;
+
+	/* In slow start */
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+
+	else {
+		/* additive increase  cwnd += alpha / cwnd */
+		if ((tp->snd_cwnd_cnt * alpha(sk)) >> ALPHA_SHIFT >= tp->snd_cwnd) {
+			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
+				tp->snd_cwnd++;
+			tp->snd_cwnd_cnt = 0;
+		} else
+			tp->snd_cwnd_cnt++;
+	}
+}
+
+/*
+ * Beta used for multiplicative decrease.
+ * For small window sizes returns same value as Reno (0.5)
+ *
+ * If delay is small (10% of max) then beta = 1/8
+ * If delay is up to 80% of max then beta = 1/2
+ * In between is a linear function
+ */
+static inline u32 beta(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_illinois *ca = inet_csk_ca(sk);
+	u32 dm = max_delay(ca);
+	u32 da = avg_delay(ca);
+	u32 d2, d3;
+
+	if (tp->snd_cwnd < win_thresh)
+		return BETA_BASE;
+
+	d2 = dm / 10;
+	if (da <= d2)
+		return BETA_MIN;
+	d3 = (8 * dm) / 10;
+	if (da >= d3 || d3 <= d2)
+		return BETA_MAX;
+
+	/*
+	 * Based on:
+	 *
+	 *       bmin d3 - bmax d2
+	 * k3 = -------------------
+	 *         d3 - d2
+	 *
+	 *       bmax - bmin
+	 * k4 = -------------
+	 *         d3 - d2
+	 *
+	 * b = k3 + k4 da
+	 */
+	return (BETA_MIN * d3 - BETA_MAX * d2 + (BETA_MAX - BETA_MIN) * da)
+		/ (d3 - d2);
+}
+
+static u32 tcp_illinois_ssthresh(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* Multiplicative decrease */
+	return max((tp->snd_cwnd * beta(sk)) >> BETA_SHIFT, 2U);
+}
+
+/* Extract info for TCP socket info provided via netlink.
+ * We aren't really doing Vegas, but we can provide RTT info
+ */
+static void tcp_illinois_get_info(struct sock *sk, u32 ext,
+			       struct sk_buff *skb)
+{
+	const struct tcp_illinois *ca = inet_csk_ca(sk);
+
+	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
+		struct tcpvegas_info info = {
+			.tcpv_enabled = 1,
+			.tcpv_rttcnt = ca->rtt_cnt,
+			.tcpv_minrtt = ca->min_rtt,
+		};
+		u64 avg_rtt = ca->sum_rtt;
+		do_div(avg_rtt, ca->rtt_cnt);
+		info.tcpv_rtt = avg_rtt;
+
+		nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+	}
+}
+
+static struct tcp_congestion_ops tcp_illinois = {
+	.init		= tcp_illinois_init,
+	.ssthresh	= tcp_illinois_ssthresh,
+	.min_cwnd	= tcp_reno_min_cwnd,
+	.cong_avoid	= tcp_illinois_cong_avoid,
+	.rtt_sample	= tcp_illinois_rtt_calc,
+	.get_info	= tcp_illinois_get_info,
+
+	.owner		= THIS_MODULE,
+	.name		= "illinois",
+};
+
+static int __init tcp_illinois_register(void)
+{
+	BUILD_BUG_ON(sizeof(struct tcp_illinois) > ICSK_CA_PRIV_SIZE);
+	return tcp_register_congestion_control(&tcp_illinois);
+}
+
+static void __exit tcp_illinois_unregister(void)
+{
+	tcp_unregister_congestion_control(&tcp_illinois);
+}
+
+module_init(tcp_illinois_register);
+module_exit(tcp_illinois_unregister);
+
+MODULE_AUTHOR("Stephen Hemminger, Shao Liu");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("TCP Illinois");
+MODULE_VERSION("0.3");
-- 
cgit v1.2.3


From 3ff50b7997fe06cd5d276b229967bb52d6b3b6c1 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Fri, 20 Apr 2007 17:09:22 -0700
Subject: [NET]: cleanup extra semicolons

Spring cleaning time...

There seems to be a lot of places in the network code that have
extra bogus semicolons after conditionals.  Most commonly is a
bogus semicolon after: switch() { }

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/802/tr.c             |  3 ++-
 net/8021q/vlan.c         |  6 +++---
 net/8021q/vlan_dev.c     |  4 ++--
 net/ax25/ax25_ip.c       |  2 +-
 net/bluetooth/hci_core.c |  4 ++--
 net/bluetooth/sco.c      |  2 +-
 net/core/link_watch.c    |  2 +-
 net/core/pktgen.c        |  6 +++---
 net/core/rtnetlink.c     |  2 +-
 net/core/skbuff.c        |  2 +-
 net/ipv4/fib_semantics.c |  2 +-
 net/ipv4/ipconfig.c      |  4 ++--
 net/ipv4/multipath_drr.c |  2 +-
 net/ipv4/tcp.c           |  7 ++++---
 net/ipv4/tcp_input.c     |  9 +++++----
 net/ipv4/xfrm4_policy.c  |  2 +-
 net/ipv6/addrconf.c      |  4 ++--
 net/ipv6/datagram.c      |  2 +-
 net/ipv6/exthdrs.c       |  2 +-
 net/ipv6/icmp.c          |  5 +++--
 net/ipv6/ndisc.c         |  2 +-
 net/ipv6/raw.c           | 14 ++++++++------
 net/ipv6/route.c         |  2 +-
 net/ipv6/xfrm6_policy.c  |  5 +++--
 net/irda/af_irda.c       |  4 ++--
 net/iucv/af_iucv.c       |  2 +-
 net/key/af_key.c         |  2 +-
 net/sched/cls_u32.c      |  2 +-
 net/sched/sch_dsmark.c   |  6 +++---
 net/sched/sch_ingress.c  |  4 ++--
 net/sched/sch_prio.c     |  2 +-
 net/sctp/associola.c     |  2 +-
 net/sctp/debug.c         |  5 +++--
 net/sctp/ipv6.c          |  2 +-
 net/sctp/output.c        |  2 +-
 net/sctp/outqueue.c      | 12 ++++++------
 net/sctp/sm_make_chunk.c |  4 ++--
 net/sctp/sm_sideeffect.c |  5 +++--
 net/sctp/sm_statefuns.c  | 10 +++++-----
 net/sctp/sm_statetable.c |  2 +-
 net/sctp/socket.c        | 11 ++++++-----
 net/sctp/transport.c     |  2 +-
 net/sctp/ulpqueue.c      |  8 ++++----
 net/tipc/link.c          |  2 +-
 net/xfrm/xfrm_policy.c   |  2 +-
 net/xfrm/xfrm_state.c    |  8 ++++----
 net/xfrm/xfrm_user.c     | 22 +++++++++++-----------
 47 files changed, 113 insertions(+), 103 deletions(-)

diff --git a/net/802/tr.c b/net/802/tr.c
index eb2de0d1620..0ba1946211c 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -554,7 +554,8 @@ static int rif_seq_show(struct seq_file *seq, void *v)
 					if(j==1) {
 						segment=ntohs(entry->rseg[j-1])>>4;
 						seq_printf(seq,"  %03X",segment);
-					};
+					}
+
 					segment=ntohs(entry->rseg[j])>>4;
 					brdgnmb=ntohs(entry->rseg[j-1])&0x00f;
 					seq_printf(seq,"-%01X-%03X",brdgnmb,segment);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index eb1c71ed7df..c0c7bb8e9f0 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -470,7 +470,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
 		 */
 	default:
 		snprintf(name, IFNAMSIZ, "vlan%.4i", VLAN_ID);
-	};
+	}
 
 	new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name,
 			       vlan_setup);
@@ -685,7 +685,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 				break;
 		}
 		break;
-	};
+	}
 
 out:
 	return NOTIFY_DONE;
@@ -819,7 +819,7 @@ static int vlan_ioctl_handler(void __user *arg)
 		printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n",
 			__FUNCTION__, args.cmd);
 		return -EINVAL;
-	};
+	}
 out:
 	return err;
 }
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 7ff6b794848..ec46084f44b 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -66,7 +66,7 @@ int vlan_dev_rebuild_header(struct sk_buff *skb)
 
 		memcpy(veth->h_source, dev->dev_addr, ETH_ALEN);
 		break;
-	};
+	}
 
 	return 0;
 }
@@ -219,7 +219,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		break;
 	default:
 		break;
-	};
+	}
 
 	/*  Was a VLAN packet, grab the encapsulated protocol, which the layer
 	 * three protocols care about.
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 4d4ef35e178..930e4918037 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -121,7 +121,7 @@ int ax25_rebuild_header(struct sk_buff *skb)
 		digipeat = route->digipeat;
 		dev = route->dev;
 		ip_mode = route->ip_mode;
-	};
+	}
 
 	if (dev == NULL)
 		dev = skb->dev;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index c177e75d64a..aa4b56a8c3e 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -149,7 +149,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev,
 	default:
 		err = -ETIMEDOUT;
 		break;
-	};
+	}
 
 	hdev->req_status = hdev->req_result = 0;
 
@@ -1388,7 +1388,7 @@ static void hci_rx_task(unsigned long arg)
 			case HCI_SCODATA_PKT:
 				kfree_skb(skb);
 				continue;
-			};
+			}
 		}
 
 		/* Process frame */
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index ae439144095..3f5163e725e 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -393,7 +393,7 @@ static void sco_sock_close(struct sock *sk)
 	default:
 		sock_set_flag(sk, SOCK_ZAPPED);
 		break;
-	};
+	}
 
 	release_sock(sk);
 
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 8b45c9d3b24..e3c26a9ccad 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -79,7 +79,7 @@ static void rfc2863_policy(struct net_device *dev)
 	case IF_LINK_MODE_DEFAULT:
 	default:
 		break;
-	};
+	}
 
 	dev->operstate = operstate;
 
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index f9469ea530c..b92a322872a 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -786,7 +786,7 @@ static int count_trail_chars(const char __user * user_buffer,
 			break;
 		default:
 			goto done;
-		};
+		}
 	}
 done:
 	return i;
@@ -829,7 +829,7 @@ static int strn_len(const char __user * user_buffer, unsigned int maxlen)
 			break;
 		default:
 			break;
-		};
+		}
 	}
 done_str:
 	return i;
@@ -1906,7 +1906,7 @@ static int pktgen_device_event(struct notifier_block *unused,
 	case NETDEV_UNREGISTER:
 		pktgen_mark_device(dev->name);
 		break;
-	};
+	}
 
 	return NOTIFY_DONE;
 }
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bc95fab0b0c..75cea8ea4cf 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -399,7 +399,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
 		    operstate == IF_OPER_UNKNOWN)
 			operstate = IF_OPER_DORMANT;
 		break;
-	};
+	}
 
 	if (dev->operstate != operstate) {
 		write_lock_bh(&dev_base_lock);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6b50d58cce1..142257307fa 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -304,7 +304,7 @@ void kfree_skbmem(struct sk_buff *skb)
 		if (atomic_dec_and_test(fclone_ref))
 			kmem_cache_free(skbuff_fclone_cache, other);
 		break;
-	};
+	}
 }
 
 /**
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3dad12ee76c..406ea7050ae 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -927,7 +927,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
 			default:
 				printk(KERN_DEBUG "impossible 102\n");
 				return -EINVAL;
-			};
+			}
 		}
 		return err;
 	}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 4e19ee0e010..597c800b2fd 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -939,7 +939,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 					if (opt[1] >= 4)
 						memcpy(&server_id, opt + 2, 4);
 					break;
-				};
+				}
 			}
 
 #ifdef IPCONFIG_DEBUG
@@ -984,7 +984,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 				ic_myaddr = NONE;
 				ic_servaddr = NONE;
 				goto drop_unlock;
-			};
+			}
 
 			ic_dhcp_msgtype = mt;
 
diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c
index 574c735836f..b03c5ca2c82 100644
--- a/net/ipv4/multipath_drr.c
+++ b/net/ipv4/multipath_drr.c
@@ -100,7 +100,7 @@ static int drr_dev_event(struct notifier_block *this,
 
 		spin_unlock_bh(&state_lock);
 		break;
-	};
+	}
 
 	return NOTIFY_DONE;
 }
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bb9d91a7662..4664733f139 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -444,7 +444,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 		break;
 	default:
 		return -ENOIOCTLCMD;
-	};
+	}
 
 	return put_user(answ, (int __user *)arg);
 }
@@ -1954,7 +1954,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	default:
 		err = -ENOPROTOOPT;
 		break;
-	};
+	}
+
 	release_sock(sk);
 	return err;
 }
@@ -2124,7 +2125,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		return 0;
 	default:
 		return -ENOPROTOOPT;
-	};
+	}
 
 	if (put_user(len, optlen))
 		return -EFAULT;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d1604f59d77..2fbfc2e4209 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2708,7 +2708,7 @@ static int tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
 		default:
 			tcp_ratehalving_spur_to_response(sk);
 			break;
-		};
+		}
 		tp->frto_counter = 0;
 	}
 	return 0;
@@ -2915,10 +2915,11 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					 */
 					break;
 #endif
-				};
+				}
+
 				ptr+=opsize-2;
 				length-=opsize;
-		};
+		}
 	}
 }
 
@@ -3124,7 +3125,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 			printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
 			       __FUNCTION__, sk->sk_state);
 			break;
-	};
+	}
 
 	/* It _is_ possible, that we have something out-of-order _after_ FIN.
 	 * Probably, we should reset in this case. For now drop them.
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index f1c32ff59d1..4ff8ed30024 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -263,7 +263,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl)
 		default:
 			fl->fl_ipsec_spi = 0;
 			break;
-		};
+		}
 	}
 	fl->proto = iph->protocol;
 	fl->fl4_dst = iph->daddr;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9ba9e92d193..33ccc95c349 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2288,7 +2288,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		default:
 			addrconf_dev_config(dev);
 			break;
-		};
+		}
 		if (idev) {
 			if (run_pending)
 				addrconf_dad_run(idev);
@@ -2341,7 +2341,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 		}
 #endif
 		break;
-	};
+	}
 
 	return NOTIFY_OK;
 }
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 4a355fea409..403eee66b9c 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -723,7 +723,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 				       cmsg->cmsg_type);
 			err = -EINVAL;
 			break;
-		};
+		}
 	}
 
 exit_f:
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 275d2e812a4..4aa7fb024b3 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -130,7 +130,7 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
 	case 2: /* send ICMP PARM PROB regardless and drop packet */
 		icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
 		return 0;
-	};
+	}
 
 	kfree_skb(skb);
 	return 0;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index e94992ab92e..e9bcce9e7bd 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -732,7 +732,8 @@ static int icmpv6_rcv(struct sk_buff **pskb)
 		 */
 
 		icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
-	};
+	}
+
 	kfree_skb(skb);
 	return 0;
 
@@ -865,7 +866,7 @@ int icmpv6_err_convert(int type, int code, int *err)
 	case ICMPV6_TIME_EXCEED:
 		*err = EHOSTUNREACH;
 		break;
-	};
+	}
 
 	return fatal;
 }
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b1cf7081647..4ee1216f801 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1630,7 +1630,7 @@ int ndisc_rcv(struct sk_buff *skb)
 	case NDISC_REDIRECT:
 		ndisc_redirect_rcv(skb);
 		break;
-	};
+	}
 
 	return 0;
 }
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index f65fcd7704c..009a1047fc3 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -882,7 +882,7 @@ static int rawv6_seticmpfilter(struct sock *sk, int level, int optname,
 		return 0;
 	default:
 		return -ENOPROTOOPT;
-	};
+	}
 
 	return 0;
 }
@@ -907,7 +907,7 @@ static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
 		return 0;
 	default:
 		return -ENOPROTOOPT;
-	};
+	}
 
 	return 0;
 }
@@ -961,7 +961,8 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
 		default:
 			return ipv6_setsockopt(sk, level, optname, optval,
 					       optlen);
-	};
+	}
+
 	return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
 }
 
@@ -982,7 +983,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
 	default:
 		return compat_ipv6_setsockopt(sk, level, optname,
 					      optval, optlen);
-	};
+	}
 	return do_rawv6_setsockopt(sk, level, optname, optval, optlen);
 }
 #endif
@@ -1035,7 +1036,8 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
 		default:
 			return ipv6_getsockopt(sk, level, optname, optval,
 					       optlen);
-	};
+	}
+
 	return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
 }
 
@@ -1056,7 +1058,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
 	default:
 		return compat_ipv6_getsockopt(sk, level, optname,
 					      optval, optlen);
-	};
+	}
 	return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
 }
 #endif
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 70f760f069b..6264ec3bb15 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1760,7 +1760,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
 		rtnl_unlock();
 
 		return err;
-	};
+	}
 
 	return -EINVAL;
 }
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ef746d4f313..1faa2ea80af 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -240,7 +240,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int
 		if (!afinfo) {
 			dst = *dst_p;
 			goto error;
-		};
+		}
+
 		dst_prev->output = afinfo->output;
 		xfrm_state_put_afinfo(afinfo);
 		/* Sheit... I remember I did this right. Apparently,
@@ -337,7 +338,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl)
 			fl->fl_ipsec_spi = 0;
 			fl->proto = nexthdr;
 			return;
-		};
+		}
 	}
 }
 
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index bf994c85e45..baca1565aa1 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -220,7 +220,7 @@ static void irda_connect_confirm(void *instance, void *sap,
 		break;
 	default:
 		self->max_data_size = irttp_get_max_seg_size(self->tsap);
-	};
+	}
 
 	IRDA_DEBUG(2, "%s(), max_data_size=%d\n", __FUNCTION__,
 		   self->max_data_size);
@@ -283,7 +283,7 @@ static void irda_connect_indication(void *instance, void *sap,
 		break;
 	default:
 		self->max_data_size = irttp_get_max_seg_size(self->tsap);
-	};
+	}
 
 	IRDA_DEBUG(2, "%s(), max_data_size=%d\n", __FUNCTION__,
 		   self->max_data_size);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 55632883d17..e84c924a81e 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -181,7 +181,7 @@ static void iucv_sock_close(struct sock *sk)
 	default:
 		sock_set_flag(sk, SOCK_ZAPPED);
 		break;
-	};
+	}
 
 	release_sock(sk);
 	iucv_sock_kill(sk);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 3cd228aacfe..a99444142dc 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -379,7 +379,7 @@ static int verify_address_len(void *p)
 		 */
 		return -EINVAL;
 		break;
-	};
+	}
 
 	return 0;
 }
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 62e1deb27a1..c7a347bd6d7 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -436,7 +436,7 @@ static void u32_destroy(struct tcf_proto *tp)
 			BUG_TRAP(ht->refcnt == 0);
 
 			kfree(ht);
-		};
+		}
 
 		kfree(tp_c);
 	}
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index e38e0d00d1e..3c6fd181263 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -226,7 +226,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 			default:
 				skb->tc_index = 0;
 				break;
-		};
+		}
 	}
 
 	if (TC_H_MAJ(skb->priority) == sch->handle)
@@ -257,7 +257,7 @@ static int dsmark_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 				if (p->default_index != NO_DEFAULT_INDEX)
 					skb->tc_index = p->default_index;
 				break;
-		};
+		}
 	}
 
 	err = p->q->enqueue(skb,p->q);
@@ -310,7 +310,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 				       "unsupported protocol %d\n",
 				       ntohs(skb->protocol));
 			break;
-	};
+	}
 
 	return skb;
 }
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index ad22dc6af22..f8b9f1cdf73 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -170,7 +170,7 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 			skb->tc_index = TC_H_MIN(res.classid);
 			result = TC_ACT_OK;
 			break;
-	};
+	}
 /* backward compat */
 #else
 #ifdef	CONFIG_NET_CLS_POLICE
@@ -187,7 +187,7 @@ static int ingress_enqueue(struct sk_buff *skb,struct Qdisc *sch)
 		sch->bstats.bytes += skb->len;
 		result = NF_ACCEPT;
 		break;
-	};
+	}
 
 #else
 	D2PRINTK("Overriding result to ACCEPT\n");
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 5cfe60bf6e2..269a6e17c6c 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -62,7 +62,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 			*qerr = NET_XMIT_SUCCESS;
 		case TC_ACT_SHOT:
 			return NULL;
-		};
+		}
 
 		if (!q->filter_list ) {
 #else
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 37a343e1ebb..db73ef97485 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -733,7 +733,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
 
 	default:
 		return;
-	};
+	}
 
 	/* Generate and send a SCTP_PEER_ADDR_CHANGE notification to the
 	 * user.
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 5f5ab28977c..e8c0f7435d7 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -93,8 +93,9 @@ const char *sctp_cname(const sctp_subtype_t cid)
 		return "FWD_TSN";
 
 	default:
-		return "unknown chunk";
-	};
+		break;
+	}
+
 	return "unknown chunk";
 }
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 5b0cdda4b44..ca527a27dd0 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -604,7 +604,7 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
 	default:
 		retval = SCTP_SCOPE_GLOBAL;
 		break;
-	};
+	}
 
 	return retval;
 }
diff --git a/net/sctp/output.c b/net/sctp/output.c
index f875fc3ced5..d85543def75 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -176,7 +176,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
 	case SCTP_XMIT_OK:
 	case SCTP_XMIT_NAGLE_DELAY:
 		break;
-	};
+	}
 
 	return retval;
 }
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 41abfd17627..992f361084b 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -338,7 +338,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
 				SCTP_INC_STATS(SCTP_MIB_OUTORDERCHUNKS);
 			q->empty = 0;
 			break;
-		};
+		}
 	} else {
 		list_add_tail(&chunk->list, &q->control_chunk_list);
 		SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
@@ -630,7 +630,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 			/* Retrieve a new chunk to bundle. */
 			lchunk = sctp_list_dequeue(lqueue);
 			break;
-		};
+		}
 
 		/* If we are here due to a retransmit timeout or a fast
 		 * retransmit and if there are any chunks left in the retransmit
@@ -779,7 +779,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 		default:
 			/* We built a chunk with an illegal type! */
 			BUG();
-		};
+		}
 	}
 
 	/* Is it OK to send data chunks?  */
@@ -1397,7 +1397,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 				SCTP_DEBUG_PRINTK("ACKed: %08x", tsn);
 				dbg_prt_state = 0;
 				dbg_ack_tsn = tsn;
-			};
+			}
 
 			dbg_last_ack_tsn = tsn;
 #endif /* SCTP_DEBUG */
@@ -1452,7 +1452,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 				SCTP_DEBUG_PRINTK("KEPT: %08x",tsn);
 				dbg_prt_state = 1;
 				dbg_kept_tsn = tsn;
-			};
+			}
 
 			dbg_last_kept_tsn = tsn;
 #endif /* SCTP_DEBUG */
@@ -1476,7 +1476,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 		} else {
 			SCTP_DEBUG_PRINTK("\n");
 		}
-	};
+	}
 #endif /* SCTP_DEBUG */
 	if (transport) {
 		if (bytes_acked) {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 759ea3d1997..be783a3761c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2077,7 +2077,7 @@ static int sctp_process_param(struct sctp_association *asoc,
 
 			default: /* Just ignore anything else.  */
 				break;
-			};
+			}
 		}
 		break;
 
@@ -2118,7 +2118,7 @@ static int sctp_process_param(struct sctp_association *asoc,
 		SCTP_DEBUG_PRINTK("Ignoring param: %d for association %p.\n",
 				  ntohs(param.p->type), asoc);
 		break;
-	};
+	}
 
 	return retval;
 }
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 0a1a197193a..b37a7adeb15 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1009,7 +1009,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
 		       status, state, event_type, subtype.chunk);
 		BUG();
 		break;
-	};
+	}
 
 bail:
 	return error;
@@ -1489,7 +1489,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
 			printk(KERN_WARNING "Impossible command: %u, %p\n",
 			       cmd->verb, cmd->obj.ptr);
 			break;
-		};
+		}
+
 		if (error)
 			break;
 	}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index e0ec16dd678..9e28a5d5120 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -629,7 +629,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep,
 		case -SCTP_IERROR_BAD_SIG:
 		default:
 			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
-		};
+		}
 	}
 
 
@@ -1195,7 +1195,7 @@ static void sctp_tietags_populate(struct sctp_association *new_asoc,
 		new_asoc->c.my_ttag   = asoc->c.my_vtag;
 		new_asoc->c.peer_ttag = asoc->c.peer_vtag;
 		break;
-	};
+	}
 
 	/* Other parameters for the endpoint SHOULD be copied from the
 	 * existing parameters of the association (e.g. number of
@@ -1904,7 +1904,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
 		case -SCTP_IERROR_BAD_SIG:
 		default:
 			return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
-		};
+		}
 	}
 
 	/* Compare the tie_tag in cookie with the verification tag of
@@ -1936,7 +1936,7 @@ sctp_disposition_t sctp_sf_do_5_2_4_dupcook(const struct sctp_endpoint *ep,
 	default: /* Discard packet for all others. */
 		retval = sctp_sf_pdiscard(ep, asoc, type, arg, commands);
 		break;
-	};
+	}
 
 	/* Delete the tempory new association. */
 	sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc));
@@ -4816,7 +4816,7 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep,
 	default:
 		BUG();
 		break;
-	};
+	}
 
 	if (!reply)
 		goto nomem;
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 5e54b17377f..523071c7902 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -101,7 +101,7 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
 	default:
 		/* Yikes!  We got an illegal event type.  */
 		return &bug;
-	};
+	}
 }
 
 #define TYPE_SCTP_FUNC(func) {.fn = func, .name = #func}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f904f2bc0f2..11938fb2039 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -941,7 +941,7 @@ SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk,
 	default:
 		err = -EINVAL;
 		break;
-	};
+	}
 
 out:
 	kfree(kaddrs);
@@ -3048,7 +3048,7 @@ SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname,
 	default:
 		retval = -ENOPROTOOPT;
 		break;
-	};
+	}
 
 	sctp_release_sock(sk);
 
@@ -4873,7 +4873,7 @@ SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
 	default:
 		retval = -ENOPROTOOPT;
 		break;
-	};
+	}
 
 	sctp_release_sock(sk);
 	return retval;
@@ -5198,7 +5198,8 @@ int sctp_inet_listen(struct socket *sock, int backlog)
 		break;
 	default:
 		break;
-	};
+	}
+
 	if (err)
 		goto cleanup;
 
@@ -5461,7 +5462,7 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg,
 
 		default:
 			return -EINVAL;
-		};
+		}
 	}
 	return 0;
 }
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 4d8c2ab864f..961df275d5b 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -507,7 +507,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 			transport->cwnd = max(transport->cwnd/2,
 						 4*transport->asoc->pathmtu);
 		break;
-	};
+	}
 
 	transport->partial_bytes_acked = 0;
 	SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: "
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 0fa4d4d4df1..34eb977a204 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -391,7 +391,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu
 			break;
 		pos->next = pnext;
 		pos = pnext;
-	};
+	}
 
 	event = sctp_skb2event(f_frag);
 	SCTP_INC_STATS(SCTP_MIB_REASMUSRMSGS);
@@ -476,7 +476,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u
 			else
 				first_frag = NULL;
 			break;
-		};
+		}
 	}
 
 	asoc = ulpq->asoc;
@@ -556,7 +556,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq
 			goto done;
 		default:
 			return NULL;
-		};
+		}
 	}
 
 	/* We have the reassembled event. There is no need to look
@@ -648,7 +648,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *u
 			break;
 		default:
 			return NULL;
-		};
+		}
 	}
 
 	/* We have the reassembled event. There is no need to look
diff --git a/net/tipc/link.c b/net/tipc/link.c
index f3f99c8ea08..2124f32ef29 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2570,7 +2570,7 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
 		if (obuf == NULL) {
 			warn("Link unable to unbundle message(s)\n");
 			break;
-		};
+		}
 		pos += align(msg_size(buf_msg(obuf)));
 		msg_dbg(buf_msg(obuf), "     /");
 		tipc_net_route_msg(obuf);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 19425755455..762926009c0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1049,7 +1049,7 @@ static inline int policy_to_flow_dir(int dir)
 		return FLOW_DIR_OUT;
 	case XFRM_POLICY_FWD:
 		return FLOW_DIR_FWD;
-	};
+	}
 }
 
 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 69a3600afd9..743f07e7f69 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -458,7 +458,7 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi,
 					     x->id.daddr.a6))
 				continue;
 			break;
-		};
+		}
 
 		xfrm_state_hold(x);
 		return x;
@@ -493,7 +493,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm
 					     x->props.saddr.a6))
 				continue;
 			break;
-		};
+		}
 
 		xfrm_state_hold(x);
 		return x;
@@ -722,7 +722,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 					     (struct in6_addr *)saddr))
 				continue;
 			break;
-		};
+		}
 
 		xfrm_state_hold(x);
 		return x;
@@ -755,7 +755,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
 			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
 				       (struct in6_addr *)daddr);
 			break;
-		};
+		}
 
 		x->km.state = XFRM_STATE_ACQ;
 		x->id.proto = proto;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 88659edc9b1..f91521d5f2a 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -71,7 +71,7 @@ static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0';
 	return 0;
@@ -152,7 +152,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 
 	default:
 		goto out;
-	};
+	}
 
 	err = -EINVAL;
 	switch (p->id.proto) {
@@ -192,7 +192,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 
 	default:
 		goto out;
-	};
+	}
 
 	if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH)))
 		goto out;
@@ -217,7 +217,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 
 	default:
 		goto out;
-	};
+	}
 
 	err = 0;
 
@@ -711,7 +711,7 @@ static int verify_userspi_info(struct xfrm_userspi_info *p)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	if (p->min > p->max)
 		return -EINVAL;
@@ -789,7 +789,7 @@ static int verify_policy_dir(u8 dir)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	return 0;
 }
@@ -805,7 +805,7 @@ static int verify_policy_type(u8 type)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	return 0;
 }
@@ -821,7 +821,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	switch (p->action) {
 	case XFRM_POLICY_ALLOW:
@@ -830,7 +830,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	switch (p->sel.family) {
 	case AF_INET:
@@ -845,7 +845,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
 
 	default:
 		return -EINVAL;
-	};
+	}
 
 	return verify_policy_dir(p->dir);
 }
@@ -912,7 +912,7 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
 #endif
 		default:
 			return -EINVAL;
-		};
+		}
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 4ac02bab77438b484a5cf855a002fb6a1d592894 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Fri, 20 Apr 2007 17:11:46 -0700
Subject: [TCP]: Uninline tcp_done().

The function is quite big and has several call sites and nothing
to collapse by compiler optimization on inlining.

Besides it's nicer to read in a in .c file.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 16 +---------------
 net/ipv4/tcp.c    | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 07f724e02f8..e79803353c8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -921,21 +921,7 @@ static inline void tcp_set_state(struct sock *sk, int state)
 #endif	
 }
 
-static inline void tcp_done(struct sock *sk)
-{
-	if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
-		TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
-
-	tcp_set_state(sk, TCP_CLOSE);
-	tcp_clear_xmit_timers(sk);
-
-	sk->sk_shutdown = SHUTDOWN_MASK;
-
-	if (!sock_flag(sk, SOCK_DEAD))
-		sk->sk_state_change(sk);
-	else
-		inet_csk_destroy_sock(sk);
-}
+extern void tcp_done(struct sock *sk);
 
 static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
 {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4664733f139..99ad52c00c9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2375,6 +2375,23 @@ void __tcp_put_md5sig_pool(void)
 EXPORT_SYMBOL(__tcp_put_md5sig_pool);
 #endif
 
+void tcp_done(struct sock *sk)
+{
+	if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
+		TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+
+	tcp_set_state(sk, TCP_CLOSE);
+	tcp_clear_xmit_timers(sk);
+
+	sk->sk_shutdown = SHUTDOWN_MASK;
+
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_state_change(sk);
+	else
+		inet_csk_destroy_sock(sk);
+}
+EXPORT_SYMBOL_GPL(tcp_done);
+
 extern void __skb_cb_too_small_for_tcp(int, int);
 extern struct tcp_congestion_ops tcp_reno;
 
-- 
cgit v1.2.3


From 9958089a43ae8a9af07402461c0b2b7548c7341e Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Fri, 20 Apr 2007 17:12:43 -0700
Subject: [NET]: Move sk_setup_caps() out of line.

It is far too large to be an inline and not in any hot paths.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 14 +-------------
 net/core/sock.c    | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 390c0470059..25c37e34bfd 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1075,19 +1075,7 @@ static inline int sk_can_gso(const struct sock *sk)
 	return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type);
 }
 
-static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
-{
-	__sk_dst_set(sk, dst);
-	sk->sk_route_caps = dst->dev->features;
-	if (sk->sk_route_caps & NETIF_F_GSO)
-		sk->sk_route_caps |= NETIF_F_GSO_MASK;
-	if (sk_can_gso(sk)) {
-		if (dst->header_len)
-			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
-		else 
-			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
-	}
-}
+extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
 
 static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb)
 {
diff --git a/net/core/sock.c b/net/core/sock.c
index 73a8018029a..043bdc05d21 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -979,6 +979,21 @@ out:
 
 EXPORT_SYMBOL_GPL(sk_clone);
 
+void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
+{
+	__sk_dst_set(sk, dst);
+	sk->sk_route_caps = dst->dev->features;
+	if (sk->sk_route_caps & NETIF_F_GSO)
+		sk->sk_route_caps |= NETIF_F_GSO_MASK;
+	if (sk_can_gso(sk)) {
+		if (dst->header_len)
+			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+		else
+			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
+	}
+}
+EXPORT_SYMBOL_GPL(sk_setup_caps);
+
 void __init sk_init(void)
 {
 	if (num_physpages <= 4096) {
-- 
cgit v1.2.3


From 305f2aa18214555e611ad05e586dd385e64ab665 Mon Sep 17 00:00:00 2001
From: Olaf Kirch <olaf.kirch@oracle.com>
Date: Fri, 20 Apr 2007 22:05:27 -0700
Subject: [IrDA] af_irda: irda_recvmsg_stream cleanup

This patch cleans up some code in irda_recvmsg_stream, replacing some
homebrew code with prepare_to_wait/finish_wait, and by making the
code honor sock_rcvtimeo.

Signed-off-by: Olaf Kirch <olaf.kirch@oracle.com>
Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/af_irda.c | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index baca1565aa1..a0dbe07a1b1 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1402,8 +1402,8 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
 	struct irda_sock *self = irda_sk(sk);
 	int noblock = flags & MSG_DONTWAIT;
 	size_t copied = 0;
-	int target = 1;
-	DECLARE_WAITQUEUE(waitq, current);
+	int target;
+	long timeo;
 
 	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
 
@@ -1416,8 +1416,8 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
 	if (flags & MSG_OOB)
 		return -EOPNOTSUPP;
 
-	if (flags & MSG_WAITALL)
-		target = size;
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
+	timeo = sock_rcvtimeo(sk, noblock);
 
 	msg->msg_namelen = 0;
 
@@ -1425,19 +1425,14 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
 		int chunk;
 		struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue);
 
-		if (skb==NULL) {
+		if (skb == NULL) {
+			DEFINE_WAIT(wait);
 			int ret = 0;
 
 			if (copied >= target)
 				break;
 
-			/* The following code is a cut'n'paste of the
-			 * wait_event_interruptible() macro.
-			 * We don't us the macro because the test condition
-			 * is messy. - Jean II */
-			set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-			add_wait_queue(sk->sk_sleep, &waitq);
-			set_current_state(TASK_INTERRUPTIBLE);
+			prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 
 			/*
 			 *	POSIX 1003.1g mandates this order.
@@ -1450,17 +1445,17 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
 			else if (noblock)
 				ret = -EAGAIN;
 			else if (signal_pending(current))
-				ret = -ERESTARTSYS;
+				ret = sock_intr_errno(timeo);
+			else if (sk->sk_state != TCP_ESTABLISHED)
+				ret = -ENOTCONN;
 			else if (skb_peek(&sk->sk_receive_queue) == NULL)
 				/* Wait process until data arrives */
 				schedule();
 
-			current->state = TASK_RUNNING;
-			remove_wait_queue(sk->sk_sleep, &waitq);
-			clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+			finish_wait(sk->sk_sleep, &wait);
 
-			if(ret)
-				return(ret);
+			if (ret)
+				return ret;
 			if (sk->sk_shutdown & RCV_SHUTDOWN)
 				break;
 
-- 
cgit v1.2.3


From 6e66aa15d8873ae7418d5afc6476daec466ff93b Mon Sep 17 00:00:00 2001
From: Olaf Kirch <olaf.kirch@oracle.com>
Date: Fri, 20 Apr 2007 22:08:15 -0700
Subject: [IrDA] af_irda: Silence kernel message in irda_recvmsg_stream

This patch silences an IRDA_ASSERT in irda_recvmsg_stream, as described in
http://bugzilla.kernel.org/show_bug.cgi?id=7512 irda_disconnect_indication
would set sk->sk_err to ECONNRESET, and a subsequent call to recvmsg
would print an irritating kernel message and return -1.

When a connected socket is closed by the peer, recvmsg should return 0
rather than an error. This patch fixes this.

Signed-off-by: Olaf Kirch <olaf.kirch@oracle.com>
Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/af_irda.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index a0dbe07a1b1..e9aa1626ea8 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -131,14 +131,12 @@ static void irda_disconnect_indication(void *instance, void *sap,
 	}
 
 	/* Prevent race conditions with irda_release() and irda_shutdown() */
+	bh_lock_sock(sk);
 	if (!sock_flag(sk, SOCK_DEAD) && sk->sk_state != TCP_CLOSE) {
-		lock_sock(sk);
 		sk->sk_state     = TCP_CLOSE;
-		sk->sk_err       = ECONNRESET;
 		sk->sk_shutdown |= SEND_SHUTDOWN;
 
 		sk->sk_state_change(sk);
-		release_sock(sk);
 
 		/* Close our TSAP.
 		 * If we leave it open, IrLMP put it back into the list of
@@ -158,6 +156,7 @@ static void irda_disconnect_indication(void *instance, void *sap,
 			self->tsap = NULL;
 		}
 	}
+	bh_unlock_sock(sk);
 
 	/* Note : once we are there, there is not much you want to do
 	 * with the socket anymore, apart from closing it.
@@ -1061,7 +1060,8 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
 
 	if (sk->sk_state != TCP_ESTABLISHED) {
 		sock->state = SS_UNCONNECTED;
-		return sock_error(sk);	/* Always set at this point */
+		err = sock_error(sk);
+		return err? err : -ECONNRESET;
 	}
 
 	sock->state = SS_CONNECTED;
@@ -1355,7 +1355,9 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
 	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
 
 	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(!sock_error(sk), return -1;);
+
+	if ((err = sock_error(sk)) < 0)
+		return err;
 
 	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
 				flags & MSG_DONTWAIT, &err);
@@ -1402,13 +1404,15 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
 	struct irda_sock *self = irda_sk(sk);
 	int noblock = flags & MSG_DONTWAIT;
 	size_t copied = 0;
-	int target;
+	int target, err;
 	long timeo;
 
 	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
 
 	IRDA_ASSERT(self != NULL, return -1;);
-	IRDA_ASSERT(!sock_error(sk), return -1;);
+
+	if ((err = sock_error(sk)) < 0)
+		return err;
 
 	if (sock->flags & __SO_ACCEPTCON)
 		return(-EINVAL);
-- 
cgit v1.2.3


From d7f48d1a9398a3bd7bb6f4774640b24a0294cda3 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <samuel@ortiz.org>
Date: Fri, 20 Apr 2007 22:09:33 -0700
Subject: [IrDA] af_irda: irda_accept cleanup

This patch removes a cut'n'paste copy of wait_event_interruptible
from irda_accept.

Signed-off-by: Samuel Ortiz <samuel@ortiz.org>
Acked-by: Olaf Kirch <olaf.kirch@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/af_irda.c | 34 ++++++++--------------------------
 1 file changed, 8 insertions(+), 26 deletions(-)

diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index e9aa1626ea8..4eda10d7914 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -872,37 +872,19 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
 	 * calling us, the data is waiting for us ;-)
 	 * Jean II
 	 */
-	skb = skb_dequeue(&sk->sk_receive_queue);
-	if (skb == NULL) {
-		int ret = 0;
-		DECLARE_WAITQUEUE(waitq, current);
+	while (1) {
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		if (skb)
+			break;
 
 		/* Non blocking operation */
 		if (flags & O_NONBLOCK)
 			return -EWOULDBLOCK;
 
-		/* The following code is a cut'n'paste of the
-		 * wait_event_interruptible() macro.
-		 * We don't us the macro because the condition has
-		 * side effects : we want to make sure that only one
-		 * skb get dequeued - Jean II */
-		add_wait_queue(sk->sk_sleep, &waitq);
-		for (;;) {
-			set_current_state(TASK_INTERRUPTIBLE);
-			skb = skb_dequeue(&sk->sk_receive_queue);
-			if (skb != NULL)
-				break;
-			if (!signal_pending(current)) {
-				schedule();
-				continue;
-			}
-			ret = -ERESTARTSYS;
-			break;
-		}
-		current->state = TASK_RUNNING;
-		remove_wait_queue(sk->sk_sleep, &waitq);
-		if(ret)
-			return -ERESTARTSYS;
+		err = wait_event_interruptible(*(sk->sk_sleep),
+					skb_peek(&sk->sk_receive_queue));
+		if (err)
+			return err;
 	}
 
 	newsk = newsock->sk;
-- 
cgit v1.2.3


From c3ea9fa2741320f9cade15efe10559b549af4ebf Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <samuel@ortiz.org>
Date: Fri, 20 Apr 2007 22:10:13 -0700
Subject: [IrDA] af_irda: IRDA_ASSERT cleanups

In af_irda.c, the multiple IRDA_ASSERT() are either hiding bugs, useless, or
returning the wrong value.
Let's clean that up.

Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/af_irda.c | 32 +++++---------------------------
 1 file changed, 5 insertions(+), 27 deletions(-)

diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 4eda10d7914..06c97c60d54 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -89,7 +89,6 @@ static int irda_data_indication(void *instance, void *sap, struct sk_buff *skb)
 
 	self = instance;
 	sk = instance;
-	IRDA_ASSERT(sk != NULL, return -1;);
 
 	err = sock_queue_rcv_skb(sk, skb);
 	if (err) {
@@ -305,8 +304,6 @@ static void irda_connect_response(struct irda_sock *self)
 
 	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
 
-	IRDA_ASSERT(self != NULL, return;);
-
 	skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER,
 			GFP_ATOMIC);
 	if (skb == NULL) {
@@ -336,7 +333,7 @@ static void irda_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
 
 	self = instance;
 	sk = instance;
-	IRDA_ASSERT(sk != NULL, return;);
+	BUG_ON(sk == NULL);
 
 	switch (flow) {
 	case FLOW_STOP:
@@ -448,7 +445,7 @@ static void irda_discovery_timeout(u_long priv)
 	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
 
 	self = (struct irda_sock *) priv;
-	IRDA_ASSERT(self != NULL, return;);
+	BUG_ON(self == NULL);
 
 	/* Nothing for the caller */
 	self->cachelog = NULL;
@@ -545,8 +542,6 @@ static int irda_find_lsap_sel(struct irda_sock *self, char *name)
 {
 	IRDA_DEBUG(2, "%s(%p, %s)\n", __FUNCTION__, self, name);
 
-	IRDA_ASSERT(self != NULL, return -1;);
-
 	if (self->iriap) {
 		IRDA_WARNING("%s(): busy with a previous query\n",
 			     __FUNCTION__);
@@ -634,8 +629,6 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
 
 	IRDA_DEBUG(2, "%s(), name=%s\n", __FUNCTION__, name);
 
-	IRDA_ASSERT(self != NULL, return -1;);
-
 	/* Ask lmp for the current discovery log
 	 * Note : we have to use irlmp_get_discoveries(), as opposed
 	 * to play with the cachelog directly, because while we are
@@ -783,8 +776,6 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct irda_sock *self = irda_sk(sk);
 	int err;
 
-	IRDA_ASSERT(self != NULL, return -1;);
-
 	IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
 
 	if (addr_len != sizeof(struct sockaddr_irda))
@@ -840,8 +831,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
 
 	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
 
-	IRDA_ASSERT(self != NULL, return -1;);
-
 	err = irda_create(newsock, sk->sk_protocol);
 	if (err)
 		return err;
@@ -888,10 +877,12 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
 	}
 
 	newsk = newsock->sk;
+	if (newsk == NULL)
+		return -EIO;
+
 	newsk->sk_state = TCP_ESTABLISHED;
 
 	new = irda_sk(newsk);
-	IRDA_ASSERT(new != NULL, return -1;);
 
 	/* Now attach up the new socket */
 	new->tsap = irttp_dup(self->tsap, new);
@@ -1153,8 +1144,6 @@ static void irda_destroy_socket(struct irda_sock *self)
 {
 	IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
 
-	IRDA_ASSERT(self != NULL, return;);
-
 	/* Unregister with IrLMP */
 	irlmp_unregister_client(self->ckey);
 	irlmp_unregister_service(self->skey);
@@ -1273,7 +1262,6 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
 		return -ENOTCONN;
 
 	self = irda_sk(sk);
-	IRDA_ASSERT(self != NULL, return -1;);
 
 	/* Check if IrTTP is wants us to slow down */
 
@@ -1336,8 +1324,6 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
 
 	IRDA_DEBUG(4, "%s()\n", __FUNCTION__);
 
-	IRDA_ASSERT(self != NULL, return -1;);
-
 	if ((err = sock_error(sk)) < 0)
 		return err;
 
@@ -1391,8 +1377,6 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
 
 	IRDA_DEBUG(3, "%s()\n", __FUNCTION__);
 
-	IRDA_ASSERT(self != NULL, return -1;);
-
 	if ((err = sock_error(sk)) < 0)
 		return err;
 
@@ -1526,7 +1510,6 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
 		return -ENOTCONN;
 
 	self = irda_sk(sk);
-	IRDA_ASSERT(self != NULL, return -1;);
 
 	/*
 	 * Check that we don't send out too big frames. This is an unreliable
@@ -1595,7 +1578,6 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
 	}
 
 	self = irda_sk(sk);
-	IRDA_ASSERT(self != NULL, return -1;);
 
 	/* Check if an address was specified with sendto. Jean II */
 	if (msg->msg_name) {
@@ -1669,8 +1651,6 @@ static int irda_shutdown(struct socket *sock, int how)
 	struct sock *sk = sock->sk;
 	struct irda_sock *self = irda_sk(sk);
 
-	IRDA_ASSERT(self != NULL, return -1;);
-
 	IRDA_DEBUG(1, "%s(%p)\n", __FUNCTION__, self);
 
 	sk->sk_state       = TCP_CLOSE;
@@ -1843,8 +1823,6 @@ static int irda_setsockopt(struct socket *sock, int level, int optname,
 	struct ias_attrib *	ias_attr;	/* Attribute in IAS object */
 	int opt;
 
-	IRDA_ASSERT(self != NULL, return -1;);
-
 	IRDA_DEBUG(2, "%s(%p)\n", __FUNCTION__, self);
 
 	if (level != SOL_IRLMP)
-- 
cgit v1.2.3


From 599b1fa91439cff8605a71f1a2b5bb42c177b667 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <samuel@ortiz.org>
Date: Fri, 20 Apr 2007 22:12:07 -0700
Subject: [IrDA]: Adding carriage returns to mcs7780 debug statements

Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/irda/mcs7780.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c
index 54d1d543c92..0de867288a4 100644
--- a/drivers/net/irda/mcs7780.c
+++ b/drivers/net/irda/mcs7780.c
@@ -200,14 +200,14 @@ static inline int mcs_setup_transceiver_vishay(struct mcs_cb *mcs)
 /* Setup a communication between mcs7780 and agilent chip. */
 static inline int mcs_setup_transceiver_agilent(struct mcs_cb *mcs)
 {
-	IRDA_WARNING("This transceiver type is not supported yet.");
+	IRDA_WARNING("This transceiver type is not supported yet.\n");
 	return 1;
 }
 
 /* Setup a communication between mcs7780 and sharp chip. */
 static inline int mcs_setup_transceiver_sharp(struct mcs_cb *mcs)
 {
-	IRDA_WARNING("This transceiver type is not supported yet.");
+	IRDA_WARNING("This transceiver type is not supported yet.\n");
 	return 1;
 }
 
@@ -279,7 +279,7 @@ static inline int mcs_setup_transceiver(struct mcs_cb *mcs)
 		break;
 
 	default:
-		IRDA_WARNING("Unknown transceiver type: %d",
+		IRDA_WARNING("Unknown transceiver type: %d\n",
 			     mcs->transceiver_type);
 		ret = 1;
 	}
@@ -318,7 +318,7 @@ static inline int mcs_setup_transceiver(struct mcs_cb *mcs)
 		return ret;
 
 error:
-	IRDA_ERROR("%s", msg);
+	IRDA_ERROR("%s\n", msg);
 	return ret;
 }
 
@@ -587,7 +587,7 @@ static int mcs_speed_change(struct mcs_cb *mcs)
 	} while(cnt++ < 100 && (rval & MCS_IRINTX));
 
 	if(cnt >= 100) {
-		IRDA_ERROR("unable to change speed");
+		IRDA_ERROR("unable to change speed\n");
 		ret = -EIO;
 		goto error;
 	}
@@ -638,7 +638,7 @@ static int mcs_speed_change(struct mcs_cb *mcs)
 
 		default:
 			ret = 1;
-			IRDA_WARNING("Unknown transceiver type: %d",
+			IRDA_WARNING("Unknown transceiver type: %d\n",
 				     mcs->transceiver_type);
 		}
 	if (unlikely(ret))
@@ -733,7 +733,7 @@ static int mcs_net_open(struct net_device *netdev)
 	sprintf(hwname, "usb#%d", mcs->usbdev->devnum);
 	mcs->irlap = irlap_open(netdev, &mcs->qos, hwname);
 	if (!mcs->irlap) {
-		IRDA_ERROR("mcs7780: irlap_open failed");
+		IRDA_ERROR("mcs7780: irlap_open failed\n");
 		goto error2;
 	}
 
@@ -862,7 +862,7 @@ static int mcs_hard_xmit(struct sk_buff *skb, struct net_device *ndev)
 			  mcs->out_buf, wraplen, mcs_send_irq, mcs);
 
 	if ((ret = usb_submit_urb(mcs->tx_urb, GFP_ATOMIC))) {
-		IRDA_ERROR("failed tx_urb: %d", ret);
+		IRDA_ERROR("failed tx_urb: %d\n", ret);
 		switch (ret) {
 		case -ENODEV:
 		case -EPIPE:
@@ -897,7 +897,7 @@ static int mcs_probe(struct usb_interface *intf,
 	if (!ndev)
 		goto error1;
 
-	IRDA_DEBUG(1, "MCS7780 USB-IrDA bridge found at %d.", udev->devnum);
+	IRDA_DEBUG(1, "MCS7780 USB-IrDA bridge found at %d.\n", udev->devnum);
 
 	/* what is it realy for? */
 	SET_MODULE_OWNER(ndev);
@@ -905,7 +905,7 @@ static int mcs_probe(struct usb_interface *intf,
 
 	ret = usb_reset_configuration(udev);
 	if (ret != 0) {
-		IRDA_ERROR("mcs7780: usb reset configuration failed");
+		IRDA_ERROR("mcs7780: usb reset configuration failed\n");
 		goto error2;
 	}
 
@@ -950,7 +950,7 @@ static int mcs_probe(struct usb_interface *intf,
 	if (ret != 0)
 		goto error2;
 
-	IRDA_DEBUG(1, "IrDA: Registered MosChip MCS7780 device as %s",
+	IRDA_DEBUG(1, "IrDA: Registered MosChip MCS7780 device as %s\n",
 		   ndev->name);
 
 	mcs->transceiver_type = transceiver_type;
@@ -981,7 +981,7 @@ static void mcs_disconnect(struct usb_interface *intf)
 	free_netdev(mcs->netdev);
 
 	usb_set_intfdata(intf, NULL);
-	IRDA_DEBUG(0, "MCS7780 now disconnected.");
+	IRDA_DEBUG(0, "MCS7780 now disconnected.\n");
 }
 
 /* Module insertion */
@@ -992,7 +992,7 @@ static int __init mcs_init(void)
 	/* register this driver with the USB subsystem */
 	result = usb_register(&mcs_driver);
 	if (result)
-		IRDA_ERROR("usb_register failed. Error number %d", result);
+		IRDA_ERROR("usb_register failed. Error number %d\n", result);
 
 	return result;
 }
-- 
cgit v1.2.3


From b450777a572d68975c8748b0d48d517dd3468ea6 Mon Sep 17 00:00:00 2001
From: "G. Liakhovetski" <gl@dsa-ac.de>
Date: Fri, 20 Apr 2007 22:12:48 -0700
Subject: [IrDA]: Misc spelling corrections.

Spelling corrections, from "to" to "too".

Signed-off-by: G. Liakhovetski <gl@dsa-ac.de>
Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irlap_event.c |  2 +-
 net/irda/irlap_frame.c | 14 +++++++-------
 net/irda/irttp.c       |  4 ++--
 net/irda/parameters.c  |  8 ++++----
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
index 7b6433fe1dc..0b02073ffdf 100644
--- a/net/irda/irlap_event.c
+++ b/net/irda/irlap_event.c
@@ -590,7 +590,7 @@ static int irlap_state_query(struct irlap_cb *self, IRLAP_EVENT event,
 		if (!self->discovery_log) {
 			IRDA_WARNING("%s: discovery log is gone! "
 				     "maybe the discovery timeout has been set"
-				     " to short?\n", __FUNCTION__);
+				     " too short?\n", __FUNCTION__);
 			break;
 		}
 		hashbin_insert(self->discovery_log,
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 803ac418748..3c5a68e3641 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -413,7 +413,7 @@ static void irlap_recv_discovery_xid_rsp(struct irlap_cb *self,
 	IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
 
 	if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
-		IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+		IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
 		return;
 	}
 
@@ -484,7 +484,7 @@ static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self,
 	char *text;
 
 	if (!pskb_may_pull(skb, sizeof(struct xid_frame))) {
-		IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+		IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
 		return;
 	}
 
@@ -528,7 +528,7 @@ static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self,
 		/* Check if things are sane at this point... */
 		if((discovery_info == NULL) ||
 		   !pskb_may_pull(skb, 3)) {
-			IRDA_ERROR("%s: discovery frame to short!\n",
+			IRDA_ERROR("%s: discovery frame too short!\n",
 				   __FUNCTION__);
 			return;
 		}
@@ -1173,7 +1173,7 @@ static void irlap_recv_frmr_frame(struct irlap_cb *self, struct sk_buff *skb,
 	IRDA_ASSERT(info != NULL, return;);
 
 	if (!pskb_may_pull(skb, 4)) {
-		IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+		IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
 		return;
 	}
 
@@ -1262,7 +1262,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb,
 	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
 
 	if (!pskb_may_pull(skb, sizeof(*frame))) {
-		IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+		IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
 		return;
 	}
 	frame = (struct test_frame *) skb->data;
@@ -1270,7 +1270,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb,
 	/* Broadcast frames must carry saddr and daddr fields */
 	if (info->caddr == CBROADCAST) {
 		if (skb->len < sizeof(struct test_frame)) {
-			IRDA_DEBUG(0, "%s() test frame to short!\n",
+			IRDA_DEBUG(0, "%s() test frame too short!\n",
 				   __FUNCTION__);
 			return;
 		}
@@ -1336,7 +1336,7 @@ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	/* Check if frame is large enough for parsing */
 	if (!pskb_may_pull(skb, 2)) {
-		IRDA_ERROR("%s: frame to short!\n", __FUNCTION__);
+		IRDA_ERROR("%s: frame too short!\n", __FUNCTION__);
 		dev_kfree_skb(skb);
 		return -1;
 	}
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 3279897a01b..7069e4a5825 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -551,7 +551,7 @@ int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb)
 	}
 
 	if (skb->len > self->max_seg_size) {
-		IRDA_DEBUG(1, "%s(), UData is to large for IrLAP!\n",
+		IRDA_DEBUG(1, "%s(), UData is too large for IrLAP!\n",
 			   __FUNCTION__);
 		goto err;
 	}
@@ -598,7 +598,7 @@ int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb)
 	 *  inside an IrLAP frame
 	 */
 	if ((self->tx_max_sdu_size == 0) && (skb->len > self->max_seg_size)) {
-		IRDA_ERROR("%s: SAR disabled, and data is to large for IrLAP!\n",
+		IRDA_ERROR("%s: SAR disabled, and data is too large for IrLAP!\n",
 			   __FUNCTION__);
 		ret = -EMSGSIZE;
 		goto err;
diff --git a/net/irda/parameters.c b/net/irda/parameters.c
index 75a72d203b0..2627dad7cd8 100644
--- a/net/irda/parameters.c
+++ b/net/irda/parameters.c
@@ -160,7 +160,7 @@ static int irda_insert_integer(void *self, __u8 *buf, int len, __u8 pi,
 	}
 	/* Check if buffer is long enough for insertion */
 	if (len < (2+p.pl)) {
-		IRDA_WARNING("%s: buffer to short for insertion!\n",
+		IRDA_WARNING("%s: buffer too short for insertion!\n",
 			     __FUNCTION__);
 		return -1;
 	}
@@ -216,7 +216,7 @@ static int irda_extract_integer(void *self, __u8 *buf, int len, __u8 pi,
 
 	/* Check if buffer is long enough for parsing */
 	if (len < (2+p.pl)) {
-		IRDA_WARNING("%s: buffer to short for parsing! "
+		IRDA_WARNING("%s: buffer too short for parsing! "
 			     "Need %d bytes, but len is only %d\n",
 			     __FUNCTION__, p.pl, len);
 		return -1;
@@ -304,7 +304,7 @@ static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi,
 
 	/* Check if buffer is long enough for parsing */
 	if (len < (2+p.pl)) {
-		IRDA_WARNING("%s: buffer to short for parsing! "
+		IRDA_WARNING("%s: buffer too short for parsing! "
 			     "Need %d bytes, but len is only %d\n",
 			     __FUNCTION__, p.pl, len);
 		return -1;
@@ -343,7 +343,7 @@ static int irda_extract_octseq(void *self, __u8 *buf, int len, __u8 pi,
 
 	/* Check if buffer is long enough for parsing */
 	if (len < (2+p.pl)) {
-		IRDA_WARNING("%s: buffer to short for parsing! "
+		IRDA_WARNING("%s: buffer too short for parsing! "
 			     "Need %d bytes, but len is only %d\n",
 			     __FUNCTION__, p.pl, len);
 		return -1;
-- 
cgit v1.2.3


From 38b4da383705394788aa09208917ba200792de4b Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bbpetkov@yahoo.de>
Date: Fri, 20 Apr 2007 22:14:10 -0700
Subject: [NET]: Fix comments for register_netdev().

Correct the function name in the comments supplied with
register_netdev()

Signed-off-by: Borislav Petkov <bbpetkov@yahoo.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 431998d9cee..f3b99701da5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3130,7 +3130,7 @@ out:
  *	chain. 0 is returned on success. A negative errno code is returned
  *	on a failure to set up the device, or if the name is a duplicate.
  *
- *	This is a wrapper around register_netdev that takes the rtnl semaphore
+ *	This is a wrapper around register_netdevice that takes the rtnl semaphore
  *	and expands the device name if you passed a format string to
  *	alloc_netdev.
  */
-- 
cgit v1.2.3


From 9e412ba7632f71259a53085665d4983b78257b7c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Fri, 20 Apr 2007 22:18:02 -0700
Subject: [TCP]: Sed magic converts func(sk, tp, ...) -> func(sk, ...)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is (mostly) automated change using magic:

sed -e '/struct sock \*sk/ N' -e '/struct sock \*sk/ N'
    -e '/struct sock \*sk/ N' -e '/struct sock \*sk/ N'
    -e 's|struct sock \*sk,[\n\t ]*struct tcp_sock \*tp\([^{]*\n{\n\)|
	  struct sock \*sk\1\tstruct tcp_sock *tp = tcp_sk(sk);\n|g'
    -e 's|struct sock \*sk, struct tcp_sock \*tp|
	  struct sock \*sk|g' -e 's|sk, tp\([^-]\)|sk\1|g'

Fixed four unused variable (tp) warnings that were introduced.

In addition, manually added newlines after local variables and
tweaked function arguments positioning.

$ gcc --version
gcc (GCC) 4.1.1 20060525 (Red Hat 4.1.1-1)
...
$ codiff -fV built-in.o.old built-in.o.new
net/ipv4/route.c:
  rt_cache_flush |  +14
 1 function changed, 14 bytes added

net/ipv4/tcp.c:
  tcp_setsockopt |   -5
  tcp_sendpage   |  -25
  tcp_sendmsg    |  -16
 3 functions changed, 46 bytes removed

net/ipv4/tcp_input.c:
  tcp_try_undo_recovery |   +3
  tcp_try_undo_dsack    |   +2
  tcp_mark_head_lost    |  -12
  tcp_ack               |  -15
  tcp_event_data_recv   |  -32
  tcp_rcv_state_process |  -10
  tcp_rcv_established   |   +1
 7 functions changed, 6 bytes added, 69 bytes removed, diff: -63

net/ipv4/tcp_output.c:
  update_send_head          |   -9
  tcp_transmit_skb          |  +19
  tcp_cwnd_validate         |   +1
  tcp_write_wakeup          |  -17
  __tcp_push_pending_frames |  -25
  tcp_push_one              |   -8
  tcp_send_fin              |   -4
 7 functions changed, 20 bytes added, 63 bytes removed, diff: -43

built-in.o.new:
 18 functions changed, 40 bytes added, 178 bytes removed, diff: -138

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  25 +++++----
 include/net/tcp_ecn.h |  11 ++--
 net/ipv4/tcp.c        |  39 +++++++-------
 net/ipv4/tcp_input.c  | 145 ++++++++++++++++++++++++++++++--------------------
 net/ipv4/tcp_output.c |  54 ++++++++++---------
 5 files changed, 158 insertions(+), 116 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index e79803353c8..43910fe3c44 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -420,9 +420,9 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
 
 /* tcp_output.c */
 
-extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
-				      unsigned int cur_mss, int nonagle);
-extern int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp);
+extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
+				      int nonagle);
+extern int tcp_may_send_now(struct sock *sk);
 extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
 extern void tcp_xmit_retransmit_queue(struct sock *);
 extern void tcp_simple_retransmit(struct sock *);
@@ -479,8 +479,10 @@ static inline void tcp_fast_path_on(struct tcp_sock *tp)
 	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
 }
 
-static inline void tcp_fast_path_check(struct sock *sk, struct tcp_sock *tp)
+static inline void tcp_fast_path_check(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (skb_queue_empty(&tp->out_of_order_queue) &&
 	    tp->rcv_wnd &&
 	    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
@@ -591,10 +593,10 @@ static inline void tcp_dec_pcount_approx(__u32 *count,
 	}
 }
 
-static inline void tcp_packets_out_inc(struct sock *sk, 
-				       struct tcp_sock *tp,
+static inline void tcp_packets_out_inc(struct sock *sk,
 				       const struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	int orig = tp->packets_out;
 
 	tp->packets_out += tcp_skb_pcount(skb);
@@ -778,18 +780,21 @@ static inline void tcp_minshall_update(struct tcp_sock *tp, int mss,
 		tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
 }
 
-static inline void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
+static inline void tcp_check_probe_timer(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
+
 	if (!tp->packets_out && !icsk->icsk_pending)
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
 					  icsk->icsk_rto, TCP_RTO_MAX);
 }
 
-static inline void tcp_push_pending_frames(struct sock *sk,
-					   struct tcp_sock *tp)
+static inline void tcp_push_pending_frames(struct sock *sk)
 {
-	__tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	__tcp_push_pending_frames(sk, tcp_current_mss(sk, 1), tp->nonagle);
 }
 
 static inline void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq)
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
index b5f7c6ac088..89eb3e05116 100644
--- a/include/net/tcp_ecn.h
+++ b/include/net/tcp_ecn.h
@@ -27,9 +27,10 @@ static inline void TCP_ECN_send_synack(struct tcp_sock *tp,
 		TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
 }
 
-static inline void TCP_ECN_send_syn(struct sock *sk, struct tcp_sock *tp,
-				    struct sk_buff *skb)
+static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	tp->ecn_flags = 0;
 	if (sysctl_tcp_ecn) {
 		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR;
@@ -44,9 +45,11 @@ TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th)
 		th->ece = 1;
 }
 
-static inline void TCP_ECN_send(struct sock *sk, struct tcp_sock *tp,
-				struct sk_buff *skb, int tcp_header_len)
+static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
+				int tcp_header_len)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (tp->ecn_flags & TCP_ECN_OK) {
 		/* Not-retransmitted data segment: set ECT and inject CWR. */
 		if (skb->len != tcp_header_len &&
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 99ad52c00c9..2cf9a898ce5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -460,9 +460,9 @@ static inline int forced_push(struct tcp_sock *tp)
 	return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
 }
 
-static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
-			      struct sk_buff *skb)
+static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
 	skb->csum    = 0;
@@ -486,15 +486,17 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
 	}
 }
 
-static inline void tcp_push(struct sock *sk, struct tcp_sock *tp, int flags,
-			    int mss_now, int nonagle)
+static inline void tcp_push(struct sock *sk, int flags, int mss_now,
+			    int nonagle)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (tcp_send_head(sk)) {
 		struct sk_buff *skb = tcp_write_queue_tail(sk);
 		if (!(flags & MSG_MORE) || forced_push(tp))
 			tcp_mark_push(tp, skb);
 		tcp_mark_urg(tp, flags, skb);
-		__tcp_push_pending_frames(sk, tp, mss_now,
+		__tcp_push_pending_frames(sk, mss_now,
 					  (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
 	}
 }
@@ -540,7 +542,7 @@ new_segment:
 			if (!skb)
 				goto wait_for_memory;
 
-			skb_entail(sk, tp, skb);
+			skb_entail(sk, skb);
 			copy = size_goal;
 		}
 
@@ -586,7 +588,7 @@ new_segment:
 
 		if (forced_push(tp)) {
 			tcp_mark_push(tp, skb);
-			__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
+			__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
 		} else if (skb == tcp_send_head(sk))
 			tcp_push_one(sk, mss_now);
 		continue;
@@ -595,7 +597,7 @@ wait_for_sndbuf:
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
 		if (copied)
-			tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+			tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
 		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 			goto do_error;
@@ -606,7 +608,7 @@ wait_for_memory:
 
 out:
 	if (copied)
-		tcp_push(sk, tp, flags, mss_now, tp->nonagle);
+		tcp_push(sk, flags, mss_now, tp->nonagle);
 	return copied;
 
 do_error:
@@ -637,8 +639,9 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
 #define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
 #define TCP_OFF(sk)	(sk->sk_sndmsg_off)
 
-static inline int select_size(struct sock *sk, struct tcp_sock *tp)
+static inline int select_size(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	int tmp = tp->mss_cache;
 
 	if (sk->sk_route_caps & NETIF_F_SG) {
@@ -714,7 +717,7 @@ new_segment:
 				if (!sk_stream_memory_free(sk))
 					goto wait_for_sndbuf;
 
-				skb = sk_stream_alloc_pskb(sk, select_size(sk, tp),
+				skb = sk_stream_alloc_pskb(sk, select_size(sk),
 							   0, sk->sk_allocation);
 				if (!skb)
 					goto wait_for_memory;
@@ -725,7 +728,7 @@ new_segment:
 				if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
 					skb->ip_summed = CHECKSUM_PARTIAL;
 
-				skb_entail(sk, tp, skb);
+				skb_entail(sk, skb);
 				copy = size_goal;
 			}
 
@@ -830,7 +833,7 @@ new_segment:
 
 			if (forced_push(tp)) {
 				tcp_mark_push(tp, skb);
-				__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_PUSH);
+				__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
 			} else if (skb == tcp_send_head(sk))
 				tcp_push_one(sk, mss_now);
 			continue;
@@ -839,7 +842,7 @@ wait_for_sndbuf:
 			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 wait_for_memory:
 			if (copied)
-				tcp_push(sk, tp, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
+				tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
 
 			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
 				goto do_error;
@@ -851,7 +854,7 @@ wait_for_memory:
 
 out:
 	if (copied)
-		tcp_push(sk, tp, flags, mss_now, tp->nonagle);
+		tcp_push(sk, flags, mss_now, tp->nonagle);
 	TCP_CHECK_TIMER(sk);
 	release_sock(sk);
 	return copied;
@@ -1389,7 +1392,7 @@ do_prequeue:
 skip_copy:
 		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
 			tp->urg_data = 0;
-			tcp_fast_path_check(sk, tp);
+			tcp_fast_path_check(sk);
 		}
 		if (used + offset < skb->len)
 			continue;
@@ -1830,7 +1833,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			 * for currently queued segments.
 			 */
 			tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
-			tcp_push_pending_frames(sk, tp);
+			tcp_push_pending_frames(sk);
 		} else {
 			tp->nonagle &= ~TCP_NAGLE_OFF;
 		}
@@ -1854,7 +1857,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			tp->nonagle &= ~TCP_NAGLE_CORK;
 			if (tp->nonagle&TCP_NAGLE_OFF)
 				tp->nonagle |= TCP_NAGLE_PUSH;
-			tcp_push_pending_frames(sk, tp);
+			tcp_push_pending_frames(sk);
 		}
 		break;
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2fbfc2e4209..63338939078 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -235,9 +235,9 @@ static void tcp_fixup_sndbuf(struct sock *sk)
  */
 
 /* Slow part of check#2. */
-static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
-			     const struct sk_buff *skb)
+static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	/* Optimize this! */
 	int truesize = tcp_win_from_space(skb->truesize)/2;
 	int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
@@ -252,9 +252,11 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
 	return 0;
 }
 
-static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
+static void tcp_grow_window(struct sock *sk,
 			    struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	/* Check #1 */
 	if (tp->rcv_ssthresh < tp->window_clamp &&
 	    (int)tp->rcv_ssthresh < tcp_space(sk) &&
@@ -267,7 +269,7 @@ static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
 		if (tcp_win_from_space(skb->truesize) <= skb->len)
 			incr = 2*tp->advmss;
 		else
-			incr = __tcp_grow_window(sk, tp, skb);
+			incr = __tcp_grow_window(sk, skb);
 
 		if (incr) {
 			tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp);
@@ -330,8 +332,9 @@ static void tcp_init_buffer_space(struct sock *sk)
 }
 
 /* 5. Recalculate window clamp after socket hit its memory bounds. */
-static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
+static void tcp_clamp_window(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	icsk->icsk_ack.quick = 0;
@@ -503,8 +506,9 @@ new_measure:
  * each ACK we send, he increments snd_cwnd and transmits more of his
  * queue.  -DaveM
  */
-static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
+static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 now;
 
@@ -545,7 +549,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
 	TCP_ECN_check_ce(tp, skb);
 
 	if (skb->len >= 128)
-		tcp_grow_window(sk, tp, skb);
+		tcp_grow_window(sk, skb);
 }
 
 /* Called to compute a smoothed rtt estimate. The data fed to this
@@ -1541,8 +1545,10 @@ static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
 	return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
 }
 
-static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
+static inline int tcp_head_timedout(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	return tp->packets_out &&
 	       tcp_skb_timedout(sk, tcp_write_queue_head(sk));
 }
@@ -1640,8 +1646,9 @@ static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
  * Main question: may we further continue forward transmission
  * with the same cwnd?
  */
-static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
+static int tcp_time_to_recover(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 packets_out;
 
 	/* Do not perform any recovery during FRTO algorithm */
@@ -1659,7 +1666,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
 	/* Trick#3 : when we use RFC2988 timer restart, fast
 	 * retransmit can be triggered by timeout of queue head.
 	 */
-	if (tcp_head_timedout(sk, tp))
+	if (tcp_head_timedout(sk))
 		return 1;
 
 	/* Trick#4: It is still not OK... But will it be useful to delay
@@ -1668,7 +1675,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
 	packets_out = tp->packets_out;
 	if (packets_out <= tp->reordering &&
 	    tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
-	    !tcp_may_send_now(sk, tp)) {
+	    !tcp_may_send_now(sk)) {
 		/* We have nothing to send. This connection is limited
 		 * either by receiver window or by application.
 		 */
@@ -1708,8 +1715,10 @@ static void tcp_add_reno_sack(struct sock *sk)
 
 /* Account for ACK, ACKing some data in Reno Recovery phase. */
 
-static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acked)
+static void tcp_remove_reno_sacks(struct sock *sk, int acked)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (acked > 0) {
 		/* One ACK acked hole. The rest eat duplicate ACKs. */
 		if (acked-1 >= tp->sacked_out)
@@ -1728,9 +1737,10 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
 }
 
 /* Mark head of queue up as lost. */
-static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
+static void tcp_mark_head_lost(struct sock *sk,
 			       int packets, u32 high_seq)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	int cnt;
 
@@ -1771,15 +1781,17 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
 
 /* Account newly detected lost packet(s) */
 
-static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
+static void tcp_update_scoreboard(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (IsFack(tp)) {
 		int lost = tp->fackets_out - tp->reordering;
 		if (lost <= 0)
 			lost = 1;
-		tcp_mark_head_lost(sk, tp, lost, tp->high_seq);
+		tcp_mark_head_lost(sk, lost, tp->high_seq);
 	} else {
-		tcp_mark_head_lost(sk, tp, 1, tp->high_seq);
+		tcp_mark_head_lost(sk, 1, tp->high_seq);
 	}
 
 	/* New heuristics: it is possible only after we switched
@@ -1787,7 +1799,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
 	 * Hence, we can detect timed out packets during fast
 	 * retransmit without falling to slow start.
 	 */
-	if (!IsReno(tp) && tcp_head_timedout(sk, tp)) {
+	if (!IsReno(tp) && tcp_head_timedout(sk)) {
 		struct sk_buff *skb;
 
 		skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
@@ -1867,9 +1879,11 @@ static inline int tcp_packet_delayed(struct tcp_sock *tp)
 /* Undo procedures. */
 
 #if FASTRETRANS_DEBUG > 1
-static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg)
+static void DBGUNDO(struct sock *sk, const char *msg)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
+
 	printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
 	       msg,
 	       NIPQUAD(inet->daddr), ntohs(inet->dport),
@@ -1915,13 +1929,15 @@ static inline int tcp_may_undo(struct tcp_sock *tp)
 }
 
 /* People celebrate: "We love our President!" */
-static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
+static int tcp_try_undo_recovery(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (tcp_may_undo(tp)) {
 		/* Happy end! We did not retransmit anything
 		 * or our original transmission succeeded.
 		 */
-		DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
+		DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
 		tcp_undo_cwr(sk, 1);
 		if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
 			NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
@@ -1941,10 +1957,12 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp)
 }
 
 /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
-static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp)
+static void tcp_try_undo_dsack(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (tp->undo_marker && !tp->undo_retrans) {
-		DBGUNDO(sk, tp, "D-SACK");
+		DBGUNDO(sk, "D-SACK");
 		tcp_undo_cwr(sk, 1);
 		tp->undo_marker = 0;
 		NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO);
@@ -1953,9 +1971,9 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp)
 
 /* Undo during fast recovery after partial ACK. */
 
-static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
-				int acked)
+static int tcp_try_undo_partial(struct sock *sk, int acked)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	/* Partial ACK arrived. Force Hoe's retransmit. */
 	int failed = IsReno(tp) || tp->fackets_out>tp->reordering;
 
@@ -1968,7 +1986,7 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
 
 		tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
 
-		DBGUNDO(sk, tp, "Hoe");
+		DBGUNDO(sk, "Hoe");
 		tcp_undo_cwr(sk, 0);
 		NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO);
 
@@ -1982,8 +2000,10 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
 }
 
 /* Undo during loss recovery after partial ACK. */
-static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
+static int tcp_try_undo_loss(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (tcp_may_undo(tp)) {
 		struct sk_buff *skb;
 		tcp_for_write_queue(skb, sk) {
@@ -1994,7 +2014,7 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
 
 		clear_all_retrans_hints(tp);
 
-		DBGUNDO(sk, tp, "partial loss");
+		DBGUNDO(sk, "partial loss");
 		tp->lost_out = 0;
 		tp->left_out = tp->sacked_out;
 		tcp_undo_cwr(sk, 1);
@@ -2016,8 +2036,10 @@ static inline void tcp_complete_cwr(struct sock *sk)
 	tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
 }
 
-static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
+static void tcp_try_to_open(struct sock *sk, int flag)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	tp->left_out = tp->sacked_out;
 
 	if (tp->retrans_out == 0)
@@ -2111,7 +2133,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 	    before(tp->snd_una, tp->high_seq) &&
 	    icsk->icsk_ca_state != TCP_CA_Open &&
 	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
+		tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, tp->high_seq);
 		NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
 	}
 
@@ -2127,7 +2149,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 		switch (icsk->icsk_ca_state) {
 		case TCP_CA_Loss:
 			icsk->icsk_retransmits = 0;
-			if (tcp_try_undo_recovery(sk, tp))
+			if (tcp_try_undo_recovery(sk))
 				return;
 			break;
 
@@ -2141,7 +2163,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 			break;
 
 		case TCP_CA_Disorder:
-			tcp_try_undo_dsack(sk, tp);
+			tcp_try_undo_dsack(sk);
 			if (!tp->undo_marker ||
 			    /* For SACK case do not Open to allow to undo
 			     * catching for all duplicate ACKs. */
@@ -2154,7 +2176,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 		case TCP_CA_Recovery:
 			if (IsReno(tp))
 				tcp_reset_reno_sack(tp);
-			if (tcp_try_undo_recovery(sk, tp))
+			if (tcp_try_undo_recovery(sk))
 				return;
 			tcp_complete_cwr(sk);
 			break;
@@ -2170,14 +2192,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 		} else {
 			int acked = prior_packets - tp->packets_out;
 			if (IsReno(tp))
-				tcp_remove_reno_sacks(sk, tp, acked);
-			is_dupack = tcp_try_undo_partial(sk, tp, acked);
+				tcp_remove_reno_sacks(sk, acked);
+			is_dupack = tcp_try_undo_partial(sk, acked);
 		}
 		break;
 	case TCP_CA_Loss:
 		if (flag&FLAG_DATA_ACKED)
 			icsk->icsk_retransmits = 0;
-		if (!tcp_try_undo_loss(sk, tp)) {
+		if (!tcp_try_undo_loss(sk)) {
 			tcp_moderate_cwnd(tp);
 			tcp_xmit_retransmit_queue(sk);
 			return;
@@ -2194,10 +2216,10 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 		}
 
 		if (icsk->icsk_ca_state == TCP_CA_Disorder)
-			tcp_try_undo_dsack(sk, tp);
+			tcp_try_undo_dsack(sk);
 
-		if (!tcp_time_to_recover(sk, tp)) {
-			tcp_try_to_open(sk, tp, flag);
+		if (!tcp_time_to_recover(sk)) {
+			tcp_try_to_open(sk, flag);
 			return;
 		}
 
@@ -2236,8 +2258,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 		tcp_set_ca_state(sk, TCP_CA_Recovery);
 	}
 
-	if (is_dupack || tcp_head_timedout(sk, tp))
-		tcp_update_scoreboard(sk, tp);
+	if (is_dupack || tcp_head_timedout(sk))
+		tcp_update_scoreboard(sk);
 	tcp_cwnd_down(sk);
 	tcp_xmit_retransmit_queue(sk);
 }
@@ -2313,8 +2335,10 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
  * RFC2988 recommends to restart timer to now+rto.
  */
 
-static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
+static void tcp_ack_packets_out(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (!tp->packets_out) {
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
 	} else {
@@ -2471,7 +2495,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 
 	if (acked&FLAG_ACKED) {
 		tcp_ack_update_rtt(sk, acked, seq_rtt);
-		tcp_ack_packets_out(sk, tp);
+		tcp_ack_packets_out(sk);
 		if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED))
 			(*rtt_sample)(sk, tcp_usrtt(&tv));
 
@@ -2556,9 +2580,10 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack
  * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
  * and in FreeBSD. NetBSD's one is even worse.) is wrong.
  */
-static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
-				 struct sk_buff *skb, u32 ack, u32 ack_seq)
+static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
+				 u32 ack_seq)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	int flag = 0;
 	u32 nwin = ntohs(tcp_hdr(skb)->window);
 
@@ -2576,7 +2601,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 			 * fast path is recovered for sending TCP.
 			 */
 			tp->pred_flags = 0;
-			tcp_fast_path_check(sk, tp);
+			tcp_fast_path_check(sk);
 
 			if (nwin > tp->max_window) {
 				tp->max_window = nwin;
@@ -2762,7 +2787,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 		else
 			NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS);
 
-		flag |= tcp_ack_update_window(sk, tp, skb, ack, ack_seq);
+		flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
 
 		if (TCP_SKB_CB(skb)->sacked)
 			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
@@ -3426,7 +3451,7 @@ queue_and_out:
 		}
 		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 		if (skb->len)
-			tcp_event_data_recv(sk, tp, skb);
+			tcp_event_data_recv(sk, skb);
 		if (th->fin)
 			tcp_fin(skb, sk, th);
 
@@ -3443,7 +3468,7 @@ queue_and_out:
 		if (tp->rx_opt.num_sacks)
 			tcp_sack_remove(tp);
 
-		tcp_fast_path_check(sk, tp);
+		tcp_fast_path_check(sk);
 
 		if (eaten > 0)
 			__kfree_skb(skb);
@@ -3734,7 +3759,7 @@ static int tcp_prune_queue(struct sock *sk)
 	NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED);
 
 	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
-		tcp_clamp_window(sk, tp);
+		tcp_clamp_window(sk);
 	else if (tcp_memory_pressure)
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
@@ -3803,8 +3828,10 @@ void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
+static int tcp_should_expand_sndbuf(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	/* If the user specified a specific send buffer setting, do
 	 * not modify it.
 	 */
@@ -3836,7 +3863,7 @@ static void tcp_new_space(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tcp_should_expand_sndbuf(sk, tp)) {
+	if (tcp_should_expand_sndbuf(sk)) {
 		int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
 			MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
 		    demanded = max_t(unsigned int, tp->snd_cwnd,
@@ -3860,9 +3887,9 @@ static void tcp_check_space(struct sock *sk)
 	}
 }
 
-static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
+static inline void tcp_data_snd_check(struct sock *sk)
 {
-	tcp_push_pending_frames(sk, tp);
+	tcp_push_pending_frames(sk);
 	tcp_check_space(sk);
 }
 
@@ -4196,7 +4223,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				 */
 				tcp_ack(sk, skb, 0);
 				__kfree_skb(skb);
-				tcp_data_snd_check(sk, tp);
+				tcp_data_snd_check(sk);
 				return 0;
 			} else { /* Header too small */
 				TCP_INC_STATS_BH(TCP_MIB_INERRS);
@@ -4267,12 +4294,12 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 				tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
 			}
 
-			tcp_event_data_recv(sk, tp, skb);
+			tcp_event_data_recv(sk, skb);
 
 			if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
 				/* Well, only one small jumplet in fast path... */
 				tcp_ack(sk, skb, FLAG_DATA);
-				tcp_data_snd_check(sk, tp);
+				tcp_data_snd_check(sk);
 				if (!inet_csk_ack_scheduled(sk))
 					goto no_ack;
 			}
@@ -4355,7 +4382,7 @@ step5:
 	/* step 7: process the segment text */
 	tcp_data_queue(sk, skb);
 
-	tcp_data_snd_check(sk, tp);
+	tcp_data_snd_check(sk);
 	tcp_ack_snd_check(sk);
 	return 0;
 
@@ -4672,7 +4699,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 		/* Do step6 onward by hand. */
 		tcp_urg(sk, skb, th);
 		__kfree_skb(skb);
-		tcp_data_snd_check(sk, tp);
+		tcp_data_snd_check(sk);
 		return 0;
 	}
 
@@ -4864,7 +4891,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 
 	/* tcp_data could move socket to TIME-WAIT */
 	if (sk->sk_state != TCP_CLOSE) {
-		tcp_data_snd_check(sk, tp);
+		tcp_data_snd_check(sk);
 		tcp_ack_snd_check(sk);
 	}
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 94d9f0c6368..3a60aea744a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -62,12 +62,13 @@ int sysctl_tcp_base_mss __read_mostly = 512;
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
-static void update_send_head(struct sock *sk, struct tcp_sock *tp,
-			     struct sk_buff *skb)
+static void update_send_head(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	tcp_advance_send_head(sk, skb);
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
-	tcp_packets_out_inc(sk, tp, skb);
+	tcp_packets_out_inc(sk, skb);
 }
 
 /* SND.NXT, if window was not shrunk.
@@ -76,8 +77,10 @@ static void update_send_head(struct sock *sk, struct tcp_sock *tp,
  * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
  * invalid. OK, let's make this for now:
  */
-static inline __u32 tcp_acceptable_seq(struct sock *sk, struct tcp_sock *tp)
+static inline __u32 tcp_acceptable_seq(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt))
 		return tp->snd_nxt;
 	else
@@ -516,7 +519,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 					     md5 ? &md5_hash_location :
 #endif
 					     NULL);
-		TCP_ECN_send(sk, tp, skb, tcp_header_size);
+		TCP_ECN_send(sk, skb, tcp_header_size);
 	}
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -927,8 +930,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 
 /* Congestion window validation. (RFC2861) */
 
-static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
+static void tcp_cwnd_validate(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 packets_out = tp->packets_out;
 
 	if (packets_out >= tp->snd_cwnd) {
@@ -1076,8 +1080,9 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
 	return cwnd_quota;
 }
 
-int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
+int tcp_may_send_now(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb = tcp_send_head(sk);
 
 	return (skb &&
@@ -1144,8 +1149,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
  *
  * This algorithm is from John Heffner.
  */
-static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
+static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 send_win, cong_win, limit, in_flight;
 
@@ -1324,7 +1330,7 @@ static int tcp_mtu_probe(struct sock *sk)
 		/* Decrement cwnd here because we are sending
 		* effectively two packets. */
 		tp->snd_cwnd--;
-		update_send_head(sk, tp, nskb);
+		update_send_head(sk, nskb);
 
 		icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
 		tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
@@ -1387,7 +1393,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 						      nonagle : TCP_NAGLE_PUSH))))
 				break;
 		} else {
-			if (tcp_tso_should_defer(sk, tp, skb))
+			if (tcp_tso_should_defer(sk, skb))
 				break;
 		}
 
@@ -1416,14 +1422,14 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 		/* Advance the send_head.  This one is sent out.
 		 * This call will increment packets_out.
 		 */
-		update_send_head(sk, tp, skb);
+		update_send_head(sk, skb);
 
 		tcp_minshall_update(tp, mss_now, skb);
 		sent_pkts++;
 	}
 
 	if (likely(sent_pkts)) {
-		tcp_cwnd_validate(sk, tp);
+		tcp_cwnd_validate(sk);
 		return 0;
 	}
 	return !tp->packets_out && tcp_send_head(sk);
@@ -1433,14 +1439,14 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
  * TCP_CORK or attempt at coalescing tiny packets.
  * The socket must be locked by the caller.
  */
-void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
-			       unsigned int cur_mss, int nonagle)
+void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
+			       int nonagle)
 {
 	struct sk_buff *skb = tcp_send_head(sk);
 
 	if (skb) {
 		if (tcp_write_xmit(sk, cur_mss, nonagle))
-			tcp_check_probe_timer(sk, tp);
+			tcp_check_probe_timer(sk);
 	}
 }
 
@@ -1484,8 +1490,8 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
 		if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
-			update_send_head(sk, tp, skb);
-			tcp_cwnd_validate(sk, tp);
+			update_send_head(sk, skb);
+			tcp_cwnd_validate(sk);
 			return;
 		}
 	}
@@ -1933,7 +1939,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	 * segments to send.
 	 */
 
-	if (tcp_may_send_now(sk, tp))
+	if (tcp_may_send_now(sk))
 		return;
 
 	if (tp->forward_skb_hint) {
@@ -2023,7 +2029,7 @@ void tcp_send_fin(struct sock *sk)
 		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
 		tcp_queue_skb(sk, skb);
 	}
-	__tcp_push_pending_frames(sk, tp, mss_now, TCP_NAGLE_OFF);
+	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
 }
 
 /* We get here when a process closes a file descriptor (either due to
@@ -2033,7 +2039,6 @@ void tcp_send_fin(struct sock *sk)
  */
 void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
 	/* NOTE: No TCP options attached and we never retransmit this. */
@@ -2053,7 +2058,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	skb_shinfo(skb)->gso_type = 0;
 
 	/* Send it off. */
-	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
+	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk);
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	if (tcp_transmit_skb(sk, skb, 0, priority))
@@ -2271,7 +2276,7 @@ int tcp_connect(struct sock *sk)
 	skb_reserve(buff, MAX_TCP_HEADER);
 
 	TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
-	TCP_ECN_send_syn(sk, tp, buff);
+	TCP_ECN_send_syn(sk, buff);
 	TCP_SKB_CB(buff)->sacked = 0;
 	skb_shinfo(buff)->gso_segs = 1;
 	skb_shinfo(buff)->gso_size = 0;
@@ -2363,7 +2368,6 @@ void tcp_send_ack(struct sock *sk)
 {
 	/* If we have been reset, we may not send again. */
 	if (sk->sk_state != TCP_CLOSE) {
-		struct tcp_sock *tp = tcp_sk(sk);
 		struct sk_buff *buff;
 
 		/* We are not putting this on the write queue, so
@@ -2389,7 +2393,7 @@ void tcp_send_ack(struct sock *sk)
 		skb_shinfo(buff)->gso_type = 0;
 
 		/* Send it off, this clears delayed acks for us. */
-		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
+		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk);
 		TCP_SKB_CB(buff)->when = tcp_time_stamp;
 		tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
 	}
@@ -2467,7 +2471,7 @@ int tcp_write_wakeup(struct sock *sk)
 			TCP_SKB_CB(skb)->when = tcp_time_stamp;
 			err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 			if (!err) {
-				update_send_head(sk, tp, skb);
+				update_send_head(sk, skb);
 			}
 			return err;
 		} else {
-- 
cgit v1.2.3


From 2334e973559e119fa4161047035f03ad97a8676a Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sat, 21 Apr 2007 20:12:43 +0900
Subject: [IPV6] SNMP: Avoid unaligned accesses.

Because stats pointer may not be aligned for u64, use memcpy
to fill u64 values.
Issue reported by David Miller <davem@davemloft.net>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/ipv6.h |  2 +-
 net/ipv6/proc.c    | 22 ++++++++++++++++------
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 4408def379b..1df360eb079 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -172,7 +172,7 @@ int snmp6_alloc_dev(struct inet6_dev *idev);
 int snmp6_free_dev(struct inet6_dev *idev);
 int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign);
 void snmp6_mib_free(void *ptr[2]);
-void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes);
+void snmp6_fill_stats(void *stats, struct inet6_dev *idev, int attrtype, int bytes);
 
 struct ip6_ra_chain
 {
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index c847cef626a..aba94316b77 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -210,20 +210,30 @@ static const struct file_operations snmp6_seq_fops = {
 };
 #endif	/* CONFIG_PROC_FS */
 
+/*
+ * Stats may not be aligned for u64, so use memcpy to avoid
+ * unaligned accesses.
+ */
+static inline void __set_u64(void *p, u64 v)
+{
+	memcpy(p, &v, sizeof(u64));
+}
+
 static inline void
-__snmp6_fill_stats(u64 *stats, void **mib, int items, int bytes)
+__snmp6_fill_stats(void *stats, void **mib, int items, int bytes)
 {
 	int i;
+	u8 *p = stats;
 	int pad = bytes - sizeof(u64) * items;
 	BUG_ON(pad < 0);
-	stats[0] = items;
-	for (i = 1; i < items; i++)
-		stats[i] = (u64)fold_field(mib, i);
-	memset(&stats[items], 0, pad);
+	__set_u64(p, items);
+	for (i = 1, p += sizeof(u64); i < items; i++, p += sizeof(u64))
+		__set_u64(p, fold_field(mib, i));
+	memset(p, 0, pad);
 }
 
 void
-snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes)
+snmp6_fill_stats(void *stats, struct inet6_dev *idev, int attrtype, int bytes)
 {
 	switch(attrtype) {
 	case IFLA_INET6_STATS:
-- 
cgit v1.2.3


From 952a10be3272c4b5b7839b09cb0483dc72137101 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sat, 21 Apr 2007 20:13:44 +0900
Subject: [IPV6] SNMP: Fix several warnings without procfs.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/route.c    | 3 ++-
 net/ipv6/tcp_ipv6.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6264ec3bb15..b46ad53044b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2489,8 +2489,9 @@ ctl_table ipv6_route_table[] = {
 
 void __init ip6_route_init(void)
 {
+#ifdef 	CONFIG_PROC_FS
 	struct proc_dir_entry *p;
-
+#endif
 	ip6_dst_ops.kmem_cachep =
 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2b668a6ae69..e2f25ea43b6 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1947,6 +1947,7 @@ static int tcp_v6_destroy_sock(struct sock *sk)
 	return inet6_destroy_sock(sk);
 }
 
+#ifdef CONFIG_PROC_FS
 /* Proc filesystem TCPv6 sock list dumping. */
 static void get_openreq6(struct seq_file *seq,
 			 struct sock *sk, struct request_sock *req, int i, int uid)
@@ -2063,7 +2064,6 @@ static void get_timewait6_sock(struct seq_file *seq,
 		   atomic_read(&tw->tw_refcnt), tw);
 }
 
-#ifdef CONFIG_PROC_FS
 static int tcp6_seq_show(struct seq_file *seq, void *v)
 {
 	struct tcp_iter_state *st;
-- 
cgit v1.2.3


From 97fc8d0bc58cd09e62dc06ea5a64b58841738934 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sat, 21 Apr 2007 19:52:04 -0700
Subject: [IPV6] SNMP: Use put_unaligned() instead of memcpy().

Hint from David Miller <davem@davemloft.net>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h |  2 +-
 net/ipv6/proc.c    | 26 ++++++++++----------------
 2 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 1df360eb079..4408def379b 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -172,7 +172,7 @@ int snmp6_alloc_dev(struct inet6_dev *idev);
 int snmp6_free_dev(struct inet6_dev *idev);
 int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign);
 void snmp6_mib_free(void *ptr[2]);
-void snmp6_fill_stats(void *stats, struct inet6_dev *idev, int attrtype, int bytes);
+void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes);
 
 struct ip6_ra_chain
 {
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index aba94316b77..7a00bedb6b9 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -23,6 +23,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/stddef.h>
+#include <asm/unaligned.h>
 #include <net/sock.h>
 #include <net/tcp.h>
 #include <net/transp_v6.h>
@@ -210,30 +211,23 @@ static const struct file_operations snmp6_seq_fops = {
 };
 #endif	/* CONFIG_PROC_FS */
 
-/*
- * Stats may not be aligned for u64, so use memcpy to avoid
- * unaligned accesses.
- */
-static inline void __set_u64(void *p, u64 v)
-{
-	memcpy(p, &v, sizeof(u64));
-}
-
 static inline void
-__snmp6_fill_stats(void *stats, void **mib, int items, int bytes)
+__snmp6_fill_stats(u64 *stats, void **mib, int items, int bytes)
 {
 	int i;
-	u8 *p = stats;
 	int pad = bytes - sizeof(u64) * items;
 	BUG_ON(pad < 0);
-	__set_u64(p, items);
-	for (i = 1, p += sizeof(u64); i < items; i++, p += sizeof(u64))
-		__set_u64(p, fold_field(mib, i));
-	memset(p, 0, pad);
+
+	/* Use put_unaligned() because stats may not be aligned for u64. */
+	put_unaligned(items, &stats[0]);
+	for (i = 1; i < items; i++)
+		put_unaligned(fold_field(mib, i), &stats[i]);
+
+	memset(&stats[items], 0, pad);
 }
 
 void
-snmp6_fill_stats(void *stats, struct inet6_dev *idev, int attrtype, int bytes)
+snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes)
 {
 	switch(attrtype) {
 	case IFLA_INET6_STATS:
-- 
cgit v1.2.3


From 372cc74c8b41d808af0a3fa8b11795cba79e7299 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Sun, 22 Apr 2007 23:22:24 -0700
Subject: [NET]: Prevent much sadness in qdisc_lock_tree().

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index f3b99701da5..18c51b40f66 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3016,9 +3016,7 @@ int register_netdevice(struct net_device *dev)
 	spin_lock_init(&dev->queue_lock);
 	spin_lock_init(&dev->_xmit_lock);
 	dev->xmit_lock_owner = -1;
-#ifdef CONFIG_NET_CLS_ACT
 	spin_lock_init(&dev->ingress_lock);
-#endif
 
 	dev->iflink = -1;
 
-- 
cgit v1.2.3


From 724c6b35ecff0fb68bbb315a34b2f9cb694865d3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 23 Apr 2007 12:18:20 -0700
Subject: [WIRELESS]: Update MAINTAINERS for wireless mailing list.

This patch adds the linux-wireless mailing list to all appropriate
entries in the MAINTAINERS file.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index ab258c7f46a..7adebcc99cb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -656,6 +656,7 @@ S:	Supported
 ATMEL WIRELESS DRIVER
 P:	Simon Kelley
 M:	simon@thekelleys.org.uk
+L:	linux-wireless@vger.kernel.org
 W:	http://www.thekelleys.org.uk/atmel
 W:	http://atmelwlandriver.sourceforge.net/
 S:	Maintained
@@ -711,6 +712,7 @@ P:	Larry Finger
 M:	Larry.Finger@lwfinger.net
 P:	Stefano Brivio
 M:	st3@riseup.net
+L:	linux-wireless@vger.kernel.org
 W:	http://bcm43xx.berlios.de/
 S:	Maintained
 
@@ -1557,6 +1559,7 @@ S:	Supported
 HOST AP DRIVER
 P:	Jouni Malinen
 M:	jkmaline@cc.hut.fi
+L:	linux-wireless@vger.kernel.org
 L:	hostap@shmoo.com
 W:	http://hostap.epitest.fi/
 S:	Maintained
@@ -1829,6 +1832,7 @@ P:	Yi Zhu
 M:	yi.zhu@intel.com
 P:	James Ketrenos
 M:	jketreno@linux.intel.com
+L:	linux-wireless@vger.kernel.org
 L:	ipw2100-devel@lists.sourceforge.net
 L:	http://lists.sourceforge.net/mailman/listinfo/ipw2100-devel
 W:	http://ipw2100.sourceforge.net
@@ -1839,6 +1843,7 @@ P:	Yi Zhu
 M:	yi.zhu@intel.com
 P:	James Ketrenos
 M:	jketreno@linux.intel.com
+L:	linux-wireless@vger.kernel.org
 L:	ipw2100-devel@lists.sourceforge.net
 L:	http://lists.sourceforge.net/mailman/listinfo/ipw2100-devel
 W:	http://ipw2200.sourceforge.net
@@ -2531,6 +2536,7 @@ P:	Pavel Roskin
 M:	proski@gnu.org
 P:	David Gibson
 M:	hermes@gibson.dropbear.id.au
+L:	linux-wireless@vger.kernel.org
 L:	orinoco-users@lists.sourceforge.net
 L:	orinoco-devel@lists.sourceforge.net
 W:	http://www.nongnu.org/orinoco/
@@ -2710,7 +2716,7 @@ S:	Supported
 PRISM54 WIRELESS DRIVER
 P:	Prism54 Development Team
 M:	developers@islsm.org
-L:	netdev@vger.kernel.org
+L:	linux-wireless@vger.kernel.org
 W:	http://prism54.org
 S:	Maintained
 
@@ -2781,7 +2787,7 @@ S:	Maintained
 RAYLINK/WEBGEAR 802.11 WIRELESS LAN DRIVER
 P:	Corey Thomas
 M:	corey@world.std.com
-L:	linux-kernel@vger.kernel.org
+L:	linux-wireless@vger.kernel.org
 S:	Maintained
 
 RANDOM NUMBER DRIVER
@@ -3044,7 +3050,7 @@ M:	josejx@gentoo.org
 P:	Daniel Drake
 M:	dsd@gentoo.org
 W:	http://softmac.sipsolutions.net/
-L:	netdev@vger.kernel.org
+L:	linux-wireless@vger.kernel.org
 S:	Maintained
 
 SOFTWARE RAID (Multiple Disks) SUPPORT
@@ -3749,6 +3755,7 @@ S:	Maintained
 WAVELAN NETWORK DRIVER & WIRELESS EXTENSIONS
 P:	Jean Tourrilhes
 M:	jt@hpl.hp.com
+L:	linux-wireless@vger.kernel.org
 W:	http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/
 S:	Maintained
 
@@ -3766,6 +3773,7 @@ S:	Maintained
 WL3501 WIRELESS PCMCIA CARD DRIVER
 P:	Arnaldo Carvalho de Melo
 M:	acme@ghostprotocols.net
+L:	linux-wireless@vger.kernel.org
 W:	http://oops.ghostprotocols.net:81/blog
 S:	Maintained
 
@@ -3829,6 +3837,7 @@ M:	dsd@gentoo.org
 P:	Ulrich Kunitz
 M:	kune@deine-taler.de
 W:	http://zd1211.ath.cx/wiki/DriverRewrite
+L:	linux-wireless@vger.kernel.org
 L:	zd1211-devs@lists.sourceforge.net (subscribers-only)
 S:	Maintained
 
-- 
cgit v1.2.3


From 2a5e1c0eb9efe26eed1dd072fe08de5797a7efd5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 23 Apr 2007 12:19:12 -0700
Subject: [WIRELESS]: Refactor wireless Kconfig.

This patch refactors the wireless Kconfig all over and already
introduces net/wireless/Kconfig with just the WEXT bit for now,
the cfg80211 patch will add to that as well.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Makefile                  |   2 +-
 drivers/net/wireless/Kconfig          | 120 ++++++++++++++--------------------
 drivers/net/wireless/bcm43xx/Kconfig  |   3 +-
 drivers/net/wireless/hostap/Kconfig   |   3 +-
 drivers/net/wireless/zd1211rw/Kconfig |   3 +-
 net/Kconfig                           |  11 ++--
 net/Makefile                          |   2 +
 net/ieee80211/Kconfig                 |   3 +-
 net/wireless/Kconfig                  |  23 +++++++
 net/wireless/Makefile                 |   1 +
 10 files changed, 92 insertions(+), 79 deletions(-)
 create mode 100644 net/wireless/Kconfig
 create mode 100644 net/wireless/Makefile

diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 33af833667d..58527322a39 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -206,7 +206,7 @@ obj-$(CONFIG_TR) += tokenring/
 obj-$(CONFIG_WAN) += wan/
 obj-$(CONFIG_ARCNET) += arcnet/
 obj-$(CONFIG_NET_PCMCIA) += pcmcia/
-obj-$(CONFIG_NET_RADIO) += wireless/
+obj-y += wireless/
 obj-$(CONFIG_NET_TULIP) += tulip/
 obj-$(CONFIG_HAMRADIO) += hamradio/
 obj-$(CONFIG_IRDA) += irda/
diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index ece3d9c2dc6..880c628dcd4 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -2,47 +2,21 @@
 # Wireless LAN device configuration
 #
 
-menu "Wireless LAN (non-hamradio)"
-	depends on NETDEVICES
-
-config NET_RADIO
-	bool "Wireless LAN drivers (non-hamradio) & Wireless Extensions"
-	select WIRELESS_EXT
-	---help---
-	  Support for wireless LANs and everything having to do with radio,
-	  but not with amateur radio or FM broadcasting.
-
-	  Saying Y here also enables the Wireless Extensions (creates
-	  /proc/net/wireless and enables iwconfig access). The Wireless
-	  Extension is a generic API allowing a driver to expose to the user
-	  space configuration and statistics specific to common Wireless LANs.
-	  The beauty of it is that a single set of tool can support all the
-	  variations of Wireless LANs, regardless of their type (as long as
-	  the driver supports Wireless Extension). Another advantage is that
-	  these parameters may be changed on the fly without restarting the
-	  driver (or Linux). If you wish to use Wireless Extensions with
-	  wireless PCMCIA (PC-) cards, you need to say Y here; you can fetch
-	  the tools from
-	  <http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/Tools.html>.
+menu "Wireless LAN"
 
-config NET_WIRELESS_RTNETLINK
-	bool "Wireless Extension API over RtNetlink"
-	depends on NET_RADIO
+config WLAN_PRE80211
+	bool "Wireless LAN (pre-802.11)"
+	depends on NETDEVICES
 	---help---
-	  Support the Wireless Extension API over the RtNetlink socket
-	  in addition to the traditional ioctl interface (selected above).
+	  Say Y if you have any pre-802.11 wireless LAN hardware.
 
-	  For now, few tools use this facility, but it might grow in the
-	  future. The only downside is that it adds 4.5 kB to your kernel.
-
-# Note : the cards are obsolete (can't buy them anymore), but the drivers
-# are not, as people are still using them...
-comment "Obsolete Wireless cards support (pre-802.11)"
-	depends on NET_RADIO && (INET || ISA || PCMCIA)
+	  This option does not affect the kernel build, it only
+	  let's you choose drivers.
 
 config STRIP
 	tristate "STRIP (Metricom starmode radio IP)"
-	depends on NET_RADIO && INET
+	depends on INET && WLAN_PRE80211
+	select WIRELESS_EXT
 	---help---
 	  Say Y if you have a Metricom radio and intend to use Starmode Radio
 	  IP. STRIP is a radio protocol developed for the MosquitoNet project
@@ -65,7 +39,8 @@ config STRIP
 
 config ARLAN
 	tristate "Aironet Arlan 655 & IC2200 DS support"
-	depends on NET_RADIO && ISA && !64BIT
+	depends on ISA && !64BIT && WLAN_PRE80211
+	select WIRELESS_EXT
 	---help---
 	  Aironet makes Arlan, a class of wireless LAN adapters. These use the
 	  www.Telxon.com chip, which is also used on several similar cards.
@@ -80,7 +55,8 @@ config ARLAN
 
 config WAVELAN
 	tristate "AT&T/Lucent old WaveLAN & DEC RoamAbout DS ISA support"
-	depends on NET_RADIO && ISA
+	depends on ISA && WLAN_PRE80211
+	select WIRELESS_EXT
 	---help---
 	  The Lucent WaveLAN (formerly NCR and AT&T; or DEC RoamAbout DS) is
 	  a Radio LAN (wireless Ethernet-like Local Area Network) using the
@@ -107,7 +83,8 @@ config WAVELAN
 
 config PCMCIA_WAVELAN
 	tristate "AT&T/Lucent old WaveLAN Pcmcia wireless support"
-	depends on NET_RADIO && PCMCIA
+	depends on PCMCIA && WLAN_PRE80211
+	select WIRELESS_EXT
 	help
 	  Say Y here if you intend to attach an AT&T/Lucent Wavelan PCMCIA
 	  (PC-card) wireless Ethernet networking card to your computer.  This
@@ -118,7 +95,8 @@ config PCMCIA_WAVELAN
 
 config PCMCIA_NETWAVE
 	tristate "Xircom Netwave AirSurfer Pcmcia wireless support"
-	depends on NET_RADIO && PCMCIA
+	depends on PCMCIA && WLAN_PRE80211
+	select WIRELESS_EXT
 	help
 	  Say Y here if you intend to attach this type of PCMCIA (PC-card)
 	  wireless Ethernet networking card to your computer.
@@ -126,12 +104,20 @@ config PCMCIA_NETWAVE
 	  To compile this driver as a module, choose M here: the module will be
 	  called netwave_cs.  If unsure, say N.
 
-comment "Wireless 802.11 Frequency Hopping cards support"
-	depends on NET_RADIO && PCMCIA
+
+config WLAN_80211
+	bool "Wireless LAN (IEEE 802.11)"
+	depends on NETDEVICES
+	---help---
+	  Say Y if you have any 802.11 wireless LAN hardware.
+
+	  This option does not affect the kernel build, it only
+	  let's you choose drivers.
 
 config PCMCIA_RAYCS
 	tristate "Aviator/Raytheon 2.4MHz wireless support"
-	depends on NET_RADIO && PCMCIA
+	depends on PCMCIA && WLAN_80211
+	select WIRELESS_EXT
 	---help---
 	  Say Y here if you intend to attach an Aviator/Raytheon PCMCIA
 	  (PC-card) wireless Ethernet networking card to your computer.
@@ -141,12 +127,10 @@ config PCMCIA_RAYCS
 	  To compile this driver as a module, choose M here: the module will be
 	  called ray_cs.  If unsure, say N.
 
-comment "Wireless 802.11b ISA/PCI cards support"
-	depends on NET_RADIO && (ISA || PCI || PPC_PMAC || PCMCIA)
-
 config IPW2100
 	tristate "Intel PRO/Wireless 2100 Network Connection"
-	depends on NET_RADIO && PCI
+	depends on PCI && WLAN_80211
+	select WIRELESS_EXT
 	select FW_LOADER
 	select IEEE80211
 	---help---
@@ -200,7 +184,8 @@ config IPW2100_DEBUG
 
 config IPW2200
 	tristate "Intel PRO/Wireless 2200BG and 2915ABG Network Connection"
-	depends on NET_RADIO && PCI
+	depends on PCI && WLAN_80211
+	select WIRELESS_EXT
 	select FW_LOADER
 	select IEEE80211
 	---help---
@@ -282,7 +267,8 @@ config IPW2200_DEBUG
 
 config AIRO
 	tristate "Cisco/Aironet 34X/35X/4500/4800 ISA and PCI cards"
- 	depends on NET_RADIO && ISA_DMA_API && (PCI || BROKEN)
+	depends on ISA_DMA_API && WLAN_80211 && (PCI || BROKEN)
+	select WIRELESS_EXT
 	select CRYPTO
 	---help---
 	  This is the standard Linux driver to support Cisco/Aironet ISA and
@@ -299,7 +285,8 @@ config AIRO
 
 config HERMES
 	tristate "Hermes chipset 802.11b support (Orinoco/Prism2/Symbol)"
-	depends on NET_RADIO && (PPC_PMAC || PCI || PCMCIA)
+	depends on (PPC_PMAC || PCI || PCMCIA) && WLAN_80211
+	select WIRELESS_EXT
 	---help---
 	  A driver for 802.11b wireless cards based on the "Hermes" or
 	  Intersil HFA384x (Prism 2) MAC controller.  This includes the vast
@@ -373,7 +360,8 @@ config PCI_HERMES
 
 config ATMEL
       tristate "Atmel at76c50x chipset  802.11b support"
-      depends on NET_RADIO && (PCI || PCMCIA)
+      depends on (PCI || PCMCIA) && WLAN_80211
+      select WIRELESS_EXT
       select FW_LOADER
       select CRC32
        ---help---
@@ -394,13 +382,9 @@ config PCI_ATMEL
         Enable support for PCI and mini-PCI cards containing the
         Atmel at76c506 chip.
 
-# If Pcmcia is compiled in, offer Pcmcia cards...
-comment "Wireless 802.11b Pcmcia/Cardbus cards support"
-	depends on NET_RADIO && PCMCIA
-
 config PCMCIA_HERMES
 	tristate "Hermes PCMCIA card support"
-	depends on NET_RADIO && PCMCIA && HERMES
+	depends on PCMCIA && HERMES
 	---help---
 	  A driver for "Hermes" chipset based PCMCIA wireless adaptors, such
 	  as the Lucent WavelanIEEE/Orinoco cards and their OEM (Cabletron/
@@ -420,7 +404,7 @@ config PCMCIA_HERMES
 
 config PCMCIA_SPECTRUM
 	tristate "Symbol Spectrum24 Trilogy PCMCIA card support"
-	depends on NET_RADIO && PCMCIA && HERMES
+	depends on PCMCIA && HERMES
 	select FW_LOADER
 	---help---
 
@@ -434,7 +418,8 @@ config PCMCIA_SPECTRUM
 
 config AIRO_CS
 	tristate "Cisco/Aironet 34X/35X/4500/4800 PCMCIA cards"
-	depends on NET_RADIO && PCMCIA && (BROKEN || !M32R)
+	depends on PCMCIA && (BROKEN || !M32R) && WLAN_80211
+	select WIRELESS_EXT
 	select CRYPTO
 	select CRYPTO_AES
 	---help---
@@ -458,7 +443,8 @@ config AIRO_CS
 
 config PCMCIA_ATMEL
 	tristate "Atmel at76c502/at76c504 PCMCIA cards"
-	depends on NET_RADIO && ATMEL && PCMCIA
+	depends on ATMEL && PCMCIA
+	select WIRELESS_EXT
 	select FW_LOADER
 	select CRC32
 	---help---
@@ -467,17 +453,17 @@ config PCMCIA_ATMEL
 
 config PCMCIA_WL3501
       tristate "Planet WL3501 PCMCIA cards"
-      depends on NET_RADIO && EXPERIMENTAL && PCMCIA
+      depends on EXPERIMENTAL && PCMCIA && WLAN_80211
+      select WIRELESS_EXT
        ---help---
          A driver for WL3501 PCMCIA 802.11 wireless cards made by Planet.
 	 It has basic support for Linux wireless extensions and initial
 	 micro support for ethtool.
 
-comment "Prism GT/Duette 802.11(a/b/g) PCI/Cardbus support"
-	depends on NET_RADIO && PCI
 config PRISM54
 	tristate 'Intersil Prism GT/Duette/Indigo PCI/Cardbus' 
-	depends on PCI && NET_RADIO && EXPERIMENTAL
+	depends on PCI && EXPERIMENTAL && WLAN_80211
+	select WIRELESS_EXT
 	select FW_LOADER
 	---help---
 	  Enable PCI and Cardbus support for the following chipset based cards:
@@ -523,7 +509,8 @@ config PRISM54
 
 config USB_ZD1201
 	tristate "USB ZD1201 based Wireless device support"
-	depends on USB && NET_RADIO
+	depends on USB && WLAN_80211
+	select WIRELESS_EXT
 	select FW_LOADER
 	---help---
 	  Say Y if you want to use wireless LAN adapters based on the ZyDAS
@@ -542,11 +529,4 @@ source "drivers/net/wireless/hostap/Kconfig"
 source "drivers/net/wireless/bcm43xx/Kconfig"
 source "drivers/net/wireless/zd1211rw/Kconfig"
 
-# yes, this works even when no drivers are selected
-config NET_WIRELESS
-	bool
-	depends on NET_RADIO && (ISA || PCI || PPC_PMAC || PCMCIA)
-	default y
-
 endmenu
-
diff --git a/drivers/net/wireless/bcm43xx/Kconfig b/drivers/net/wireless/bcm43xx/Kconfig
index 533993f538f..ce397e4284f 100644
--- a/drivers/net/wireless/bcm43xx/Kconfig
+++ b/drivers/net/wireless/bcm43xx/Kconfig
@@ -1,6 +1,7 @@
 config BCM43XX
 	tristate "Broadcom BCM43xx wireless support"
-	depends on PCI && IEEE80211 && IEEE80211_SOFTMAC && NET_RADIO && EXPERIMENTAL
+	depends on PCI && IEEE80211 && IEEE80211_SOFTMAC && WLAN_80211 && EXPERIMENTAL
+	select WIRELESS_EXT
 	select FW_LOADER
 	select HW_RANDOM
 	---help---
diff --git a/drivers/net/wireless/hostap/Kconfig b/drivers/net/wireless/hostap/Kconfig
index 308f773ad56..1fef33169fd 100644
--- a/drivers/net/wireless/hostap/Kconfig
+++ b/drivers/net/wireless/hostap/Kconfig
@@ -1,6 +1,7 @@
 config HOSTAP
 	tristate "IEEE 802.11 for Host AP (Prism2/2.5/3 and WEP/TKIP/CCMP)"
-	depends on NET_RADIO
+	depends on WLAN_80211
+	select WIRELESS_EXT
 	select IEEE80211
 	select IEEE80211_CRYPT_WEP
 	---help---
diff --git a/drivers/net/wireless/zd1211rw/Kconfig b/drivers/net/wireless/zd1211rw/Kconfig
index 66ed55bc546..d1ab24a9563 100644
--- a/drivers/net/wireless/zd1211rw/Kconfig
+++ b/drivers/net/wireless/zd1211rw/Kconfig
@@ -1,6 +1,7 @@
 config ZD1211RW
 	tristate "ZyDAS ZD1211/ZD1211B USB-wireless support"
-	depends on USB && IEEE80211 && IEEE80211_SOFTMAC && NET_RADIO && EXPERIMENTAL
+	depends on USB && IEEE80211_SOFTMAC && WLAN_80211 && EXPERIMENTAL
+	select WIRELESS_EXT
 	select FW_LOADER
 	---help---
 	  This is an experimental driver for the ZyDAS ZD1211/ZD1211B wireless
diff --git a/net/Kconfig b/net/Kconfig
index e2d9b3b9cda..ae1817dc51b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -212,14 +212,17 @@ endmenu
 source "net/ax25/Kconfig"
 source "net/irda/Kconfig"
 source "net/bluetooth/Kconfig"
-source "net/ieee80211/Kconfig"
-
-config WIRELESS_EXT
-	bool
 
 config FIB_RULES
 	bool
 
+menu "Wireless"
+
+source "net/wireless/Kconfig"
+source "net/ieee80211/Kconfig"
+
+endmenu
+
 endif   # if NET
 endmenu # Networking
 
diff --git a/net/Makefile b/net/Makefile
index 4854ac50631..29bbe19d87f 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -52,3 +52,5 @@ obj-$(CONFIG_IUCV)		+= iucv/
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
 endif
+
+obj-y				+= wireless/
diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig
index 6ef766ef961..1438adedbc8 100644
--- a/net/ieee80211/Kconfig
+++ b/net/ieee80211/Kconfig
@@ -56,7 +56,8 @@ config IEEE80211_CRYPT_CCMP
 
 config IEEE80211_CRYPT_TKIP
 	tristate "IEEE 802.11i TKIP encryption"
-	depends on IEEE80211 && NET_RADIO
+	depends on IEEE80211
+	select WIRELESS_EXT
 	select CRYPTO
 	select CRYPTO_MICHAEL_MIC
 	select CRYPTO_ECB
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
new file mode 100644
index 00000000000..ca2f05c2976
--- /dev/null
+++ b/net/wireless/Kconfig
@@ -0,0 +1,23 @@
+config WIRELESS_EXT
+	bool "Wireless extensions"
+	default n
+	---help---
+	  This option enables the legacy wireless extensions
+	  (wireless network interface configuration via ioctls.)
+
+	  Wireless extensions will be replaced by cfg80211 and
+	  will be required only by legacy drivers that implement
+	  wireless extension handlers.
+
+	  Say N (if you can) unless you know you need wireless
+	  extensions for external modules.
+
+config NET_WIRELESS_RTNETLINK
+	bool "Wireless Extension API over RtNetlink"
+	depends on WIRELESS_EXT
+	---help---
+	  Support the Wireless Extension API over the RtNetlink socket
+	  in addition to the traditional ioctl interface (selected above).
+
+	  For now, few tools use this facility, but it might grow in the
+	  future. The only downside is that it adds 4.5 kB to your kernel.
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
new file mode 100644
index 00000000000..cf4e3d9726b
--- /dev/null
+++ b/net/wireless/Makefile
@@ -0,0 +1 @@
+# dummy file for now
-- 
cgit v1.2.3


From 704232c2718c9d4b3375ec15a14fc0397970c449 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 23 Apr 2007 12:20:05 -0700
Subject: [WIRELESS] cfg80211: New wireless config infrastructure.

This patch creates the core cfg80211 code along with some sysfs bits.
This is a stripped down version to allow mac80211 to function, but
doesn't include any configuration yet except for creating and removing
virtual interfaces.

This patch includes the nl80211 header file but it only contains the
interface types which the cfg80211 interface for creating virtual
interfaces relies on.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 CREDITS                   |   6 ++
 MAINTAINERS               |   6 ++
 include/linux/Kbuild      |   1 +
 include/linux/netdevice.h |   4 +
 include/linux/nl80211.h   |  38 +++++++++
 include/net/cfg80211.h    |  36 ++++++++
 include/net/wireless.h    | 139 ++++++++++++++++++++++++++++++
 net/wireless/Kconfig      |   3 +
 net/wireless/Makefile     |   4 +-
 net/wireless/core.c       | 209 ++++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/core.h       |  49 +++++++++++
 net/wireless/sysfs.c      |  80 ++++++++++++++++++
 net/wireless/sysfs.h      |   9 ++
 13 files changed, 583 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/nl80211.h
 create mode 100644 include/net/cfg80211.h
 create mode 100644 include/net/wireless.h
 create mode 100644 net/wireless/core.c
 create mode 100644 net/wireless/core.h
 create mode 100644 net/wireless/sysfs.c
 create mode 100644 net/wireless/sysfs.h

diff --git a/CREDITS b/CREDITS
index e3e7271ace0..dede114d046 100644
--- a/CREDITS
+++ b/CREDITS
@@ -317,6 +317,12 @@ S: 2322 37th Ave SW
 S: Seattle, Washington 98126-2010
 S: USA
 
+N: Johannes Berg
+E: johannes@sipsolutions.net
+W: http://johannes.sipsolutions.net/
+P: 1024D/9AB78CA5 AD02 0176 4E29 C137 1DF6 08D2 FC44 CF86 9AB7 8CA5
+D: powerpc & 802.11 hacker
+
 N: Stephen R. van den Berg (AKA BuGless)
 E: berg@pool.informatik.rwth-aachen.de
 D: General kernel, gcc, and libc hacker
diff --git a/MAINTAINERS b/MAINTAINERS
index 7adebcc99cb..f56c7e172ce 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -894,6 +894,12 @@ M:	maxextreme@gmail.com
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 
+CFG80211 and NL80211
+P:	Johannes Berg
+M:	johannes@sipsolutions.net
+L:	linux-wireless@vger.kernel.org
+S:	Maintained
+
 COMMON INTERNET FILE SYSTEM (CIFS)
 P:	Steve French
 M:	sfrench@samba.org
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index ea86f2e0271..4ff0f57d0ad 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -113,6 +113,7 @@ header-y += netrom.h
 header-y += nfs2.h
 header-y += nfs4_mount.h
 header-y += nfs_mount.h
+header-y += nl80211.h
 header-y += oom.h
 header-y += param.h
 header-y += pci_regs.h
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 71fc8ff4888..584c199ec2d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -42,6 +42,8 @@
 struct vlan_group;
 struct ethtool_ops;
 struct netpoll_info;
+/* 802.11 specific */
+struct wireless_dev;
 					/* source back-compat hooks */
 #define SET_ETHTOOL_OPS(netdev,ops) \
 	( (netdev)->ethtool_ops = (ops) )
@@ -400,6 +402,8 @@ struct net_device
 	void                    *ip6_ptr;       /* IPv6 specific data */
 	void			*ec_ptr;	/* Econet specific data	*/
 	void			*ax25_ptr;	/* AX.25 specific data */
+	struct wireless_dev	*ieee80211_ptr;	/* IEEE 802.11 specific data,
+						   assign before registering */
 
 /*
  * Cache line mostly used on receive path (including eth_type_trans())
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
new file mode 100644
index 00000000000..9a30ba2ca75
--- /dev/null
+++ b/include/linux/nl80211.h
@@ -0,0 +1,38 @@
+#ifndef __LINUX_NL80211_H
+#define __LINUX_NL80211_H
+/*
+ * 802.11 netlink interface public header
+ *
+ * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net>
+ */
+
+/**
+ * enum nl80211_iftype - (virtual) interface types
+ * @NL80211_IFTYPE_UNSPECIFIED: unspecified type, driver decides
+ * @NL80211_IFTYPE_ADHOC: independent BSS member
+ * @NL80211_IFTYPE_STATION: managed BSS member
+ * @NL80211_IFTYPE_AP: access point
+ * @NL80211_IFTYPE_AP_VLAN: VLAN interface for access points
+ * @NL80211_IFTYPE_WDS: wireless distribution interface
+ * @NL80211_IFTYPE_MONITOR: monitor interface receiving all frames
+ * @__NL80211_IFTYPE_AFTER_LAST: internal use
+ *
+ * These values are used with the NL80211_ATTR_IFTYPE
+ * to set the type of an interface.
+ *
+ */
+enum nl80211_iftype {
+	NL80211_IFTYPE_UNSPECIFIED,
+	NL80211_IFTYPE_ADHOC,
+	NL80211_IFTYPE_STATION,
+	NL80211_IFTYPE_AP,
+	NL80211_IFTYPE_AP_VLAN,
+	NL80211_IFTYPE_WDS,
+	NL80211_IFTYPE_MONITOR,
+
+	/* keep last */
+	__NL80211_IFTYPE_AFTER_LAST
+};
+#define NL80211_IFTYPE_MAX (__NL80211_IFTYPE_AFTER_LAST - 1)
+
+#endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
new file mode 100644
index 00000000000..783a11437a5
--- /dev/null
+++ b/include/net/cfg80211.h
@@ -0,0 +1,36 @@
+#ifndef __NET_CFG80211_H
+#define __NET_CFG80211_H
+
+#include <linux/netlink.h>
+#include <linux/skbuff.h>
+#include <net/genetlink.h>
+
+/*
+ * 802.11 configuration in-kernel interface
+ *
+ * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
+ */
+
+/* from net/wireless.h */
+struct wiphy;
+
+/**
+ * struct cfg80211_ops - backend description for wireless configuration
+ *
+ * This struct is registered by fullmac card drivers and/or wireless stacks
+ * in order to handle configuration requests on their interfaces.
+ *
+ * All callbacks except where otherwise noted should return 0
+ * on success or a negative error code.
+ *
+ * @add_virtual_intf: create a new virtual interface with the given name
+ *
+ * @del_virtual_intf: remove the virtual interface determined by ifindex.
+ */
+struct cfg80211_ops {
+	int	(*add_virtual_intf)(struct wiphy *wiphy, char *name,
+				    unsigned int type);
+	int	(*del_virtual_intf)(struct wiphy *wiphy, int ifindex);
+};
+
+#endif /* __NET_CFG80211_H */
diff --git a/include/net/wireless.h b/include/net/wireless.h
new file mode 100644
index 00000000000..d30c4ba8fd9
--- /dev/null
+++ b/include/net/wireless.h
@@ -0,0 +1,139 @@
+#ifndef __NET_WIRELESS_H
+#define __NET_WIRELESS_H
+
+/*
+ * 802.11 device management
+ *
+ * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/debugfs.h>
+#include <linux/list.h>
+#include <net/cfg80211.h>
+
+/**
+ * struct wiphy - wireless hardware description
+ * @idx: the wiphy index assigned to this item
+ * @class_dev: the class device representing /sys/class/ieee80211/<wiphy-name>
+ */
+struct wiphy {
+	/* assign these fields before you register the wiphy */
+
+	/* permanent MAC address */
+	u8 perm_addr[ETH_ALEN];
+
+	/* If multiple wiphys are registered and you're handed e.g.
+	 * a regular netdev with assigned ieee80211_ptr, you won't
+	 * know whether it points to a wiphy your driver has registered
+	 * or not. Assign this to something global to your driver to
+	 * help determine whether you own this wiphy or not. */
+	void *privid;
+
+	/* fields below are read-only, assigned by cfg80211 */
+
+	/* the item in /sys/class/ieee80211/ points to this,
+	 * you need use set_wiphy_dev() (see below) */
+	struct device dev;
+
+	/* dir in debugfs: ieee80211/<wiphyname> */
+	struct dentry *debugfsdir;
+
+	char priv[0] __attribute__((__aligned__(NETDEV_ALIGN)));
+};
+
+/** struct wireless_dev - wireless per-netdev state
+ *
+ * This structure must be allocated by the driver/stack
+ * that uses the ieee80211_ptr field in struct net_device
+ * (this is intentional so it can be allocated along with
+ * the netdev.)
+ *
+ * @wiphy: pointer to hardware description
+ */
+struct wireless_dev {
+	struct wiphy *wiphy;
+
+	/* private to the generic wireless code */
+	struct list_head list;
+	struct net_device *netdev;
+};
+
+/**
+ * wiphy_priv - return priv from wiphy
+ */
+static inline void *wiphy_priv(struct wiphy *wiphy)
+{
+	BUG_ON(!wiphy);
+	return &wiphy->priv;
+}
+
+/**
+ * set_wiphy_dev - set device pointer for wiphy
+ */
+static inline void set_wiphy_dev(struct wiphy *wiphy, struct device *dev)
+{
+	wiphy->dev.parent = dev;
+}
+
+/**
+ * wiphy_dev - get wiphy dev pointer
+ */
+static inline struct device *wiphy_dev(struct wiphy *wiphy)
+{
+	return wiphy->dev.parent;
+}
+
+/**
+ * wiphy_name - get wiphy name
+ */
+static inline char *wiphy_name(struct wiphy *wiphy)
+{
+	return wiphy->dev.bus_id;
+}
+
+/**
+ * wdev_priv - return wiphy priv from wireless_dev
+ */
+static inline void *wdev_priv(struct wireless_dev *wdev)
+{
+	BUG_ON(!wdev);
+	return wiphy_priv(wdev->wiphy);
+}
+
+/**
+ * wiphy_new - create a new wiphy for use with cfg80211
+ *
+ * create a new wiphy and associate the given operations with it.
+ * @sizeof_priv bytes are allocated for private use.
+ *
+ * the returned pointer must be assigned to each netdev's
+ * ieee80211_ptr for proper operation.
+ */
+struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv);
+
+/**
+ * wiphy_register - register a wiphy with cfg80211
+ *
+ * register the given wiphy
+ *
+ * Returns a non-negative wiphy index or a negative error code.
+ */
+extern int wiphy_register(struct wiphy *wiphy);
+
+/**
+ * wiphy_unregister - deregister a wiphy from cfg80211
+ *
+ * unregister a device with the given priv pointer.
+ * After this call, no more requests can be made with this priv
+ * pointer, but the call may sleep to wait for an outstanding
+ * request that is being handled.
+ */
+extern void wiphy_unregister(struct wiphy *wiphy);
+
+/**
+ * wiphy_free - free wiphy
+ */
+extern void wiphy_free(struct wiphy *wiphy);
+
+#endif /* __NET_WIRELESS_H */
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index ca2f05c2976..1863c0b07d4 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -1,3 +1,6 @@
+config CFG80211
+        tristate "Improved wireless configuration API"
+
 config WIRELESS_EXT
 	bool "Wireless extensions"
 	default n
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index cf4e3d9726b..3f082ffae38 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -1 +1,3 @@
-# dummy file for now
+obj-$(CONFIG_CFG80211) += cfg80211.o
+
+cfg80211-y += core.o sysfs.o
diff --git a/net/wireless/core.c b/net/wireless/core.c
new file mode 100644
index 00000000000..532e1e09e02
--- /dev/null
+++ b/net/wireless/core.c
@@ -0,0 +1,209 @@
+/*
+ * This is the linux wireless configuration interface.
+ *
+ * Copyright 2006, 2007		Johannes Berg <johannes@sipsolutions.net>
+ */
+
+#include <linux/if.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/nl80211.h>
+#include <linux/debugfs.h>
+#include <linux/notifier.h>
+#include <linux/device.h>
+#include <net/genetlink.h>
+#include <net/cfg80211.h>
+#include <net/wireless.h>
+#include "core.h"
+#include "sysfs.h"
+
+/* name for sysfs, %d is appended */
+#define PHY_NAME "phy"
+
+MODULE_AUTHOR("Johannes Berg");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("wireless configuration support");
+
+/* RCU might be appropriate here since we usually
+ * only read the list, and that can happen quite
+ * often because we need to do it for each command */
+LIST_HEAD(cfg80211_drv_list);
+DEFINE_MUTEX(cfg80211_drv_mutex);
+static int wiphy_counter;
+
+/* for debugfs */
+static struct dentry *ieee80211_debugfs_dir;
+
+/* exported functions */
+
+struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
+{
+	struct cfg80211_registered_device *drv;
+	int alloc_size;
+
+	alloc_size = sizeof(*drv) + sizeof_priv;
+
+	drv = kzalloc(alloc_size, GFP_KERNEL);
+	if (!drv)
+		return NULL;
+
+	drv->ops = ops;
+
+	mutex_lock(&cfg80211_drv_mutex);
+
+	if (unlikely(wiphy_counter<0)) {
+		/* ugh, wrapped! */
+		kfree(drv);
+		return NULL;
+	}
+	drv->idx = wiphy_counter;
+
+	/* give it a proper name */
+	snprintf(drv->wiphy.dev.bus_id, BUS_ID_SIZE,
+		 PHY_NAME "%d", drv->idx);
+
+	/* now increase counter for the next time */
+	wiphy_counter++;
+	mutex_unlock(&cfg80211_drv_mutex);
+
+	mutex_init(&drv->mtx);
+	mutex_init(&drv->devlist_mtx);
+	INIT_LIST_HEAD(&drv->netdev_list);
+
+	device_initialize(&drv->wiphy.dev);
+	drv->wiphy.dev.class = &ieee80211_class;
+	drv->wiphy.dev.platform_data = drv;
+
+	return &drv->wiphy;
+}
+EXPORT_SYMBOL(wiphy_new);
+
+int wiphy_register(struct wiphy *wiphy)
+{
+	struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
+	int res;
+
+	mutex_lock(&cfg80211_drv_mutex);
+
+
+	res = device_add(&drv->wiphy.dev);
+	if (res)
+		goto out_unlock;
+
+	list_add(&drv->list, &cfg80211_drv_list);
+
+	/* add to debugfs */
+	drv->wiphy.debugfsdir =
+		debugfs_create_dir(wiphy_name(&drv->wiphy),
+				   ieee80211_debugfs_dir);
+
+	res = 0;
+out_unlock:
+	mutex_unlock(&cfg80211_drv_mutex);
+	return res;
+}
+EXPORT_SYMBOL(wiphy_register);
+
+void wiphy_unregister(struct wiphy *wiphy)
+{
+	struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
+
+	mutex_lock(&cfg80211_drv_mutex);
+
+	/* hold registered driver mutex during list removal as well
+	 * to make sure no commands are in progress at the moment */
+	mutex_lock(&drv->mtx);
+	list_del(&drv->list);
+	mutex_unlock(&drv->mtx);
+
+	device_del(&drv->wiphy.dev);
+	debugfs_remove(drv->wiphy.debugfsdir);
+
+	mutex_unlock(&cfg80211_drv_mutex);
+}
+EXPORT_SYMBOL(wiphy_unregister);
+
+void cfg80211_dev_free(struct cfg80211_registered_device *drv)
+{
+	mutex_destroy(&drv->mtx);
+	mutex_destroy(&drv->devlist_mtx);
+	kfree(drv);
+}
+
+void wiphy_free(struct wiphy *wiphy)
+{
+	put_device(&wiphy->dev);
+}
+EXPORT_SYMBOL(wiphy_free);
+
+static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
+					 unsigned long state,
+					 void *ndev)
+{
+	struct net_device *dev = ndev;
+	struct cfg80211_registered_device *rdev;
+
+	if (!dev->ieee80211_ptr)
+		return 0;
+
+	rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy);
+
+	switch (state) {
+	case NETDEV_REGISTER:
+		mutex_lock(&rdev->devlist_mtx);
+		list_add(&dev->ieee80211_ptr->list, &rdev->netdev_list);
+		if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj,
+				      "phy80211")) {
+			printk(KERN_ERR "wireless: failed to add phy80211 "
+				"symlink to netdev!\n");
+		}
+		dev->ieee80211_ptr->netdev = dev;
+		mutex_unlock(&rdev->devlist_mtx);
+		break;
+	case NETDEV_UNREGISTER:
+		mutex_lock(&rdev->devlist_mtx);
+		if (!list_empty(&dev->ieee80211_ptr->list)) {
+			sysfs_remove_link(&dev->dev.kobj, "phy80211");
+			list_del_init(&dev->ieee80211_ptr->list);
+		}
+		mutex_unlock(&rdev->devlist_mtx);
+		break;
+	}
+
+	return 0;
+}
+
+static struct notifier_block cfg80211_netdev_notifier = {
+	.notifier_call = cfg80211_netdev_notifier_call,
+};
+
+static int cfg80211_init(void)
+{
+	int err = wiphy_sysfs_init();
+	if (err)
+		goto out_fail_sysfs;
+
+	err = register_netdevice_notifier(&cfg80211_netdev_notifier);
+	if (err)
+		goto out_fail_notifier;
+
+	ieee80211_debugfs_dir = debugfs_create_dir("ieee80211", NULL);
+
+	return 0;
+
+out_fail_notifier:
+	wiphy_sysfs_exit();
+out_fail_sysfs:
+	return err;
+}
+module_init(cfg80211_init);
+
+static void cfg80211_exit(void)
+{
+	debugfs_remove(ieee80211_debugfs_dir);
+	unregister_netdevice_notifier(&cfg80211_netdev_notifier);
+	wiphy_sysfs_exit();
+}
+module_exit(cfg80211_exit);
diff --git a/net/wireless/core.h b/net/wireless/core.h
new file mode 100644
index 00000000000..158db1edb92
--- /dev/null
+++ b/net/wireless/core.h
@@ -0,0 +1,49 @@
+/*
+ * Wireless configuration interface internals.
+ *
+ * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net>
+ */
+#ifndef __NET_WIRELESS_CORE_H
+#define __NET_WIRELESS_CORE_H
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <net/genetlink.h>
+#include <net/wireless.h>
+#include <net/cfg80211.h>
+
+struct cfg80211_registered_device {
+	struct cfg80211_ops *ops;
+	struct list_head list;
+	/* we hold this mutex during any call so that
+	 * we cannot do multiple calls at once, and also
+	 * to avoid the deregister call to proceed while
+	 * any call is in progress */
+	struct mutex mtx;
+
+	/* wiphy index, internal only */
+	int idx;
+
+	/* associate netdev list */
+	struct mutex devlist_mtx;
+	struct list_head netdev_list;
+
+	/* must be last because of the way we do wiphy_priv(),
+	 * and it should at least be aligned to NETDEV_ALIGN */
+	struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN)));
+};
+
+static inline
+struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy)
+{
+	BUG_ON(!wiphy);
+	return container_of(wiphy, struct cfg80211_registered_device, wiphy);
+}
+
+extern struct mutex cfg80211_drv_mutex;
+extern struct list_head cfg80211_drv_list;
+
+/* free object */
+extern void cfg80211_dev_free(struct cfg80211_registered_device *drv);
+
+#endif /* __NET_WIRELESS_CORE_H */
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
new file mode 100644
index 00000000000..3ebae144296
--- /dev/null
+++ b/net/wireless/sysfs.c
@@ -0,0 +1,80 @@
+/*
+ * This file provides /sys/class/ieee80211/<wiphy name>/
+ * and some default attributes.
+ *
+ * Copyright 2005-2006	Jiri Benc <jbenc@suse.cz>
+ * Copyright 2006	Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This file is GPLv2 as found in COPYING.
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/nl80211.h>
+#include <linux/rtnetlink.h>
+#include <net/cfg80211.h>
+#include "sysfs.h"
+#include "core.h"
+
+static inline struct cfg80211_registered_device *dev_to_rdev(
+	struct device *dev)
+{
+	return container_of(dev, struct cfg80211_registered_device, wiphy.dev);
+}
+
+static ssize_t _show_index(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	return sprintf(buf, "%d\n", dev_to_rdev(dev)->idx);
+}
+
+static ssize_t _show_permaddr(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	char *addr = dev_to_rdev(dev)->wiphy.perm_addr;
+
+	return sprintf(buf, "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n",
+		       addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+}
+
+static struct device_attribute ieee80211_dev_attrs[] = {
+	__ATTR(index, S_IRUGO, _show_index, NULL),
+	__ATTR(macaddress, S_IRUGO, _show_permaddr, NULL),
+	{}
+};
+
+static void wiphy_dev_release(struct device *dev)
+{
+	struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
+
+	cfg80211_dev_free(rdev);
+}
+
+static int wiphy_uevent(struct device *dev, char **envp,
+			int num_envp, char *buf, int size)
+{
+	/* TODO, we probably need stuff here */
+	return 0;
+}
+
+struct class ieee80211_class = {
+	.name = "ieee80211",
+	.owner = THIS_MODULE,
+	.dev_release = wiphy_dev_release,
+	.dev_attrs = ieee80211_dev_attrs,
+#ifdef CONFIG_HOTPLUG
+	.dev_uevent = wiphy_uevent,
+#endif
+};
+
+int wiphy_sysfs_init(void)
+{
+	return class_register(&ieee80211_class);
+}
+
+void wiphy_sysfs_exit(void)
+{
+	class_unregister(&ieee80211_class);
+}
diff --git a/net/wireless/sysfs.h b/net/wireless/sysfs.h
new file mode 100644
index 00000000000..65acbebd371
--- /dev/null
+++ b/net/wireless/sysfs.h
@@ -0,0 +1,9 @@
+#ifndef __WIRELESS_SYSFS_H
+#define __WIRELESS_SYSFS_H
+
+extern int wiphy_sysfs_init(void);
+extern void wiphy_sysfs_exit(void);
+
+extern struct class ieee80211_class;
+
+#endif /* __WIRELESS_SYSFS_H */
-- 
cgit v1.2.3


From 9e101eab153073d8a1fc7ea22b20af65de8ab44b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 23 Apr 2007 12:20:55 -0700
Subject: [WIRELESS]: Remove wext over netlink.

As scheduled, this patch removes the pointless wext over netlink code.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/feature-removal-schedule.txt |  12 -
 include/net/iw_handler.h                   |  10 -
 net/core/rtnetlink.c                       |  31 --
 net/core/wireless.c                        | 735 -----------------------------
 net/wireless/Kconfig                       |  10 -
 5 files changed, 798 deletions(-)

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 9817b60e70a..976c8a1bd7c 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -285,18 +285,6 @@ Who:	Richard Purdie <rpurdie@rpsys.net>
 
 ---------------------------
 
-What:	Wireless extensions over netlink (CONFIG_NET_WIRELESS_RTNETLINK)
-When:	with the merge of wireless-dev, 2.6.22 or later
-Why:	The option/code is
-	 * not enabled on most kernels
-	 * not required by any userspace tools (except an experimental one,
-	   and even there only for some parts, others use ioctl)
-	 * pointless since wext is no longer evolving and the ioctl
-	   interface needs to be kept
-Who:	Johannes Berg <johannes@sipsolutions.net>
-
----------------------------
-
 What:	i8xx_tco watchdog driver
 When:	in 2.6.22
 Why:	the i8xx_tco watchdog driver has been replaced by the iTCO_wdt
diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index 8a830188354..909ca87d13b 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -440,16 +440,6 @@ extern int dev_get_wireless_info(char * buffer, char **start, off_t offset,
 /* Handle IOCTLs, called in net/core/dev.c */
 extern int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd);
 
-/* Handle RtNetlink requests, called in net/core/rtnetlink.c */
-extern int wireless_rtnetlink_set(struct net_device *	dev,
-				  char *		data,
-				  int			len);
-extern int wireless_rtnetlink_get(struct net_device *	dev,
-				  char *		data,
-				  int			len,
-				  char **		p_buf,
-				  int *			p_len);
-
 /* Second : functions that may be called by driver modules */
 
 /* Send a single event to user space */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 75cea8ea4cf..4fe0f4b3a34 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -51,10 +51,6 @@
 #include <net/pkt_sched.h>
 #include <net/fib_rules.h>
 #include <net/rtnetlink.h>
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-#include <linux/wireless.h>
-#include <net/iw_handler.h>
-#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 
 struct rtnl_link
 {
@@ -684,17 +680,6 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 		modified = 1;
 	}
 
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-	if (tb[IFLA_WIRELESS]) {
-		/* Call Wireless Extensions.
-		 * Various stuff checked in there... */
-		err = wireless_rtnetlink_set(dev, nla_data(tb[IFLA_WIRELESS]),
-					     nla_len(tb[IFLA_WIRELESS]));
-		if (err < 0)
-			goto errout_dev;
-	}
-#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
-
 	if (tb[IFLA_BROADCAST]) {
 		nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
 		send_addr_notify = 1;
@@ -758,22 +743,6 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 	} else
 		return -EINVAL;
 
-
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-	if (tb[IFLA_WIRELESS]) {
-		/* Call Wireless Extensions. We need to know the size before
-		 * we can alloc. Various stuff checked in there... */
-		err = wireless_rtnetlink_get(dev, nla_data(tb[IFLA_WIRELESS]),
-					     nla_len(tb[IFLA_WIRELESS]),
-					     &iw_buf, &iw_buf_len);
-		if (err < 0)
-			goto errout;
-
-		/* Payload is at an offset in buffer */
-		iw = iw_buf + IW_EV_POINT_OFF;
-	}
-#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
-
 	nskb = nlmsg_new(if_nlmsg_size(iw_buf_len), GFP_KERNEL);
 	if (nskb == NULL) {
 		err = -ENOBUFS;
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 86db63d7f76..fba295e05e7 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -104,12 +104,10 @@
 
 /* Debugging stuff */
 #undef WE_IOCTL_DEBUG		/* Debug IOCTL API */
-#undef WE_RTNETLINK_DEBUG	/* Debug RtNetlink API */
 #undef WE_EVENT_DEBUG		/* Debug Event dispatcher */
 #undef WE_SPY_DEBUG		/* Debug enhanced spy support */
 
 /* Options */
-//CONFIG_NET_WIRELESS_RTNETLINK	/* Wireless requests over RtNetlink */
 #define WE_EVENT_RTNETLINK	/* Propagate events using RtNetlink */
 #define WE_SET_EVENT		/* Generate an event on some set commands */
 
@@ -1145,739 +1143,6 @@ int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
 	return -EINVAL;
 }
 
-/********************** RTNETLINK REQUEST API **********************/
-/*
- * The alternate user space API to configure all those Wireless Extensions
- * is through RtNetlink.
- * This API support only the new driver API (iw_handler).
- *
- * This RtNetlink API use the same query/reply model as the ioctl API.
- * Maximum effort has been done to fit in the RtNetlink model, and
- * we support both RtNetlink Set and RtNelink Get operations.
- * On the other hand, we don't offer Dump operations because of the
- * following reasons :
- *	o Large number of parameters, most optional
- *	o Large size of some parameters (> 100 bytes)
- *	o Each parameters need to be extracted from hardware
- *	o Scan requests can take seconds and disable network activity.
- * Because of this high cost/overhead, we want to return only the
- * parameters the user application is really interested in.
- * We could offer partial Dump using the IW_DESCR_FLAG_DUMP flag.
- *
- * The API uses the standard RtNetlink socket. When the RtNetlink code
- * find a IFLA_WIRELESS field in a RtNetlink SET_LINK request,
- * it calls here.
- */
-
-#ifdef CONFIG_NET_WIRELESS_RTNETLINK
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a standard Wireless Extension GET handler.
- * We do various checks and call the handler with the proper args.
- */
-static int rtnetlink_standard_get(struct net_device *	dev,
-				  struct iw_event *	request,
-				  int			request_len,
-				  iw_handler		handler,
-				  char **		p_buf,
-				  int *			p_len)
-{
-	const struct iw_ioctl_description *	descr = NULL;
-	unsigned int				cmd;
-	union iwreq_data *			wrqu;
-	int					hdr_len;
-	struct iw_request_info			info;
-	char *					buffer = NULL;
-	int					buffer_size = 0;
-	int					ret = -EINVAL;
-
-	/* Get the description of the Request */
-	cmd = request->cmd;
-	if ((cmd - SIOCIWFIRST) >= standard_ioctl_num)
-		return -EOPNOTSUPP;
-	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
-
-#ifdef WE_RTNETLINK_DEBUG
-	printk(KERN_DEBUG "%s (WE.r) : Found standard handler for 0x%04X\n",
-	       dev->name, cmd);
-	printk(KERN_DEBUG "%s (WE.r) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-	/* Check if wrqu is complete */
-	hdr_len = event_type_size[descr->header_type];
-	if (request_len < hdr_len) {
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG
-		       "%s (WE.r) : Wireless request too short (%d)\n",
-		       dev->name, request_len);
-#endif	/* WE_RTNETLINK_DEBUG */
-		return -EINVAL;
-	}
-
-	/* Prepare the call */
-	info.cmd = cmd;
-	info.flags = 0;
-
-	/* Check if we have extra data in the reply or not */
-	if (descr->header_type != IW_HEADER_TYPE_POINT) {
-
-		/* Create the kernel buffer that we will return.
-		 * It's at an offset to match the TYPE_POINT case... */
-		buffer_size = request_len + IW_EV_POINT_OFF;
-		buffer = kmalloc(buffer_size, GFP_KERNEL);
-		if (buffer == NULL) {
-			return -ENOMEM;
-		}
-		/* Copy event data */
-		memcpy(buffer + IW_EV_POINT_OFF, request, request_len);
-		/* Use our own copy of wrqu */
-		wrqu = (union iwreq_data *) (buffer + IW_EV_POINT_OFF
-					     + IW_EV_LCP_PK_LEN);
-
-		/* No extra arguments. Trivial to handle */
-		ret = handler(dev, &info, wrqu, NULL);
-
-	} else {
-		union iwreq_data	wrqu_point;
-		char *			extra = NULL;
-		int			extra_size = 0;
-
-		/* Get a temp copy of wrqu (skip pointer) */
-		memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
-		       ((char *) request) + IW_EV_LCP_PK_LEN,
-		       IW_EV_POINT_LEN - IW_EV_LCP_PK_LEN);
-
-		/* Calculate space needed by arguments. Always allocate
-		 * for max space. Easier, and won't last long... */
-		extra_size = descr->max_tokens * descr->token_size;
-		/* Support for very large requests */
-		if ((descr->flags & IW_DESCR_FLAG_NOMAX) &&
-		   (wrqu_point.data.length > descr->max_tokens))
-			extra_size = (wrqu_point.data.length
-				      * descr->token_size);
-		buffer_size = extra_size + IW_EV_POINT_PK_LEN + IW_EV_POINT_OFF;
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes (%d bytes)\n",
-		       dev->name, extra_size, buffer_size);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-		/* Create the kernel buffer that we will return */
-		buffer = kmalloc(buffer_size, GFP_KERNEL);
-		if (buffer == NULL) {
-			return -ENOMEM;
-		}
-
-		/* Put wrqu in the right place (just before extra).
-		 * Leave space for IWE header and dummy pointer...
-		 * Note that IW_EV_LCP_PK_LEN==4 bytes, so it's still aligned.
-		 */
-		memcpy(buffer + IW_EV_LCP_PK_LEN + IW_EV_POINT_OFF,
-		       ((char *) &wrqu_point) + IW_EV_POINT_OFF,
-		       IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
-		wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_PK_LEN);
-
-		/* Extra comes logically after that. Offset +12 bytes. */
-		extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_PK_LEN;
-
-		/* Call the handler */
-		ret = handler(dev, &info, wrqu, extra);
-
-		/* Calculate real returned length */
-		extra_size = (wrqu->data.length * descr->token_size);
-		/* Re-adjust reply size */
-		request->len = extra_size + IW_EV_POINT_PK_LEN;
-
-		/* Put the iwe header where it should, i.e. scrap the
-		 * dummy pointer. */
-		memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_PK_LEN);
-
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG "%s (WE.r) : Reply 0x%04X, hdr_len %d, tokens %d, extra_size %d, buffer_size %d\n", dev->name, cmd, hdr_len, wrqu->data.length, extra_size, buffer_size);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-		/* Check if there is enough buffer up there */
-		if (wrqu_point.data.length < wrqu->data.length)
-			ret = -E2BIG;
-	}
-
-	/* Return the buffer to the caller */
-	if (!ret) {
-		*p_buf = buffer;
-		*p_len = request->len;
-	} else {
-		/* Cleanup */
-		if (buffer)
-			kfree(buffer);
-	}
-
-	return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a standard Wireless Extension SET handler.
- * We do various checks and call the handler with the proper args.
- */
-static inline int rtnetlink_standard_set(struct net_device *	dev,
-					 struct iw_event *	request,
-					 int			request_len,
-					 iw_handler		handler)
-{
-	const struct iw_ioctl_description *	descr = NULL;
-	unsigned int				cmd;
-	union iwreq_data *			wrqu;
-	union iwreq_data			wrqu_point;
-	int					hdr_len;
-	char *					extra = NULL;
-	int					extra_size = 0;
-	struct iw_request_info			info;
-	int					ret = -EINVAL;
-
-	/* Get the description of the Request */
-	cmd = request->cmd;
-	if ((cmd - SIOCIWFIRST) >= standard_ioctl_num)
-		return -EOPNOTSUPP;
-	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
-
-#ifdef WE_RTNETLINK_DEBUG
-	printk(KERN_DEBUG "%s (WE.r) : Found standard SET handler for 0x%04X\n",
-	       dev->name, cmd);
-	printk(KERN_DEBUG "%s (WE.r) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-	/* Extract fixed header from request. This is properly aligned. */
-	wrqu = (union iwreq_data *) (((char *) request) + IW_EV_LCP_PK_LEN);
-
-	/* Check if wrqu is complete */
-	hdr_len = event_type_pk_size[descr->header_type];
-	if (request_len < hdr_len) {
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG
-		       "%s (WE.r) : Wireless request too short (%d)\n",
-		       dev->name, request_len);
-#endif	/* WE_RTNETLINK_DEBUG */
-		return -EINVAL;
-	}
-
-	/* Prepare the call */
-	info.cmd = cmd;
-	info.flags = 0;
-
-	/* Check if we have extra data in the request or not */
-	if (descr->header_type != IW_HEADER_TYPE_POINT) {
-
-		/* No extra arguments. Trivial to handle */
-		ret = handler(dev, &info, wrqu, NULL);
-
-	} else {
-		int	extra_len;
-
-		/* Put wrqu in the right place (skip pointer) */
-		memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
-		       wrqu, IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
-		/* Don't forget about the event code... */
-		wrqu = &wrqu_point;
-
-		/* Check if number of token fits within bounds */
-		if (wrqu_point.data.length > descr->max_tokens)
-			return -E2BIG;
-		if (wrqu_point.data.length < descr->min_tokens)
-			return -EINVAL;
-
-		/* Real length of payload */
-		extra_len = wrqu_point.data.length * descr->token_size;
-
-		/* Check if request is self consistent */
-		if ((request_len - hdr_len) < extra_len) {
-#ifdef WE_RTNETLINK_DEBUG
-			printk(KERN_DEBUG "%s (WE.r) : Wireless request data too short (%d)\n",
-			       dev->name, extra_size);
-#endif	/* WE_RTNETLINK_DEBUG */
-			return -EINVAL;
-		}
-
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes\n",
-		       dev->name, extra_size);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-		/* Always allocate for max space. Easier, and won't last
-		 * long... */
-		extra_size = descr->max_tokens * descr->token_size;
-		extra = kmalloc(extra_size, GFP_KERNEL);
-		if (extra == NULL)
-			return -ENOMEM;
-
-		/* Copy extra in aligned buffer */
-		memcpy(extra, ((char *) request) + hdr_len, extra_len);
-
-		/* Call the handler */
-		ret = handler(dev, &info, &wrqu_point, extra);
-	}
-
-#ifdef WE_SET_EVENT
-	/* Generate an event to notify listeners of the change */
-	if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
-	   ((ret == 0) || (ret == -EIWCOMMIT))) {
-		if (descr->flags & IW_DESCR_FLAG_RESTRICT)
-			/* If the event is restricted, don't
-			 * export the payload */
-			wireless_send_event(dev, cmd, wrqu, NULL);
-		else
-			wireless_send_event(dev, cmd, wrqu, extra);
-	}
-#endif	/* WE_SET_EVENT */
-
-	/* Cleanup - I told you it wasn't that long ;-) */
-	if (extra)
-		kfree(extra);
-
-	/* Call commit handler if needed and defined */
-	if (ret == -EIWCOMMIT)
-		ret = call_commit_handler(dev);
-
-	return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a private Wireless Extension GET handler.
- * Same as above...
- * It's not as nice and slimline as the standard wrapper. The cause
- * is struct iw_priv_args, which was not really designed for the
- * job we are going here.
- *
- * IMPORTANT : This function prevent to set and get data on the same
- * IOCTL and enforce the SET/GET convention. Not doing it would be
- * far too hairy...
- * If you need to set and get data at the same time, please don't use
- * a iw_handler but process it in your ioctl handler (i.e. use the
- * old driver API).
- */
-static inline int rtnetlink_private_get(struct net_device *	dev,
-					struct iw_event *	request,
-					int			request_len,
-					iw_handler		handler,
-					char **			p_buf,
-					int *			p_len)
-{
-	const struct iw_priv_args *	descr = NULL;
-	unsigned int			cmd;
-	union iwreq_data *		wrqu;
-	int				hdr_len;
-	struct iw_request_info		info;
-	int				extra_size = 0;
-	int				i;
-	char *				buffer = NULL;
-	int				buffer_size = 0;
-	int				ret = -EINVAL;
-
-	/* Get the description of the Request */
-	cmd = request->cmd;
-	for (i = 0; i < dev->wireless_handlers->num_private_args; i++)
-		if (cmd == dev->wireless_handlers->private_args[i].cmd) {
-			descr = &(dev->wireless_handlers->private_args[i]);
-			break;
-		}
-	if (descr == NULL)
-		return -EOPNOTSUPP;
-
-#ifdef WE_RTNETLINK_DEBUG
-	printk(KERN_DEBUG "%s (WE.r) : Found private handler for 0x%04X\n",
-	       dev->name, cmd);
-	printk(KERN_DEBUG "%s (WE.r) : Name %s, set %X, get %X\n",
-	       dev->name, descr->name, descr->set_args, descr->get_args);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-	/* Compute the max size of the get arguments */
-	extra_size = get_priv_size(descr->get_args);
-
-	/* Does it fits in wrqu ? */
-	if ((descr->get_args & IW_PRIV_SIZE_FIXED) &&
-	   (extra_size <= IFNAMSIZ)) {
-		hdr_len = extra_size;
-		extra_size = 0;
-	} else {
-		hdr_len = IW_EV_POINT_PK_LEN;
-	}
-
-	/* Check if wrqu is complete */
-	if (request_len < hdr_len) {
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG
-		       "%s (WE.r) : Wireless request too short (%d)\n",
-		       dev->name, request_len);
-#endif	/* WE_RTNETLINK_DEBUG */
-		return -EINVAL;
-	}
-
-	/* Prepare the call */
-	info.cmd = cmd;
-	info.flags = 0;
-
-	/* Check if we have a pointer to user space data or not. */
-	if (extra_size == 0) {
-
-		/* Create the kernel buffer that we will return.
-		 * It's at an offset to match the TYPE_POINT case... */
-		buffer_size = request_len + IW_EV_POINT_OFF;
-		buffer = kmalloc(buffer_size, GFP_KERNEL);
-		if (buffer == NULL) {
-			return -ENOMEM;
-		}
-		/* Copy event data */
-		memcpy(buffer + IW_EV_POINT_OFF, request, request_len);
-		/* Use our own copy of wrqu */
-		wrqu = (union iwreq_data *) (buffer + IW_EV_POINT_OFF
-					     + IW_EV_LCP_PK_LEN);
-
-		/* No extra arguments. Trivial to handle */
-		ret = handler(dev, &info, wrqu, (char *) wrqu);
-
-	} else {
-		char *	extra;
-
-		/* Buffer for full reply */
-		buffer_size = extra_size + IW_EV_POINT_PK_LEN + IW_EV_POINT_OFF;
-
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes (%d bytes)\n",
-		       dev->name, extra_size, buffer_size);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-		/* Create the kernel buffer that we will return */
-		buffer = kmalloc(buffer_size, GFP_KERNEL);
-		if (buffer == NULL) {
-			return -ENOMEM;
-		}
-
-		/* Put wrqu in the right place (just before extra).
-		 * Leave space for IWE header and dummy pointer...
-		 * Note that IW_EV_LCP_PK_LEN==4 bytes, so it's still aligned.
-		 */
-		memcpy(buffer + IW_EV_LCP_PK_LEN + IW_EV_POINT_OFF,
-		       ((char *) request) + IW_EV_LCP_PK_LEN,
-		       IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
-		wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_PK_LEN);
-
-		/* Extra comes logically after that. Offset +12 bytes. */
-		extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_PK_LEN;
-
-		/* Call the handler */
-		ret = handler(dev, &info, wrqu, extra);
-
-		/* Adjust for the actual length if it's variable,
-		 * avoid leaking kernel bits outside. */
-		if (!(descr->get_args & IW_PRIV_SIZE_FIXED))
-			extra_size = adjust_priv_size(descr->get_args, wrqu);
-		/* Re-adjust reply size */
-		request->len = extra_size + IW_EV_POINT_PK_LEN;
-
-		/* Put the iwe header where it should, i.e. scrap the
-		 * dummy pointer. */
-		memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_PK_LEN);
-
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG "%s (WE.r) : Reply 0x%04X, hdr_len %d, tokens %d, extra_size %d, buffer_size %d\n", dev->name, cmd, hdr_len, wrqu->data.length, extra_size, buffer_size);
-#endif	/* WE_RTNETLINK_DEBUG */
-	}
-
-	/* Return the buffer to the caller */
-	if (!ret) {
-		*p_buf = buffer;
-		*p_len = request->len;
-	} else {
-		/* Cleanup */
-		if (buffer)
-			kfree(buffer);
-	}
-
-	return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a private Wireless Extension SET handler.
- * Same as above...
- * It's not as nice and slimline as the standard wrapper. The cause
- * is struct iw_priv_args, which was not really designed for the
- * job we are going here.
- *
- * IMPORTANT : This function prevent to set and get data on the same
- * IOCTL and enforce the SET/GET convention. Not doing it would be
- * far too hairy...
- * If you need to set and get data at the same time, please don't use
- * a iw_handler but process it in your ioctl handler (i.e. use the
- * old driver API).
- */
-static inline int rtnetlink_private_set(struct net_device *	dev,
-					struct iw_event *	request,
-					int			request_len,
-					iw_handler		handler)
-{
-	const struct iw_priv_args *	descr = NULL;
-	unsigned int			cmd;
-	union iwreq_data *		wrqu;
-	union iwreq_data		wrqu_point;
-	int				hdr_len;
-	char *				extra = NULL;
-	int				extra_size = 0;
-	int				offset = 0;	/* For sub-ioctls */
-	struct iw_request_info		info;
-	int				i;
-	int				ret = -EINVAL;
-
-	/* Get the description of the Request */
-	cmd = request->cmd;
-	for (i = 0; i < dev->wireless_handlers->num_private_args; i++)
-		if (cmd == dev->wireless_handlers->private_args[i].cmd) {
-			descr = &(dev->wireless_handlers->private_args[i]);
-			break;
-		}
-	if (descr == NULL)
-		return -EOPNOTSUPP;
-
-#ifdef WE_RTNETLINK_DEBUG
-	printk(KERN_DEBUG "%s (WE.r) : Found private handler for 0x%04X\n",
-	       ifr->ifr_name, cmd);
-	printk(KERN_DEBUG "%s (WE.r) : Name %s, set %X, get %X\n",
-	       dev->name, descr->name, descr->set_args, descr->get_args);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-	/* Compute the size of the set arguments */
-	/* Check for sub-ioctl handler */
-	if (descr->name[0] == '\0')
-		/* Reserve one int for sub-ioctl index */
-		offset = sizeof(__u32);
-
-	/* Size of set arguments */
-	extra_size = get_priv_size(descr->set_args);
-
-	/* Does it fits in wrqu ? */
-	if ((descr->set_args & IW_PRIV_SIZE_FIXED) &&
-	   (extra_size <= IFNAMSIZ)) {
-		hdr_len = IW_EV_LCP_PK_LEN + extra_size;
-		extra_size = 0;
-	} else {
-		hdr_len = IW_EV_POINT_PK_LEN;
-	}
-
-	/* Extract fixed header from request. This is properly aligned. */
-	wrqu = (union iwreq_data *) (((char *) request) + IW_EV_LCP_PK_LEN);
-
-	/* Check if wrqu is complete */
-	if (request_len < hdr_len) {
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG
-		       "%s (WE.r) : Wireless request too short (%d)\n",
-		       dev->name, request_len);
-#endif	/* WE_RTNETLINK_DEBUG */
-		return -EINVAL;
-	}
-
-	/* Prepare the call */
-	info.cmd = cmd;
-	info.flags = 0;
-
-	/* Check if we have a pointer to user space data or not. */
-	if (extra_size == 0) {
-
-		/* No extra arguments. Trivial to handle */
-		ret = handler(dev, &info, wrqu, (char *) wrqu);
-
-	} else {
-		int	extra_len;
-
-		/* Put wrqu in the right place (skip pointer) */
-		memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
-		       wrqu, IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
-
-		/* Does it fits within bounds ? */
-		if (wrqu_point.data.length > (descr->set_args &
-					     IW_PRIV_SIZE_MASK))
-			return -E2BIG;
-
-		/* Real length of payload */
-		extra_len = adjust_priv_size(descr->set_args, &wrqu_point);
-
-		/* Check if request is self consistent */
-		if ((request_len - hdr_len) < extra_len) {
-#ifdef WE_RTNETLINK_DEBUG
-			printk(KERN_DEBUG "%s (WE.r) : Wireless request data too short (%d)\n",
-			       dev->name, extra_size);
-#endif	/* WE_RTNETLINK_DEBUG */
-			return -EINVAL;
-		}
-
-#ifdef WE_RTNETLINK_DEBUG
-		printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes\n",
-		       dev->name, extra_size);
-#endif	/* WE_RTNETLINK_DEBUG */
-
-		/* Always allocate for max space. Easier, and won't last
-		 * long... */
-		extra = kmalloc(extra_size, GFP_KERNEL);
-		if (extra == NULL)
-			return -ENOMEM;
-
-		/* Copy extra in aligned buffer */
-		memcpy(extra, ((char *) request) + hdr_len, extra_len);
-
-		/* Call the handler */
-		ret = handler(dev, &info, &wrqu_point, extra);
-
-		/* Cleanup - I told you it wasn't that long ;-) */
-		kfree(extra);
-	}
-
-	/* Call commit handler if needed and defined */
-	if (ret == -EIWCOMMIT)
-		ret = call_commit_handler(dev);
-
-	return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Main RtNetlink dispatcher. Called from the main networking code
- * (do_getlink() in net/core/rtnetlink.c).
- * Check the type of Request and call the appropriate wrapper...
- */
-int wireless_rtnetlink_get(struct net_device *	dev,
-			   char *		data,
-			   int			len,
-			   char **		p_buf,
-			   int *		p_len)
-{
-	struct iw_event *	request = (struct iw_event *) data;
-	iw_handler		handler;
-
-	/* Check length */
-	if (len < IW_EV_LCP_PK_LEN) {
-		printk(KERN_DEBUG "%s (WE.r) : RtNetlink request too short (%d)\n",
-		       dev->name, len);
-		return -EINVAL;
-	}
-
-	/* ReCheck length (len may have padding) */
-	if (request->len > len) {
-		printk(KERN_DEBUG "%s (WE.r) : RtNetlink request len invalid (%d-%d)\n",
-		       dev->name, request->len, len);
-		return -EINVAL;
-	}
-
-	/* Only accept GET requests in here */
-	if (!IW_IS_GET(request->cmd))
-		return -EOPNOTSUPP;
-
-	/* If command is `get the encoding parameters', check if
-	 * the user has the right to do it */
-	if (request->cmd == SIOCGIWENCODE ||
-	    request->cmd == SIOCGIWENCODEEXT) {
-		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-	}
-
-	/* Special cases */
-	if (request->cmd == SIOCGIWSTATS)
-		/* Get Wireless Stats */
-		return rtnetlink_standard_get(dev,
-					      request,
-					      request->len,
-					      &iw_handler_get_iwstats,
-					      p_buf, p_len);
-	if (request->cmd == SIOCGIWPRIV) {
-		/* Check if we have some wireless handlers defined */
-		if (dev->wireless_handlers == NULL)
-			return -EOPNOTSUPP;
-		/* Get Wireless Stats */
-		return rtnetlink_standard_get(dev,
-					      request,
-					      request->len,
-					      &iw_handler_get_private,
-					      p_buf, p_len);
-	}
-
-	/* Basic check */
-	if (!netif_device_present(dev))
-		return -ENODEV;
-
-	/* Try to find the handler */
-	handler = get_handler(dev, request->cmd);
-	if (handler != NULL) {
-		/* Standard and private are not the same */
-		if (request->cmd < SIOCIWFIRSTPRIV)
-			return rtnetlink_standard_get(dev,
-						      request,
-						      request->len,
-						      handler,
-						      p_buf, p_len);
-		else
-			return rtnetlink_private_get(dev,
-						     request,
-						     request->len,
-						     handler,
-						     p_buf, p_len);
-	}
-
-	return -EOPNOTSUPP;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Main RtNetlink dispatcher. Called from the main networking code
- * (do_setlink() in net/core/rtnetlink.c).
- * Check the type of Request and call the appropriate wrapper...
- */
-int wireless_rtnetlink_set(struct net_device *	dev,
-			   char *		data,
-			   int			len)
-{
-	struct iw_event *	request = (struct iw_event *) data;
-	iw_handler		handler;
-
-	/* Check length */
-	if (len < IW_EV_LCP_PK_LEN) {
-		printk(KERN_DEBUG "%s (WE.r) : RtNetlink request too short (%d)\n",
-		       dev->name, len);
-		return -EINVAL;
-	}
-
-	/* ReCheck length (len may have padding) */
-	if (request->len > len) {
-		printk(KERN_DEBUG "%s (WE.r) : RtNetlink request len invalid (%d-%d)\n",
-		       dev->name, request->len, len);
-		return -EINVAL;
-	}
-
-	/* Only accept SET requests in here */
-	if (!IW_IS_SET(request->cmd))
-		return -EOPNOTSUPP;
-
-	/* Basic check */
-	if (!netif_device_present(dev))
-		return -ENODEV;
-
-	/* New driver API : try to find the handler */
-	handler = get_handler(dev, request->cmd);
-	if (handler != NULL) {
-		/* Standard and private are not the same */
-		if (request->cmd < SIOCIWFIRSTPRIV)
-			return rtnetlink_standard_set(dev,
-						      request,
-						      request->len,
-						      handler);
-		else
-			return rtnetlink_private_set(dev,
-						     request,
-						     request->len,
-						     handler);
-	}
-
-	return -EOPNOTSUPP;
-}
-#endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
-
 
 /************************* EVENT PROCESSING *************************/
 /*
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 1863c0b07d4..a228d56a91b 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -14,13 +14,3 @@ config WIRELESS_EXT
 
 	  Say N (if you can) unless you know you need wireless
 	  extensions for external modules.
-
-config NET_WIRELESS_RTNETLINK
-	bool "Wireless Extension API over RtNetlink"
-	depends on WIRELESS_EXT
-	---help---
-	  Support the Wireless Extension API over the RtNetlink socket
-	  in addition to the traditional ioctl interface (selected above).
-
-	  For now, few tools use this facility, but it might grow in the
-	  future. The only downside is that it adds 4.5 kB to your kernel.
-- 
cgit v1.2.3


From 42431592e74a968d919a46baf0515a2ee6978dac Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Mon, 23 Apr 2007 13:28:49 -0700
Subject: [WIRELESS] drivers/net/wireless/Kconfig: correct minor typo

Correct minor typo in drivers/net/wireless/Kconfig identified by
Stefano Brivio <stefano.brivio@polimi.it>.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wireless/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index 880c628dcd4..4426841b2be 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -11,7 +11,7 @@ config WLAN_PRE80211
 	  Say Y if you have any pre-802.11 wireless LAN hardware.
 
 	  This option does not affect the kernel build, it only
-	  let's you choose drivers.
+	  lets you choose drivers.
 
 config STRIP
 	tristate "STRIP (Metricom starmode radio IP)"
@@ -112,7 +112,7 @@ config WLAN_80211
 	  Say Y if you have any 802.11 wireless LAN hardware.
 
 	  This option does not affect the kernel build, it only
-	  let's you choose drivers.
+	  lets you choose drivers.
 
 config PCMCIA_RAYCS
 	tristate "Aviator/Raytheon 2.4MHz wireless support"
-- 
cgit v1.2.3


From 65d1b4a7e73fe0e1f5275ad7d2d3547981480886 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 23 Apr 2007 22:24:32 -0700
Subject: [TCP]: TCP Illinois update.

This version more closely matches the paper, and fixes several
math errors. The biggest difference is that it updates alpha/beta
once per RTT

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_illinois.c | 298 ++++++++++++++++++++++++++++++------------------
 1 file changed, 186 insertions(+), 112 deletions(-)

diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 91a2a34604c..ae629860088 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -23,74 +23,106 @@
 #define ALPHA_MIN	((3*ALPHA_SCALE)/10)	/* ~0.3 */
 #define ALPHA_MAX	(10*ALPHA_SCALE)	/* 10.0 */
 #define ALPHA_BASE	ALPHA_SCALE		/* 1.0 */
+#define U32_MAX		((u32)~0U)
+#define RTT_MAX		(U32_MAX / ALPHA_MAX)	/* 3.3 secs */
 
 #define BETA_SHIFT	6
 #define BETA_SCALE	(1u<<BETA_SHIFT)
-#define BETA_MIN	(BETA_SCALE/8)		/* 0.8 */
-#define BETA_MAX	(BETA_SCALE/2)
-#define BETA_BASE	BETA_MAX		/* 0.5 */
-
-#define THETA		5
+#define BETA_MIN	(BETA_SCALE/8)		/* 0.125 */
+#define BETA_MAX	(BETA_SCALE/2)		/* 0.5 */
+#define BETA_BASE	BETA_MAX
 
 static int win_thresh __read_mostly = 15;
-module_param(win_thresh, int, 0644);
+module_param(win_thresh, int, 0);
 MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing");
 
-#define MAX_RTT		0x7fffffff
+static int theta __read_mostly = 5;
+module_param(theta, int, 0);
+MODULE_PARM_DESC(theta, "# of fast RTT's before full growth");
 
 /* TCP Illinois Parameters */
-struct tcp_illinois {
-	u32	last_alpha;
-	u32	min_rtt;
-	u32	max_rtt;
-	u32	rtt_low;
-	u32	rtt_cnt;
-	u64	sum_rtt;
+struct illinois {
+	u64	sum_rtt;	/* sum of rtt's measured within last rtt */
+	u16	cnt_rtt;	/* # of rtts measured within last rtt */
+	u32	base_rtt;	/* min of all rtt in usec */
+	u32	max_rtt;	/* max of all rtt in usec */
+	u32	end_seq;	/* right edge of current RTT */
+	u32	alpha;		/* Additive increase */
+	u32	beta;		/* Muliplicative decrease */
+	u16	acked;		/* # packets acked by current ACK */
+	u8	rtt_above;	/* average rtt has gone above threshold */
+	u8	rtt_low;	/* # of rtts measurements below threshold */
 };
 
+static void rtt_reset(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct illinois *ca = inet_csk_ca(sk);
+
+	ca->end_seq = tp->snd_nxt;
+	ca->cnt_rtt = 0;
+	ca->sum_rtt = 0;
+
+	/* TODO: age max_rtt? */
+}
+
 static void tcp_illinois_init(struct sock *sk)
 {
-	struct tcp_illinois *ca = inet_csk_ca(sk);
+	struct illinois *ca = inet_csk_ca(sk);
+
+	ca->alpha = ALPHA_MAX;
+	ca->beta = BETA_BASE;
+	ca->base_rtt = 0x7fffffff;
+	ca->max_rtt = 0;
+
+	ca->acked = 0;
+	ca->rtt_low = 0;
+	ca->rtt_above = 0;
 
-	ca->last_alpha = ALPHA_BASE;
-	ca->min_rtt = 0x7fffffff;
+	rtt_reset(sk);
 }
 
-/*
- * Keep track of min, max and average RTT
- */
-static void tcp_illinois_rtt_calc(struct sock *sk, u32 rtt)
+/* Measure RTT for each ack. */
+static void tcp_illinois_rtt_sample(struct sock *sk, u32 rtt)
 {
-	struct tcp_illinois *ca = inet_csk_ca(sk);
+	struct illinois *ca = inet_csk_ca(sk);
+
+	/* ignore bogus values, this prevents wraparound in alpha math */
+	if (rtt > RTT_MAX)
+		rtt = RTT_MAX;
 
-	if (rtt < ca->min_rtt)
-		ca->min_rtt = rtt;
-	if (rtt > ca->max_rtt)
+	/* keep track of minimum RTT seen so far */
+	if (ca->base_rtt > rtt)
+		ca->base_rtt = rtt;
+
+	/* and max */
+	if (ca->max_rtt < rtt)
 		ca->max_rtt = rtt;
 
-	if (++ca->rtt_cnt == 1)
-		ca->sum_rtt = rtt;
-	else
-		ca->sum_rtt += rtt;
+	++ca->cnt_rtt;
+	ca->sum_rtt += rtt;
 }
 
-/* max queuing delay */
-static inline u32 max_delay(const struct tcp_illinois *ca)
+/* Capture count of packets covered by ack, to adjust for delayed acks */
+static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked)
 {
-	return ca->max_rtt - ca->min_rtt;
+	struct illinois *ca = inet_csk_ca(sk);
+	ca->acked = pkts_acked;
 }
 
-/* average queueing delay */
-static u32 avg_delay(struct tcp_illinois *ca)
+/* Maximum queuing delay */
+static inline u32 max_delay(const struct illinois *ca)
 {
-	u64 avg_rtt = ca->sum_rtt;
-
-	do_div(avg_rtt, ca->rtt_cnt);
+	return ca->max_rtt - ca->base_rtt;
+}
 
-	ca->sum_rtt = 0;
-	ca->rtt_cnt = 0;
+/* Average queuing delay */
+static inline u32 avg_delay(const struct illinois *ca)
+{
+	u64 t = ca->sum_rtt;
 
-	return avg_rtt - ca->min_rtt;
+	do_div(t, ca->cnt_rtt);
+	return t - ca->base_rtt;
 }
 
 /*
@@ -101,32 +133,31 @@ static u32 avg_delay(struct tcp_illinois *ca)
  * A. If average delay is at minimum (we are uncongested),
  *    then use large alpha (10.0) to increase faster.
  * B. If average delay is at maximum (getting congested)
- *    then use small alpha (1.0)
+ *    then use small alpha (0.3)
  *
  * The result is a convex window growth curve.
  */
-static u32 alpha(const struct sock *sk)
+static u32 alpha(struct illinois *ca, u32 da, u32 dm)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_illinois *ca = inet_csk_ca(sk);
-	u32 dm = max_delay(ca);
-	u32 da = avg_delay(ca);
-	u32 d1, a;
+	u32 d1 = dm / 100;	/* Low threshold */
 
-	if (tp->snd_cwnd < win_thresh)
-		return ALPHA_BASE;	/* same as Reno (1.0) */
-
-	d1 = dm / 100;
 	if (da <= d1) {
-		/* Don't let noise force agressive response */
-		if (ca->rtt_low < THETA) {
-			++ca->rtt_low;
-			return ca->last_alpha;
-		} else
+		/* If never got out of low delay zone, then use max */
+		if (!ca->rtt_above)
 			return ALPHA_MAX;
+
+		/* Wait for 5 good RTT's before allowing alpha to go alpha max.
+		 * This prevents one good RTT from causing sudden window increase.
+		 */
+		if (++ca->rtt_low < theta)
+			return ca->alpha;
+
+		ca->rtt_low = 0;
+		ca->rtt_above = 0;
+		return ALPHA_MAX;
 	}
 
-	ca->rtt_low = 0;
+	ca->rtt_above = 1;
 
 	/*
 	 * Based on:
@@ -146,37 +177,8 @@ static u32 alpha(const struct sock *sk)
 
 	dm -= d1;
 	da -= d1;
-
-	a = (dm * ALPHA_MAX) / (dm - (da  * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
-	ca->last_alpha = a;
-	return a;
-}
-
-/*
- * Increase window in response to successful acknowledgment.
- */
-static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
-				    u32 in_flight, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	/* RFC2861 only increase cwnd if fully utilized */
-	if (!tcp_is_cwnd_limited(sk, in_flight))
-		return;
-
-	/* In slow start */
-	if (tp->snd_cwnd <= tp->snd_ssthresh)
-		tcp_slow_start(tp);
-
-	else {
-		/* additive increase  cwnd += alpha / cwnd */
-		if ((tp->snd_cwnd_cnt * alpha(sk)) >> ALPHA_SHIFT >= tp->snd_cwnd) {
-			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
-				tp->snd_cwnd++;
-			tp->snd_cwnd_cnt = 0;
-		} else
-			tp->snd_cwnd_cnt++;
-	}
+	return (dm * ALPHA_MAX) /
+		(dm + (da  * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN);
 }
 
 /*
@@ -187,20 +189,14 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
  * If delay is up to 80% of max then beta = 1/2
  * In between is a linear function
  */
-static inline u32 beta(struct sock *sk)
+static u32 beta(u32 da, u32 dm)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_illinois *ca = inet_csk_ca(sk);
-	u32 dm = max_delay(ca);
-	u32 da = avg_delay(ca);
 	u32 d2, d3;
 
-	if (tp->snd_cwnd < win_thresh)
-		return BETA_BASE;
-
 	d2 = dm / 10;
 	if (da <= d2)
 		return BETA_MIN;
+
 	d3 = (8 * dm) / 10;
 	if (da >= d3 || d3 <= d2)
 		return BETA_MAX;
@@ -222,31 +218,107 @@ static inline u32 beta(struct sock *sk)
 		/ (d3 - d2);
 }
 
+/* Update alpha and beta values once per RTT */
+static void update_params(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct illinois *ca = inet_csk_ca(sk);
+
+	if (tp->snd_cwnd < win_thresh) {
+		ca->alpha = ALPHA_BASE;
+		ca->beta = BETA_BASE;
+	} else if (ca->cnt_rtt > 0) {
+		u32 dm = max_delay(ca);
+		u32 da = avg_delay(ca);
+
+		ca->alpha = alpha(ca, da, dm);
+		ca->beta = beta(da, dm);
+	}
+
+	rtt_reset(sk);
+}
+
+/*
+ * In case of loss, reset to default values
+ */
+static void tcp_illinois_state(struct sock *sk, u8 new_state)
+{
+	struct illinois *ca = inet_csk_ca(sk);
+
+	if (new_state == TCP_CA_Loss) {
+		ca->alpha = ALPHA_BASE;
+		ca->beta = BETA_BASE;
+		ca->rtt_low = 0;
+		ca->rtt_above = 0;
+		rtt_reset(sk);
+	}
+}
+
+/*
+ * Increase window in response to successful acknowledgment.
+ */
+static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
+				    u32 in_flight, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct illinois *ca = inet_csk_ca(sk);
+
+	if (after(ack, ca->end_seq))
+		update_params(sk);
+
+	/* RFC2861 only increase cwnd if fully utilized */
+	if (!tcp_is_cwnd_limited(sk, in_flight))
+		return;
+
+	/* In slow start */
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		tcp_slow_start(tp);
+
+	else {
+		u32 delta;
+
+		/* snd_cwnd_cnt is # of packets since last cwnd increment */
+		tp->snd_cwnd_cnt += ca->acked;
+		ca->acked = 1;
+
+		/* This is close approximation of:
+		 * tp->snd_cwnd += alpha/tp->snd_cwnd
+		*/
+		delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT;
+		if (delta >= tp->snd_cwnd) {
+			tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd,
+					   (u32) tp->snd_cwnd_clamp);
+			tp->snd_cwnd_cnt = 0;
+		}
+	}
+}
+
 static u32 tcp_illinois_ssthresh(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct illinois *ca = inet_csk_ca(sk);
 
 	/* Multiplicative decrease */
-	return max((tp->snd_cwnd * beta(sk)) >> BETA_SHIFT, 2U);
+	return max((tp->snd_cwnd * ca->beta) >> BETA_SHIFT, 2U);
 }
 
-/* Extract info for TCP socket info provided via netlink.
- * We aren't really doing Vegas, but we can provide RTT info
- */
-static void tcp_illinois_get_info(struct sock *sk, u32 ext,
-			       struct sk_buff *skb)
+
+/* Extract info for Tcp socket info provided via netlink. */
+static void tcp_illinois_info(struct sock *sk, u32 ext,
+			      struct sk_buff *skb)
 {
-	const struct tcp_illinois *ca = inet_csk_ca(sk);
+	const struct illinois *ca = inet_csk_ca(sk);
 
 	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
 		struct tcpvegas_info info = {
 			.tcpv_enabled = 1,
-			.tcpv_rttcnt = ca->rtt_cnt,
-			.tcpv_minrtt = ca->min_rtt,
+			.tcpv_rttcnt = ca->cnt_rtt,
+			.tcpv_minrtt = ca->base_rtt,
 		};
-		u64 avg_rtt = ca->sum_rtt;
-		do_div(avg_rtt, ca->rtt_cnt);
-		info.tcpv_rtt = avg_rtt;
+		u64 t = ca->sum_rtt;
+
+		do_div(t, ca->cnt_rtt);
+		info.tcpv_rtt = t;
 
 		nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
 	}
@@ -257,8 +329,10 @@ static struct tcp_congestion_ops tcp_illinois = {
 	.ssthresh	= tcp_illinois_ssthresh,
 	.min_cwnd	= tcp_reno_min_cwnd,
 	.cong_avoid	= tcp_illinois_cong_avoid,
-	.rtt_sample	= tcp_illinois_rtt_calc,
-	.get_info	= tcp_illinois_get_info,
+	.set_state	= tcp_illinois_state,
+	.rtt_sample	= tcp_illinois_rtt_sample,
+	.get_info	= tcp_illinois_info,
+	.pkts_acked	= tcp_illinois_acked,
 
 	.owner		= THIS_MODULE,
 	.name		= "illinois",
@@ -266,7 +340,7 @@ static struct tcp_congestion_ops tcp_illinois = {
 
 static int __init tcp_illinois_register(void)
 {
-	BUILD_BUG_ON(sizeof(struct tcp_illinois) > ICSK_CA_PRIV_SIZE);
+	BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE);
 	return tcp_register_congestion_control(&tcp_illinois);
 }
 
@@ -281,4 +355,4 @@ module_exit(tcp_illinois_unregister);
 MODULE_AUTHOR("Stephen Hemminger, Shao Liu");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("TCP Illinois");
-MODULE_VERSION("0.3");
+MODULE_VERSION("1.0");
-- 
cgit v1.2.3


From 164891aadf1721fca4dce473bb0e0998181537c6 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 23 Apr 2007 22:26:16 -0700
Subject: [TCP]: Congestion control API update.

Do some simple changes to make congestion control API faster/cleaner.
* use ktime_t rather than timeval
* merge rtt sampling into existing ack callback
  this means one indirect call versus two per ack.
* use flags bits to store options/settings

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h  |  5 +++++
 include/net/tcp.h       |  9 +++++----
 net/ipv4/tcp_bic.c      |  2 +-
 net/ipv4/tcp_cong.c     | 14 +++++++-------
 net/ipv4/tcp_cubic.c    |  2 +-
 net/ipv4/tcp_htcp.c     |  2 +-
 net/ipv4/tcp_illinois.c | 16 +++++++---------
 net/ipv4/tcp_input.c    | 25 ++++++++-----------------
 net/ipv4/tcp_lp.c       |  8 +++++---
 net/ipv4/tcp_output.c   |  2 +-
 net/ipv4/tcp_vegas.c    | 10 +++++++---
 net/ipv4/tcp_veno.c     | 10 +++++++---
 net/ipv4/tcp_westwood.c |  2 +-
 net/ipv4/tcp_yeah.c     |  6 ++++--
 net/ipv4/tcp_yeah.h     |  7 +++++--
 15 files changed, 65 insertions(+), 55 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 50f6f6a094c..2694cb3ca76 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1569,6 +1569,11 @@ static inline void __net_timestamp(struct sk_buff *skb)
 	skb->tstamp = ktime_get_real();
 }
 
+static inline ktime_t net_timedelta(ktime_t t)
+{
+	return ktime_sub(ktime_get_real(), t);
+}
+
 
 extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
 extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 43910fe3c44..a385797f160 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -629,9 +629,12 @@ enum tcp_ca_event {
 #define TCP_CA_MAX	128
 #define TCP_CA_BUF_MAX	(TCP_CA_NAME_MAX*TCP_CA_MAX)
 
+#define TCP_CONG_NON_RESTRICTED 0x1
+#define TCP_CONG_RTT_STAMP	0x2
+
 struct tcp_congestion_ops {
 	struct list_head	list;
-	int	non_restricted;
+	unsigned long flags;
 
 	/* initialize private data (optional) */
 	void (*init)(struct sock *sk);
@@ -645,8 +648,6 @@ struct tcp_congestion_ops {
 	/* do new cwnd calculation (required) */
 	void (*cong_avoid)(struct sock *sk, u32 ack,
 			   u32 rtt, u32 in_flight, int good_ack);
-	/* round trip time sample per acked packet (optional) */
-	void (*rtt_sample)(struct sock *sk, u32 usrtt);
 	/* call before changing ca_state (optional) */
 	void (*set_state)(struct sock *sk, u8 new_state);
 	/* call when cwnd event occurs (optional) */
@@ -654,7 +655,7 @@ struct tcp_congestion_ops {
 	/* new value of cwnd after loss (optional) */
 	u32  (*undo_cwnd)(struct sock *sk);
 	/* hook for packet ack accounting (optional) */
-	void (*pkts_acked)(struct sock *sk, u32 num_acked);
+	void (*pkts_acked)(struct sock *sk, u32 num_acked, ktime_t last);
 	/* get info for inet_diag (optional) */
 	void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
 
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 5730333cd0a..281c9f91325 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -206,7 +206,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
 /* Track delayed acknowledgment ratio using sliding window
  * ratio = (15*ratio + sample) / 16
  */
-static void bictcp_acked(struct sock *sk, u32 cnt)
+static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index ccd88407e0c..86b26539e54 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -126,7 +126,7 @@ int tcp_set_default_congestion_control(const char *name)
 #endif
 
 	if (ca) {
-		ca->non_restricted = 1;	/* default is always allowed */
+		ca->flags |= TCP_CONG_NON_RESTRICTED;	/* default is always allowed */
 		list_move(&ca->list, &tcp_cong_list);
 		ret = 0;
 	}
@@ -181,7 +181,7 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
 	*buf = '\0';
 	rcu_read_lock();
 	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
-		if (!ca->non_restricted)
+		if (!(ca->flags & TCP_CONG_NON_RESTRICTED))
 			continue;
 		offs += snprintf(buf + offs, maxlen - offs,
 				 "%s%s",
@@ -212,16 +212,16 @@ int tcp_set_allowed_congestion_control(char *val)
 		}
 	}
 
-	/* pass 2 clear */
+	/* pass 2 clear old values */
 	list_for_each_entry_rcu(ca, &tcp_cong_list, list)
-		ca->non_restricted = 0;
+		ca->flags &= ~TCP_CONG_NON_RESTRICTED;
 
 	/* pass 3 mark as allowed */
 	while ((name = strsep(&val, " ")) && *name) {
 		ca = tcp_ca_find(name);
 		WARN_ON(!ca);
 		if (ca)
-			ca->non_restricted = 1;
+			ca->flags |= TCP_CONG_NON_RESTRICTED;
 	}
 out:
 	spin_unlock(&tcp_cong_list_lock);
@@ -256,7 +256,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 	if (!ca)
 		err = -ENOENT;
 
-	else if (!(ca->non_restricted || capable(CAP_NET_ADMIN)))
+	else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN)))
 		err = -EPERM;
 
 	else if (!try_module_get(ca->owner))
@@ -371,8 +371,8 @@ u32 tcp_reno_min_cwnd(const struct sock *sk)
 EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
 
 struct tcp_congestion_ops tcp_reno = {
+	.flags		= TCP_CONG_NON_RESTRICTED,
 	.name		= "reno",
-	.non_restricted = 1,
 	.owner		= THIS_MODULE,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_reno_cong_avoid,
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 296845be912..14224487b16 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -334,7 +334,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
 /* Track delayed acknowledgment ratio using sliding window
  * ratio = (15*ratio + sample) / 16
  */
-static void bictcp_acked(struct sock *sk, u32 cnt)
+static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 1020eb48d8d..4ba4a7ae0a8 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -98,7 +98,7 @@ static inline void measure_rtt(struct sock *sk)
 	}
 }
 
-static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked)
+static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, ktime_t last)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index ae629860088..8e3165917f7 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -83,9 +83,14 @@ static void tcp_illinois_init(struct sock *sk)
 }
 
 /* Measure RTT for each ack. */
-static void tcp_illinois_rtt_sample(struct sock *sk, u32 rtt)
+static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
 {
 	struct illinois *ca = inet_csk_ca(sk);
+	u32 rtt;
+
+	ca->acked = pkts_acked;
+
+	rtt = ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC;
 
 	/* ignore bogus values, this prevents wraparound in alpha math */
 	if (rtt > RTT_MAX)
@@ -103,13 +108,6 @@ static void tcp_illinois_rtt_sample(struct sock *sk, u32 rtt)
 	ca->sum_rtt += rtt;
 }
 
-/* Capture count of packets covered by ack, to adjust for delayed acks */
-static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked)
-{
-	struct illinois *ca = inet_csk_ca(sk);
-	ca->acked = pkts_acked;
-}
-
 /* Maximum queuing delay */
 static inline u32 max_delay(const struct illinois *ca)
 {
@@ -325,12 +323,12 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
 }
 
 static struct tcp_congestion_ops tcp_illinois = {
+	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_illinois_init,
 	.ssthresh	= tcp_illinois_ssthresh,
 	.min_cwnd	= tcp_reno_min_cwnd,
 	.cong_avoid	= tcp_illinois_cong_avoid,
 	.set_state	= tcp_illinois_state,
-	.rtt_sample	= tcp_illinois_rtt_sample,
 	.get_info	= tcp_illinois_info,
 	.pkts_acked	= tcp_illinois_acked,
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 63338939078..051f0f815f1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2402,14 +2402,6 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
 	return acked;
 }
 
-static u32 tcp_usrtt(struct timeval *tv)
-{
-	struct timeval now;
-
-	do_gettimeofday(&now);
-	return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec);
-}
-
 /* Remove acknowledged frames from the retransmission queue. */
 static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 {
@@ -2420,9 +2412,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 	int acked = 0;
 	__s32 seq_rtt = -1;
 	u32 pkts_acked = 0;
-	void (*rtt_sample)(struct sock *sk, u32 usrtt)
-		= icsk->icsk_ca_ops->rtt_sample;
-	struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
+	ktime_t last_ackt = ktime_set(0,0);
 
 	while ((skb = tcp_write_queue_head(sk)) &&
 	       skb != tcp_send_head(sk)) {
@@ -2471,7 +2461,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 				seq_rtt = -1;
 			} else if (seq_rtt < 0) {
 				seq_rtt = now - scb->when;
-				skb_get_timestamp(skb, &tv);
+				last_ackt = skb->tstamp;
 			}
 			if (sacked & TCPCB_SACKED_ACKED)
 				tp->sacked_out -= tcp_skb_pcount(skb);
@@ -2484,7 +2474,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 			}
 		} else if (seq_rtt < 0) {
 			seq_rtt = now - scb->when;
-			skb_get_timestamp(skb, &tv);
+			last_ackt = skb->tstamp;
 		}
 		tcp_dec_pcount_approx(&tp->fackets_out, skb);
 		tcp_packets_out_dec(tp, skb);
@@ -2494,13 +2484,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 	}
 
 	if (acked&FLAG_ACKED) {
+		const struct tcp_congestion_ops *ca_ops
+			= inet_csk(sk)->icsk_ca_ops;
+
 		tcp_ack_update_rtt(sk, acked, seq_rtt);
 		tcp_ack_packets_out(sk);
-		if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED))
-			(*rtt_sample)(sk, tcp_usrtt(&tv));
 
-		if (icsk->icsk_ca_ops->pkts_acked)
-			icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked);
+		if (ca_ops->pkts_acked)
+			ca_ops->pkts_acked(sk, pkts_acked, last_ackt);
 	}
 
 #if FASTRETRANS_DEBUG > 0
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index f0ebaf0e21c..b4e062ab24a 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -218,7 +218,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk)
  *   3. calc smoothed OWD (SOWD).
  * Most ideas come from the original TCP-LP implementation.
  */
-static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
+static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt)
 {
 	struct lp *lp = inet_csk_ca(sk);
 	s64 mowd = tcp_lp_owd_calculator(sk);
@@ -261,11 +261,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
  * newReno in increase case.
  * We work it out by following the idea from TCP-LP's paper directly
  */
-static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
+static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct lp *lp = inet_csk_ca(sk);
 
+	tcp_lp_rtt_sample(sk,  ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC);
+
 	/* calc inference */
 	if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
 		lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr);
@@ -312,11 +314,11 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
 }
 
 static struct tcp_congestion_ops tcp_lp = {
+	.flags = TCP_CONG_RTT_STAMP,
 	.init = tcp_lp_init,
 	.ssthresh = tcp_reno_ssthresh,
 	.cong_avoid = tcp_lp_cong_avoid,
 	.min_cwnd = tcp_reno_min_cwnd,
-	.rtt_sample = tcp_lp_rtt_sample,
 	.pkts_acked = tcp_lp_pkts_acked,
 
 	.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3a60aea744a..e70a6840cb6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -409,7 +409,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	/* If congestion control is doing timestamping, we must
 	 * take such a timestamp before we potentially clone/copy.
 	 */
-	if (icsk->icsk_ca_ops->rtt_sample)
+	if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
 		__net_timestamp(skb);
 
 	if (likely(clone_it)) {
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 87e72bef6d0..f4104eeb5f2 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -120,10 +120,13 @@ static void tcp_vegas_init(struct sock *sk)
  *   o min-filter RTT samples from a much longer window (forever for now)
  *     to find the propagation delay (baseRTT)
  */
-static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
+static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	struct vegas *vegas = inet_csk_ca(sk);
-	u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
+	u32 vrtt;
+
+	/* Never allow zero rtt or baseRTT */
+	vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1;
 
 	/* Filter to find propagation delay: */
 	if (vrtt < vegas->baseRTT)
@@ -353,11 +356,12 @@ static void tcp_vegas_get_info(struct sock *sk, u32 ext,
 }
 
 static struct tcp_congestion_ops tcp_vegas = {
+	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_vegas_init,
 	.ssthresh	= tcp_reno_ssthresh,
 	.cong_avoid	= tcp_vegas_cong_avoid,
 	.min_cwnd	= tcp_reno_min_cwnd,
-	.rtt_sample	= tcp_vegas_rtt_calc,
+	.pkts_acked	= tcp_vegas_pkts_acked,
 	.set_state	= tcp_vegas_state,
 	.cwnd_event	= tcp_vegas_cwnd_event,
 	.get_info	= tcp_vegas_get_info,
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index ce57bf302f6..0b50d0607a0 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -69,10 +69,13 @@ static void tcp_veno_init(struct sock *sk)
 }
 
 /* Do rtt sampling needed for Veno. */
-static void tcp_veno_rtt_calc(struct sock *sk, u32 usrtt)
+static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	struct veno *veno = inet_csk_ca(sk);
-	u32 vrtt = usrtt + 1;	/* Never allow zero rtt or basertt */
+	u32 vrtt;
+
+	/* Never allow zero rtt or baseRTT */
+	vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1;
 
 	/* Filter to find propagation delay: */
 	if (vrtt < veno->basertt)
@@ -199,10 +202,11 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
 }
 
 static struct tcp_congestion_ops tcp_veno = {
+	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_veno_init,
 	.ssthresh	= tcp_veno_ssthresh,
 	.cong_avoid	= tcp_veno_cong_avoid,
-	.rtt_sample	= tcp_veno_rtt_calc,
+	.pkts_acked	= tcp_veno_pkts_acked,
 	.set_state	= tcp_veno_state,
 	.cwnd_event	= tcp_veno_cwnd_event,
 
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index ae1026a6772..e61e09dd513 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -100,7 +100,7 @@ static void westwood_filter(struct westwood *w, u32 delta)
  * Called after processing group of packets.
  * but all westwood needs is the last sample of srtt.
  */
-static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt)
+static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	struct westwood *w = inet_csk_ca(sk);
 	if (cnt > 0)
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 46dd1bee583..81ef02c1649 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -64,13 +64,15 @@ static void tcp_yeah_init(struct sock *sk)
 }
 
 
-static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked)
+static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct yeah *yeah = inet_csk_ca(sk);
 
 	if (icsk->icsk_ca_state == TCP_CA_Open)
 		yeah->pkts_acked = pkts_acked;
+
+	tcp_vegas_pkts_acked(sk, pkts_acked, last);
 }
 
 static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
@@ -237,11 +239,11 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) {
 }
 
 static struct tcp_congestion_ops tcp_yeah = {
+	.flags		= TCP_CONG_RTT_STAMP,
 	.init		= tcp_yeah_init,
 	.ssthresh	= tcp_yeah_ssthresh,
 	.cong_avoid	= tcp_yeah_cong_avoid,
 	.min_cwnd	= tcp_reno_min_cwnd,
-	.rtt_sample	= tcp_vegas_rtt_calc,
 	.set_state	= tcp_vegas_state,
 	.cwnd_event	= tcp_vegas_cwnd_event,
 	.get_info	= tcp_vegas_get_info,
diff --git a/net/ipv4/tcp_yeah.h b/net/ipv4/tcp_yeah.h
index a62d82038fd..33ad5385c18 100644
--- a/net/ipv4/tcp_yeah.h
+++ b/net/ipv4/tcp_yeah.h
@@ -81,10 +81,13 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state)
  *   o min-filter RTT samples from a much longer window (forever for now)
  *     to find the propagation delay (baseRTT)
  */
-static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
+static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	struct vegas *vegas = inet_csk_ca(sk);
-	u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
+	u32 vrtt;
+
+	/* Never allow zero rtt or baseRTT */
+	vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1;
 
 	/* Filter to find propagation delay: */
 	if (vrtt < vegas->baseRTT)
-- 
cgit v1.2.3


From 7752237e9f07b316f81aebdc43f0d7c9a4ba0acf Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Mon, 23 Apr 2007 22:28:23 -0700
Subject: [TCP] TCP YEAH: Use vegas dont copy it.

Rather than using a copy of vegas code, the YEAH code should just have
it exported so there is common code.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_vegas.c |  31 +++++-------
 net/ipv4/tcp_vegas.h |  24 ++++++++++
 net/ipv4/tcp_yeah.c  |  53 ++++++++++-----------
 net/ipv4/tcp_yeah.h  | 131 ---------------------------------------------------
 4 files changed, 61 insertions(+), 178 deletions(-)
 create mode 100644 net/ipv4/tcp_vegas.h

diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index f4104eeb5f2..0f0ee7f732c 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -38,6 +38,8 @@
 
 #include <net/tcp.h>
 
+#include "tcp_vegas.h"
+
 /* Default values of the Vegas variables, in fixed-point representation
  * with V_PARAM_SHIFT bits to the right of the binary point.
  */
@@ -54,17 +56,6 @@ module_param(gamma, int, 0644);
 MODULE_PARM_DESC(gamma, "limit on increase (scale by 2)");
 
 
-/* Vegas variables */
-struct vegas {
-	u32	beg_snd_nxt;	/* right edge during last RTT */
-	u32	beg_snd_una;	/* left edge  during last RTT */
-	u32	beg_snd_cwnd;	/* saves the size of the cwnd */
-	u8	doing_vegas_now;/* if true, do vegas for this RTT */
-	u16	cntRTT;		/* # of RTTs measured within last RTT */
-	u32	minRTT;		/* min of RTTs measured within last RTT (in usec) */
-	u32	baseRTT;	/* the min of all Vegas RTT measurements seen (in usec) */
-};
-
 /* There are several situations when we must "re-start" Vegas:
  *
  *  o when a connection is established
@@ -81,7 +72,7 @@ struct vegas {
  * Instead we must wait until the completion of an RTT during
  * which we actually receive ACKs.
  */
-static inline void vegas_enable(struct sock *sk)
+static void vegas_enable(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	struct vegas *vegas = inet_csk_ca(sk);
@@ -104,13 +95,14 @@ static inline void vegas_disable(struct sock *sk)
 	vegas->doing_vegas_now = 0;
 }
 
-static void tcp_vegas_init(struct sock *sk)
+void tcp_vegas_init(struct sock *sk)
 {
 	struct vegas *vegas = inet_csk_ca(sk);
 
 	vegas->baseRTT = 0x7fffffff;
 	vegas_enable(sk);
 }
+EXPORT_SYMBOL_GPL(tcp_vegas_init);
 
 /* Do RTT sampling needed for Vegas.
  * Basically we:
@@ -120,7 +112,7 @@ static void tcp_vegas_init(struct sock *sk)
  *   o min-filter RTT samples from a much longer window (forever for now)
  *     to find the propagation delay (baseRTT)
  */
-static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
+void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
 {
 	struct vegas *vegas = inet_csk_ca(sk);
 	u32 vrtt;
@@ -138,8 +130,9 @@ static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
 	vegas->minRTT = min(vegas->minRTT, vrtt);
 	vegas->cntRTT++;
 }
+EXPORT_SYMBOL_GPL(tcp_vegas_pkts_acked);
 
-static void tcp_vegas_state(struct sock *sk, u8 ca_state)
+void tcp_vegas_state(struct sock *sk, u8 ca_state)
 {
 
 	if (ca_state == TCP_CA_Open)
@@ -147,6 +140,7 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state)
 	else
 		vegas_disable(sk);
 }
+EXPORT_SYMBOL_GPL(tcp_vegas_state);
 
 /*
  * If the connection is idle and we are restarting,
@@ -157,12 +151,13 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state)
  * packets, _then_ we can make Vegas calculations
  * again.
  */
-static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
 {
 	if (event == CA_EVENT_CWND_RESTART ||
 	    event == CA_EVENT_TX_START)
 		tcp_vegas_init(sk);
 }
+EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
 
 static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 				 u32 seq_rtt, u32 in_flight, int flag)
@@ -339,8 +334,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
-static void tcp_vegas_get_info(struct sock *sk, u32 ext,
-			       struct sk_buff *skb)
+void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
 {
 	const struct vegas *ca = inet_csk_ca(sk);
 	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
@@ -354,6 +348,7 @@ static void tcp_vegas_get_info(struct sock *sk, u32 ext,
 		nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
 	}
 }
+EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
 
 static struct tcp_congestion_ops tcp_vegas = {
 	.flags		= TCP_CONG_RTT_STAMP,
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
new file mode 100644
index 00000000000..502fa818363
--- /dev/null
+++ b/net/ipv4/tcp_vegas.h
@@ -0,0 +1,24 @@
+/*
+ * TCP Vegas congestion control interface
+ */
+#ifndef __TCP_VEGAS_H
+#define __TCP_VEGAS_H 1
+
+/* Vegas variables */
+struct vegas {
+	u32	beg_snd_nxt;	/* right edge during last RTT */
+	u32	beg_snd_una;	/* left edge  during last RTT */
+	u32	beg_snd_cwnd;	/* saves the size of the cwnd */
+	u8	doing_vegas_now;/* if true, do vegas for this RTT */
+	u16	cntRTT;		/* # of RTTs measured within last RTT */
+	u32	minRTT;		/* min of RTTs measured within last RTT (in usec) */
+	u32	baseRTT;	/* the min of all Vegas RTT measurements seen (in usec) */
+};
+
+extern void tcp_vegas_init(struct sock *sk);
+extern void tcp_vegas_state(struct sock *sk, u8 ca_state);
+extern void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last);
+extern void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
+extern void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
+
+#endif	/* __TCP_VEGAS_H */
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 81ef02c1649..545ed237ab5 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -6,13 +6,14 @@
  *    http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
  *
  */
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet_diag.h>
 
-#include "tcp_yeah.h"
+#include <net/tcp.h>
 
-/* Default values of the Vegas variables, in fixed-point representation
- * with V_PARAM_SHIFT bits to the right of the binary point.
- */
-#define V_PARAM_SHIFT 1
+#include "tcp_vegas.h"
 
 #define TCP_YEAH_ALPHA       80 //lin number of packets queued at the bottleneck
 #define TCP_YEAH_GAMMA        1 //lin fraction of queue to be removed per rtt
@@ -26,14 +27,7 @@
 
 /* YeAH variables */
 struct yeah {
-	/* Vegas */
-	u32	beg_snd_nxt;	/* right edge during last RTT */
-	u32	beg_snd_una;	/* left edge  during last RTT */
-	u32	beg_snd_cwnd;	/* saves the size of the cwnd */
-	u8	doing_vegas_now;/* if true, do vegas for this RTT */
-	u16	cntRTT;		/* # of RTTs measured within last RTT */
-	u32	minRTT;		/* min of RTTs measured within last RTT (in usec) */
-	u32	baseRTT;	/* the min of all Vegas RTT measurements seen (in usec) */
+	struct vegas vegas;	/* must be first */
 
 	/* YeAH */
 	u32 lastQ;
@@ -84,9 +78,10 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
 	if (!tcp_is_cwnd_limited(sk, in_flight))
 		return;
 
-	if (tp->snd_cwnd <= tp->snd_ssthresh) {
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
 		tcp_slow_start(tp);
-	} else if (!yeah->doing_reno_now) {
+
+	else if (!yeah->doing_reno_now) {
 		/* Scalable */
 
 		tp->snd_cwnd_cnt+=yeah->pkts_acked;
@@ -110,19 +105,19 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
 		}
 	}
 
-	/* The key players are v_beg_snd_una and v_beg_snd_nxt.
+	/* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt.
 	 *
 	 * These are so named because they represent the approximate values
 	 * of snd_una and snd_nxt at the beginning of the current RTT. More
 	 * precisely, they represent the amount of data sent during the RTT.
 	 * At the end of the RTT, when we receive an ACK for v_beg_snd_nxt,
-	 * we will calculate that (v_beg_snd_nxt - v_beg_snd_una) outstanding
+	 * we will calculate that (v_beg_snd_nxt - v_vegas.beg_snd_una) outstanding
 	 * bytes of data have been ACKed during the course of the RTT, giving
 	 * an "actual" rate of:
 	 *
-	 *     (v_beg_snd_nxt - v_beg_snd_una) / (rtt duration)
+	 *     (v_beg_snd_nxt - v_vegas.beg_snd_una) / (rtt duration)
 	 *
-	 * Unfortunately, v_beg_snd_una is not exactly equal to snd_una,
+	 * Unfortunately, v_vegas.beg_snd_una is not exactly equal to snd_una,
 	 * because delayed ACKs can cover more than one segment, so they
 	 * don't line up yeahly with the boundaries of RTTs.
 	 *
@@ -132,7 +127,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
 	 * So we keep track of our cwnd separately, in v_beg_snd_cwnd.
 	 */
 
-	if (after(ack, yeah->beg_snd_nxt)) {
+	if (after(ack, yeah->vegas.beg_snd_nxt)) {
 
 		/* We do the Vegas calculations only if we got enough RTT
 		 * samples that we can be reasonably sure that we got
@@ -143,7 +138,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
 		 * If  we have 3 samples, we should be OK.
 		 */
 
-		if (yeah->cntRTT > 2) {
+		if (yeah->vegas.cntRTT > 2) {
 			u32 rtt, queue;
 			u64 bw;
 
@@ -158,18 +153,18 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
 			 * of delayed ACKs, at the cost of noticing congestion
 			 * a bit later.
 			 */
-			rtt = yeah->minRTT;
+			rtt = yeah->vegas.minRTT;
 
 			/* Compute excess number of packets above bandwidth
 			 * Avoid doing full 64 bit divide.
 			 */
 			bw = tp->snd_cwnd;
-			bw *= rtt - yeah->baseRTT;
+			bw *= rtt - yeah->vegas.baseRTT;
 			do_div(bw, rtt);
 			queue = bw;
 
 			if (queue > TCP_YEAH_ALPHA ||
-			    rtt - yeah->baseRTT > (yeah->baseRTT / TCP_YEAH_PHY)) {
+			    rtt - yeah->vegas.baseRTT > (yeah->vegas.baseRTT / TCP_YEAH_PHY)) {
 				if (queue > TCP_YEAH_ALPHA
 				    && tp->snd_cwnd > yeah->reno_count) {
 					u32 reduction = min(queue / TCP_YEAH_GAMMA ,
@@ -208,13 +203,13 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
 		/* Save the extent of the current window so we can use this
 		 * at the end of the next RTT.
 		 */
-		yeah->beg_snd_una  = yeah->beg_snd_nxt;
-		yeah->beg_snd_nxt  = tp->snd_nxt;
-		yeah->beg_snd_cwnd = tp->snd_cwnd;
+		yeah->vegas.beg_snd_una  = yeah->vegas.beg_snd_nxt;
+		yeah->vegas.beg_snd_nxt  = tp->snd_nxt;
+		yeah->vegas.beg_snd_cwnd = tp->snd_cwnd;
 
 		/* Wipe the slate clean for the next RTT. */
-		yeah->cntRTT = 0;
-		yeah->minRTT = 0x7fffffff;
+		yeah->vegas.cntRTT = 0;
+		yeah->vegas.minRTT = 0x7fffffff;
 	}
 }
 
diff --git a/net/ipv4/tcp_yeah.h b/net/ipv4/tcp_yeah.h
index 33ad5385c18..ed3b7198f23 100644
--- a/net/ipv4/tcp_yeah.h
+++ b/net/ipv4/tcp_yeah.h
@@ -5,134 +5,3 @@
 #include <asm/div64.h>
 
 #include <net/tcp.h>
-
-/* Vegas variables */
-struct vegas {
-	u32	beg_snd_nxt;	/* right edge during last RTT */
-	u32	beg_snd_una;	/* left edge  during last RTT */
-	u32	beg_snd_cwnd;	/* saves the size of the cwnd */
-	u8	doing_vegas_now;/* if true, do vegas for this RTT */
-	u16	cntRTT;		/* # of RTTs measured within last RTT */
-	u32	minRTT;		/* min of RTTs measured within last RTT (in usec) */
-	u32	baseRTT;	/* the min of all Vegas RTT measurements seen (in usec) */
-};
-
-/* There are several situations when we must "re-start" Vegas:
- *
- *  o when a connection is established
- *  o after an RTO
- *  o after fast recovery
- *  o when we send a packet and there is no outstanding
- *    unacknowledged data (restarting an idle connection)
- *
- * In these circumstances we cannot do a Vegas calculation at the
- * end of the first RTT, because any calculation we do is using
- * stale info -- both the saved cwnd and congestion feedback are
- * stale.
- *
- * Instead we must wait until the completion of an RTT during
- * which we actually receive ACKs.
- */
-static inline void vegas_enable(struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	struct vegas *vegas = inet_csk_ca(sk);
-
-	/* Begin taking Vegas samples next time we send something. */
-	vegas->doing_vegas_now = 1;
-
-	/* Set the beginning of the next send window. */
-	vegas->beg_snd_nxt = tp->snd_nxt;
-
-	vegas->cntRTT = 0;
-	vegas->minRTT = 0x7fffffff;
-}
-
-/* Stop taking Vegas samples for now. */
-static inline void vegas_disable(struct sock *sk)
-{
-	struct vegas *vegas = inet_csk_ca(sk);
-
-	vegas->doing_vegas_now = 0;
-}
-
-static void tcp_vegas_init(struct sock *sk)
-{
-	struct vegas *vegas = inet_csk_ca(sk);
-
-	vegas->baseRTT = 0x7fffffff;
-	vegas_enable(sk);
-}
-
-static void tcp_vegas_state(struct sock *sk, u8 ca_state)
-{
-
-	if (ca_state == TCP_CA_Open)
-		vegas_enable(sk);
-	else
-		vegas_disable(sk);
-}
-
-/* Do RTT sampling needed for Vegas.
- * Basically we:
- *   o min-filter RTT samples from within an RTT to get the current
- *     propagation delay + queuing delay (we are min-filtering to try to
- *     avoid the effects of delayed ACKs)
- *   o min-filter RTT samples from a much longer window (forever for now)
- *     to find the propagation delay (baseRTT)
- */
-static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
-{
-	struct vegas *vegas = inet_csk_ca(sk);
-	u32 vrtt;
-
-	/* Never allow zero rtt or baseRTT */
-	vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1;
-
-	/* Filter to find propagation delay: */
-	if (vrtt < vegas->baseRTT)
-		vegas->baseRTT = vrtt;
-
-	/* Find the min RTT during the last RTT to find
-	 * the current prop. delay + queuing delay:
-	 */
-	vegas->minRTT = min(vegas->minRTT, vrtt);
-	vegas->cntRTT++;
-}
-
-/*
- * If the connection is idle and we are restarting,
- * then we don't want to do any Vegas calculations
- * until we get fresh RTT samples.  So when we
- * restart, we reset our Vegas state to a clean
- * slate. After we get acks for this flight of
- * packets, _then_ we can make Vegas calculations
- * again.
- */
-static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event)
-{
-	if (event == CA_EVENT_CWND_RESTART ||
-	    event == CA_EVENT_TX_START)
-		tcp_vegas_init(sk);
-}
-
-/* Extract info for Tcp socket info provided via netlink. */
-static void tcp_vegas_get_info(struct sock *sk, u32 ext,
-			       struct sk_buff *skb)
-{
-	const struct vegas *ca = inet_csk_ca(sk);
-	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-		struct tcpvegas_info *info;
-
-		info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO,
-					  sizeof(*info)));
-
-		info->tcpv_enabled = ca->doing_vegas_now;
-		info->tcpv_rttcnt = ca->cntRTT;
-		info->tcpv_rtt = ca->baseRTT;
-		info->tcpv_minrtt = ca->minRTT;
-	rtattr_failure:	;
-	}
-}
-
-
-- 
cgit v1.2.3


From f9d106a6d53b57b78eae5544f9582c643343a764 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 23 Apr 2007 22:36:13 -0700
Subject: [NET]: Warn about GSO/checksum abuse

Now that Patrick has added the code to deal with GSO in netfilter,
we no longer need the crutch that computes partial checksums just
before transmission.

This patch turns this into a warning again.  If this goes OK, we
can then turn it into a BUG_ON and remove the gso_send_check cruft.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 18c51b40f66..d82d00f5451 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1202,7 +1202,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
 	skb->mac_len = skb->network_header - skb->mac_header;
 	__skb_pull(skb, skb->mac_len);
 
-	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
 		if (skb_header_cloned(skb) &&
 		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 			return ERR_PTR(err);
-- 
cgit v1.2.3


From 43fb45cb79e9441a79ece206cf741774500dd627 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 24 Apr 2007 14:07:27 -0700
Subject: [WIRELESS] cfg80211: Update comment for locking.

This patch adds a comment that was part of my rtnl locking patch for
cfg80211 but which I forgot for the merge.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/cfg80211.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 783a11437a5..88171f8ce58 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -23,6 +23,10 @@ struct wiphy;
  * All callbacks except where otherwise noted should return 0
  * on success or a negative error code.
  *
+ * All operations are currently invoked under rtnl for consistency with the
+ * wireless extensions but this is subject to reevaluation as soon as this
+ * code is used more widely and we have a first user without wext.
+ *
  * @add_virtual_intf: create a new virtual interface with the given name
  *
  * @del_virtual_intf: remove the virtual interface determined by ifindex.
-- 
cgit v1.2.3


From 1f9eda7e2b67898fb8e79b3aa3880211b51235e6 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 24 Apr 2007 14:51:55 -0700
Subject: [TIPC]: Enhancements to msg_set_bits() routine

This patch makes two enhancements to msg_set_bits():

1) It now ignores any bits of the new field value that are not
   covered by the mask being used.  (Previously, if the new value
   exceeded the size of the mask the extra bits could corrupt
   other fields in the message header word being updated.)

2) The code has been optimized to minimize the number of run-time
   endianness conversion operations by leveraging the fact that the
   mask (and, in some cases, the value as well) is constant and the
   necessary conversion can be performed by the compiler.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Jon Paul Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/msg.h | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index aec7ce7af87..35d5ba1d4f4 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1,8 +1,8 @@
 /*
  * net/tipc/msg.h: Include file for TIPC message header routines
  *
- * Copyright (c) 2000-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2000-2007, Ericsson AB
+ * Copyright (c) 2005-2007, Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -71,8 +71,11 @@ static inline void msg_set_word(struct tipc_msg *m, u32 w, u32 val)
 static inline void msg_set_bits(struct tipc_msg *m, u32 w,
 				u32 pos, u32 mask, u32 val)
 {
-	u32 word = msg_word(m,w) & ~(mask << pos);
-	msg_set_word(m, w, (word |= (val << pos)));
+	val = (val & mask) << pos;
+	val = htonl(val);
+	mask = htonl(mask << pos);
+	m->hdr[w] &= ~mask;
+	m->hdr[w] |= val;
 }
 
 /*
-- 
cgit v1.2.3


From 84299b3bc4eaedc0734fcc9052b01291e44445fc Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 24 Apr 2007 16:21:38 -0700
Subject: [TCP]: Fix linkage errors on i386.

To avoid raw division, use ktime_to_timeval() to get usec.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ktime.h   | 6 ++++++
 net/ipv4/tcp_illinois.c | 2 +-
 net/ipv4/tcp_lp.c       | 2 +-
 net/ipv4/tcp_vegas.c    | 2 +-
 net/ipv4/tcp_veno.c     | 2 +-
 5 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 248305bb9a1..81bb9c7a4eb 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -259,6 +259,12 @@ static inline s64 ktime_to_ns(const ktime_t kt)
 
 #endif
 
+static inline s64 ktime_to_us(const ktime_t kt)
+{
+	struct timeval tv = ktime_to_timeval(kt);
+	return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec;
+}
+
 /*
  * The resolution of the clocks. The resolution value is returned in
  * the clock_getres() system call to give application programmers an
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 8e3165917f7..4adc47c5535 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -90,7 +90,7 @@ static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
 
 	ca->acked = pkts_acked;
 
-	rtt = ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC;
+	rtt = ktime_to_us(net_timedelta(last));
 
 	/* ignore bogus values, this prevents wraparound in alpha math */
 	if (rtt > RTT_MAX)
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index b4e062ab24a..43294ad9f63 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -266,7 +266,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct lp *lp = inet_csk_ca(sk);
 
-	tcp_lp_rtt_sample(sk,  ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC);
+	tcp_lp_rtt_sample(sk,  ktime_to_us(net_timedelta(last)));
 
 	/* calc inference */
 	if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 0f0ee7f732c..73e19cf7df2 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -118,7 +118,7 @@ void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
 	u32 vrtt;
 
 	/* Never allow zero rtt or baseRTT */
-	vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1;
+	vrtt = ktime_to_us(net_timedelta(last)) + 1;
 
 	/* Filter to find propagation delay: */
 	if (vrtt < vegas->baseRTT)
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 0b50d0607a0..9edb340f2f9 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -75,7 +75,7 @@ static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
 	u32 vrtt;
 
 	/* Never allow zero rtt or baseRTT */
-	vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1;
+	vrtt = ktime_to_us(net_timedelta(last)) + 1;
 
 	/* Filter to find propagation delay: */
 	if (vrtt < veno->basertt)
-- 
cgit v1.2.3


From bb7ec6dfb5aa32b5b4d7d6388b4098b33cd01e8c Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 24 Apr 2007 16:22:42 -0700
Subject: [IPV4]: Fix build without procfs.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/Makefile | 4 ++--
 net/ipv4/proc.c   | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 4ff6c151d7f..3bd25f56f5d 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,11 +10,11 @@ obj-y     := route.o inetpeer.o protocol.o \
 	     tcp_minisocks.o tcp_cong.o \
 	     datagram.o raw.o udp.o udplite.o \
 	     arp.o icmp.o devinet.o af_inet.o  igmp.o \
-	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
+	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \
+	     proc.o
 
 obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
 obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
-obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
 obj-$(CONFIG_IP_MROUTE) += ipmr.o
 obj-$(CONFIG_NET_IPIP) += ipip.o
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 97952d54ae8..a236154591f 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -45,6 +45,7 @@
 #include <net/sock.h>
 #include <net/raw.h>
 
+#ifdef CONFIG_PROC_FS
 static int fold_prot_inuse(struct proto *proto)
 {
 	int res = 0;
@@ -390,6 +391,7 @@ out_netstat:
 	rc = -ENOMEM;
 	goto out;
 }
+#endif
 
 int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
 {
-- 
cgit v1.2.3


From 5e0f04351d11e07a23b5ab4914282cbb78027e50 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 24 Apr 2007 21:53:35 -0700
Subject: [IPV4]: Consolidate common SNMP code

This patch moves the SNMP code shared between IPv4/IPv6 from proc.c
into net/ipv4/af_inet.c.  This makes sense because these functions
aren't specific to /proc.

As a result we can again skip proc.o if /proc is disabled.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h   |  1 +
 net/ipv4/Makefile  |  4 ++--
 net/ipv4/af_inet.c | 40 ++++++++++++++++++++++++++++++++
 net/ipv4/proc.c    | 68 +++++++++++-------------------------------------------
 4 files changed, 57 insertions(+), 56 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index f41ce07f670..bb207db0367 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -166,6 +166,7 @@ DECLARE_SNMP_STAT(struct linux_mib, net_statistics);
 #define NET_ADD_STATS_BH(field, adnd)	SNMP_ADD_STATS_BH(net_statistics, field, adnd)
 #define NET_ADD_STATS_USER(field, adnd)	SNMP_ADD_STATS_USER(net_statistics, field, adnd)
 
+extern unsigned long snmp_fold_field(void *mib[], int offt);
 extern int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign);
 extern void snmp_mib_free(void *ptr[2]);
 
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 3bd25f56f5d..4ff6c151d7f 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,11 +10,11 @@ obj-y     := route.o inetpeer.o protocol.o \
 	     tcp_minisocks.o tcp_cong.o \
 	     datagram.o raw.o udp.o udplite.o \
 	     arp.o icmp.o devinet.o af_inet.o  igmp.o \
-	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \
-	     proc.o
+	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
 
 obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
 obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
+obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
 obj-$(CONFIG_IP_MROUTE) += ipmr.o
 obj-$(CONFIG_NET_IPIP) += ipip.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index a33ca7e7e8f..16aae8ef555 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1219,6 +1219,46 @@ out:
 	return segs;
 }
 
+unsigned long snmp_fold_field(void *mib[], int offt)
+{
+	unsigned long res = 0;
+	int i;
+
+	for_each_possible_cpu(i) {
+		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
+		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
+	}
+	return res;
+}
+EXPORT_SYMBOL_GPL(snmp_fold_field);
+
+int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
+{
+	BUG_ON(ptr == NULL);
+	ptr[0] = __alloc_percpu(mibsize);
+	if (!ptr[0])
+		goto err0;
+	ptr[1] = __alloc_percpu(mibsize);
+	if (!ptr[1])
+		goto err1;
+	return 0;
+err1:
+	free_percpu(ptr[0]);
+	ptr[0] = NULL;
+err0:
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(snmp_mib_init);
+
+void snmp_mib_free(void *ptr[2])
+{
+	BUG_ON(ptr == NULL);
+	free_percpu(ptr[0]);
+	free_percpu(ptr[1]);
+	ptr[0] = ptr[1] = NULL;
+}
+EXPORT_SYMBOL_GPL(snmp_mib_free);
+
 #ifdef CONFIG_IP_MULTICAST
 static struct net_protocol igmp_protocol = {
 	.handler =	igmp_rcv,
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index a236154591f..37ab5802ca0 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -45,7 +45,6 @@
 #include <net/sock.h>
 #include <net/raw.h>
 
-#ifdef CONFIG_PROC_FS
 static int fold_prot_inuse(struct proto *proto)
 {
 	int res = 0;
@@ -88,19 +87,6 @@ static const struct file_operations sockstat_seq_fops = {
 	.release = single_release,
 };
 
-static unsigned long
-fold_field(void *mib[], int offt)
-{
-	unsigned long res = 0;
-	int i;
-
-	for_each_possible_cpu(i) {
-		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
-		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
-	}
-	return res;
-}
-
 /* snmp items */
 static const struct snmp_mib snmp4_ipstats_list[] = {
 	SNMP_MIB_ITEM("InReceives", IPSTATS_MIB_INRECEIVES),
@@ -267,8 +253,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) ip_statistics,
-				      snmp4_ipstats_list[i].entry));
+			   snmp_fold_field((void **)ip_statistics,
+					   snmp4_ipstats_list[i].entry));
 
 	seq_puts(seq, "\nIcmp:");
 	for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
@@ -277,8 +263,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nIcmp:");
 	for (i = 0; snmp4_icmp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) icmp_statistics,
-				      snmp4_icmp_list[i].entry));
+			   snmp_fold_field((void **)icmp_statistics,
+					   snmp4_icmp_list[i].entry));
 
 	seq_puts(seq, "\nTcp:");
 	for (i = 0; snmp4_tcp_list[i].name != NULL; i++)
@@ -289,12 +275,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 		/* MaxConn field is signed, RFC 2012 */
 		if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
 			seq_printf(seq, " %ld",
-				   fold_field((void **) tcp_statistics,
-					      snmp4_tcp_list[i].entry));
+				   snmp_fold_field((void **)tcp_statistics,
+						   snmp4_tcp_list[i].entry));
 		else
 			seq_printf(seq, " %lu",
-				   fold_field((void **) tcp_statistics,
-					      snmp4_tcp_list[i].entry));
+				   snmp_fold_field((void **)tcp_statistics,
+						   snmp4_tcp_list[i].entry));
 	}
 
 	seq_puts(seq, "\nUdp:");
@@ -304,8 +290,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nUdp:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) udp_statistics,
-				      snmp4_udp_list[i].entry));
+			   snmp_fold_field((void **)udp_statistics,
+					   snmp4_udp_list[i].entry));
 
 	/* the UDP and UDP-Lite MIBs are the same */
 	seq_puts(seq, "\nUdpLite:");
@@ -315,8 +301,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nUdpLite:");
 	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) udplite_statistics,
-				      snmp4_udp_list[i].entry)     );
+			   snmp_fold_field((void **)udplite_statistics,
+					   snmp4_udp_list[i].entry));
 
 	seq_putc(seq, '\n');
 	return 0;
@@ -349,8 +335,8 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
 	seq_puts(seq, "\nTcpExt:");
 	for (i = 0; snmp4_net_list[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
-			   fold_field((void **) net_statistics,
-				      snmp4_net_list[i].entry));
+			   snmp_fold_field((void **)net_statistics,
+					   snmp4_net_list[i].entry));
 
 	seq_putc(seq, '\n');
 	return 0;
@@ -391,30 +377,4 @@ out_netstat:
 	rc = -ENOMEM;
 	goto out;
 }
-#endif
-
-int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
-{
-	BUG_ON(ptr == NULL);
-	ptr[0] = __alloc_percpu(mibsize);
-	if (!ptr[0])
-		goto err0;
-	ptr[1] = __alloc_percpu(mibsize);
-	if (!ptr[1])
-		goto err1;
-	return 0;
-err1:
-	free_percpu(ptr[0]);
-	ptr[0] = NULL;
-err0:
-	return -ENOMEM;
-}
-
-void snmp_mib_free(void *ptr[2])
-{
-	BUG_ON(ptr == NULL);
-	free_percpu(ptr[0]);
-	free_percpu(ptr[1]);
-	ptr[0] = ptr[1] = NULL;
-}
 
-- 
cgit v1.2.3


From 7f7d9a6b96c5708c5184cbed61bbc15b163a0f08 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 24 Apr 2007 21:54:09 -0700
Subject: [IPV6]: Consolidate common SNMP code

This patch moves the non-proc SNMP code into addrconf.c and reuses
IPv4 SNMP code where applicable.

As a result we can skip proc.o if /proc is disabled.

Note that I've made a number of functions static since they're only
used by addrconf.c for now.  If they ever get used elsewhere we can
always remove the static.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h  |  20 +++++----
 net/ipv6/Makefile   |   3 +-
 net/ipv6/addrconf.c |  60 +++++++++++++++++++++++++
 net/ipv6/af_inet6.c |  30 ++++++-------
 net/ipv6/proc.c     | 125 +---------------------------------------------------
 5 files changed, 91 insertions(+), 147 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 4408def379b..f70afef9c3c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -166,14 +166,6 @@ DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
 	if (is_udplite) SNMP_INC_STATS_USER(udplite_stats_in6, field);         \
 	else		SNMP_INC_STATS_USER(udp_stats_in6, field);    } while(0)
 
-int snmp6_register_dev(struct inet6_dev *idev);
-int snmp6_unregister_dev(struct inet6_dev *idev);
-int snmp6_alloc_dev(struct inet6_dev *idev);
-int snmp6_free_dev(struct inet6_dev *idev);
-int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign);
-void snmp6_mib_free(void *ptr[2]);
-void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes);
-
 struct ip6_ra_chain
 {
 	struct ip6_ra_chain	*next;
@@ -606,8 +598,20 @@ extern int  udplite6_proc_init(void);
 extern void udplite6_proc_exit(void);
 extern int  ipv6_misc_proc_init(void);
 extern void ipv6_misc_proc_exit(void);
+extern int snmp6_register_dev(struct inet6_dev *idev);
+extern int snmp6_unregister_dev(struct inet6_dev *idev);
 
 extern struct rt6_statistics rt6_stats;
+#else
+static inline int snmp6_register_dev(struct inet6_dev *idev)
+{
+	return 0;
+}
+
+static inline int snmp6_unregister_dev(struct inet6_dev *idev)
+{
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index e9478688c3d..0acf64dc577 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_IPV6) += ipv6.o
 ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
 		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
 		raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
-		exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
+		exthdrs.o sysctl_net_ipv6.o datagram.o \
 		ip6_flowlabel.o inet6_connection_sock.o
 
 ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
@@ -28,6 +28,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
 obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
 obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
 obj-$(CONFIG_NETFILTER)	+= netfilter/
+obj-$(CONFIG_PROC_FS) += proc.o
 
 obj-$(CONFIG_IPV6_SIT) += sit.o
 obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 33ccc95c349..ea86bf4bfe0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -81,6 +81,7 @@
 #endif
 
 #include <asm/uaccess.h>
+#include <asm/unaligned.h>
 
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -246,6 +247,37 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
 	add_timer(&ifp->timer);
 }
 
+static int snmp6_alloc_dev(struct inet6_dev *idev)
+{
+	int err = -ENOMEM;
+
+	if (!idev || !idev->dev)
+		return -EINVAL;
+
+	if (snmp_mib_init((void **)idev->stats.ipv6,
+			  sizeof(struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
+		goto err_ip;
+	if (snmp_mib_init((void **)idev->stats.icmpv6,
+			  sizeof(struct icmpv6_mib),
+			  __alignof__(struct icmpv6_mib)) < 0)
+		goto err_icmp;
+
+	return 0;
+
+err_icmp:
+	snmp_mib_free((void **)idev->stats.ipv6);
+err_ip:
+	return err;
+}
+
+static int snmp6_free_dev(struct inet6_dev *idev)
+{
+	snmp_mib_free((void **)idev->stats.icmpv6);
+	snmp_mib_free((void **)idev->stats.ipv6);
+	return 0;
+}
+
 /* Nobody refers to this device, we may destroy it. */
 
 static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
@@ -3438,6 +3470,34 @@ static inline size_t inet6_if_nlmsg_size(void)
 		 );
 }
 
+static inline void __snmp6_fill_stats(u64 *stats, void **mib, int items,
+				      int bytes)
+{
+	int i;
+	int pad = bytes - sizeof(u64) * items;
+	BUG_ON(pad < 0);
+
+	/* Use put_unaligned() because stats may not be aligned for u64. */
+	put_unaligned(items, &stats[0]);
+	for (i = 1; i < items; i++)
+		put_unaligned(snmp_fold_field(mib, i), &stats[i]);
+
+	memset(&stats[items], 0, pad);
+}
+
+static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
+			     int bytes)
+{
+	switch(attrtype) {
+	case IFLA_INET6_STATS:
+		__snmp6_fill_stats(stats, (void **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
+		break;
+	case IFLA_INET6_ICMP6STATS:
+		__snmp6_fill_stats(stats, (void **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
+		break;
+	}
+}
+
 static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 			     u32 pid, u32 seq, int event, unsigned int flags)
 {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 825d03e87ae..18cb928c8d9 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -714,26 +714,26 @@ EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
 
 static int __init init_ipv6_mibs(void)
 {
-	if (snmp6_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
-			   __alignof__(struct ipstats_mib)) < 0)
+	if (snmp_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
-	if (snmp6_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
-			   __alignof__(struct icmpv6_mib)) < 0)
+	if (snmp_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
+			  __alignof__(struct icmpv6_mib)) < 0)
 		goto err_icmp_mib;
-	if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
-			   __alignof__(struct udp_mib)) < 0)
+	if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udp_mib;
-	if (snmp6_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib),
-			   __alignof__(struct udp_mib)) < 0)
+	if (snmp_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udplite_mib;
 	return 0;
 
 err_udplite_mib:
-	snmp6_mib_free((void **)udp_stats_in6);
+	snmp_mib_free((void **)udp_stats_in6);
 err_udp_mib:
-	snmp6_mib_free((void **)icmpv6_statistics);
+	snmp_mib_free((void **)icmpv6_statistics);
 err_icmp_mib:
-	snmp6_mib_free((void **)ipv6_statistics);
+	snmp_mib_free((void **)ipv6_statistics);
 err_ip_mib:
 	return -ENOMEM;
 
@@ -741,10 +741,10 @@ err_ip_mib:
 
 static void cleanup_ipv6_mibs(void)
 {
-	snmp6_mib_free((void **)ipv6_statistics);
-	snmp6_mib_free((void **)icmpv6_statistics);
-	snmp6_mib_free((void **)udp_stats_in6);
-	snmp6_mib_free((void **)udplite_stats_in6);
+	snmp_mib_free((void **)ipv6_statistics);
+	snmp_mib_free((void **)icmpv6_statistics);
+	snmp_mib_free((void **)udp_stats_in6);
+	snmp_mib_free((void **)udplite_stats_in6);
 }
 
 static int __init inet6_init(void)
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 7a00bedb6b9..acb306a5dd5 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -23,13 +23,12 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/stddef.h>
-#include <asm/unaligned.h>
+#include <net/ip.h>
 #include <net/sock.h>
 #include <net/tcp.h>
 #include <net/transp_v6.h>
 #include <net/ipv6.h>
 
-#ifdef CONFIG_PROC_FS
 static struct proc_dir_entry *proc_net_devsnmp6;
 
 static int fold_prot_inuse(struct proto *proto)
@@ -142,29 +141,14 @@ static struct snmp_mib snmp6_udplite6_list[] = {
 	SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
 	SNMP_MIB_SENTINEL
 };
-#endif	/* CONFIG_PROC_FS */
 
-static unsigned long
-fold_field(void *mib[], int offt)
-{
-	unsigned long res = 0;
-	int i;
-
-	for_each_possible_cpu(i) {
-		res += *(((unsigned long *)per_cpu_ptr(mib[0], i)) + offt);
-		res += *(((unsigned long *)per_cpu_ptr(mib[1], i)) + offt);
-	}
-	return res;
-}
-
-#ifdef CONFIG_PROC_FS
 static inline void
 snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist)
 {
 	int i;
 	for (i=0; itemlist[i].name; i++)
 		seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
-				fold_field(mib, itemlist[i].entry));
+			   snmp_fold_field(mib, itemlist[i].entry));
 }
 
 static int snmp6_seq_show(struct seq_file *seq, void *v)
@@ -209,37 +193,7 @@ static const struct file_operations snmp6_seq_fops = {
 	.llseek	 = seq_lseek,
 	.release = single_release,
 };
-#endif	/* CONFIG_PROC_FS */
 
-static inline void
-__snmp6_fill_stats(u64 *stats, void **mib, int items, int bytes)
-{
-	int i;
-	int pad = bytes - sizeof(u64) * items;
-	BUG_ON(pad < 0);
-
-	/* Use put_unaligned() because stats may not be aligned for u64. */
-	put_unaligned(items, &stats[0]);
-	for (i = 1; i < items; i++)
-		put_unaligned(fold_field(mib, i), &stats[i]);
-
-	memset(&stats[items], 0, pad);
-}
-
-void
-snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, int bytes)
-{
-	switch(attrtype) {
-	case IFLA_INET6_STATS:
-		__snmp6_fill_stats(stats, (void **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
-		break;
-	case IFLA_INET6_ICMP6STATS:
-		__snmp6_fill_stats(stats, (void **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
-		break;
-	}
-}
-
-#ifdef CONFIG_PROC_FS
 int snmp6_register_dev(struct inet6_dev *idev)
 {
 	struct proc_dir_entry *p;
@@ -304,78 +258,3 @@ void ipv6_misc_proc_exit(void)
 	proc_net_remove("snmp6");
 }
 
-#else	/* CONFIG_PROC_FS */
-
-
-int snmp6_register_dev(struct inet6_dev *idev)
-{
-	return 0;
-}
-
-int snmp6_unregister_dev(struct inet6_dev *idev)
-{
-	return 0;
-}
-
-#endif	/* CONFIG_PROC_FS */
-
-int snmp6_alloc_dev(struct inet6_dev *idev)
-{
-	int err = -ENOMEM;
-
-	if (!idev || !idev->dev)
-		return -EINVAL;
-
-	if (snmp6_mib_init((void **)idev->stats.ipv6, sizeof(struct ipstats_mib),
-			   __alignof__(struct ipstats_mib)) < 0)
-		goto err_ip;
-	if (snmp6_mib_init((void **)idev->stats.icmpv6, sizeof(struct icmpv6_mib),
-			   __alignof__(struct icmpv6_mib)) < 0)
-		goto err_icmp;
-
-	return 0;
-
-err_icmp:
-	snmp6_mib_free((void **)idev->stats.ipv6);
-err_ip:
-	return err;
-}
-
-int snmp6_free_dev(struct inet6_dev *idev)
-{
-	snmp6_mib_free((void **)idev->stats.icmpv6);
-	snmp6_mib_free((void **)idev->stats.ipv6);
-	return 0;
-}
-
-int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
-{
-	if (ptr == NULL)
-		return -EINVAL;
-
-	ptr[0] = __alloc_percpu(mibsize);
-	if (!ptr[0])
-		goto err0;
-
-	ptr[1] = __alloc_percpu(mibsize);
-	if (!ptr[1])
-		goto err1;
-
-	return 0;
-
-err1:
-	free_percpu(ptr[0]);
-	ptr[0] = NULL;
-err0:
-	return -ENOMEM;
-}
-
-void snmp6_mib_free(void *ptr[2])
-{
-	if (ptr == NULL)
-		return;
-	free_percpu(ptr[0]);
-	free_percpu(ptr[1]);
-	ptr[0] = ptr[1] = NULL;
-}
-
-- 
cgit v1.2.3


From 30041e4af426bc9ab7a73440ce4a7c78881b6001 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 24 Apr 2007 22:15:40 -0700
Subject: [IPV6]: Fix Makefile thinko.

obj-$(CONFIG_PROC_FS) --> ipv6-$(CONFIG_PROC_FS)

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 0acf64dc577..bb33309044c 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -15,6 +15,7 @@ ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
 ipv6-$(CONFIG_NETFILTER) += netfilter.o
 ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
 ipv6-$(CONFIG_IPV6_MIP6) += mip6.o
+ipv6-$(CONFIG_PROC_FS) += proc.o
 
 ipv6-objs += $(ipv6-y)
 
@@ -28,7 +29,6 @@ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
 obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
 obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
 obj-$(CONFIG_NETFILTER)	+= netfilter/
-obj-$(CONFIG_PROC_FS) += proc.o
 
 obj-$(CONFIG_IPV6_SIT) += sit.o
 obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
-- 
cgit v1.2.3


From 420fe234ad7adaa5a5445e5fab83b1485ed9e0f3 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 24 Apr 2007 20:44:47 +0900
Subject: [IPV6] SIT: Unify code path to get hash array index.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/sit.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 27fe10ffacb..1efa95a99f4 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -99,10 +99,10 @@ static struct ip_tunnel * ipip6_tunnel_lookup(__be32 remote, __be32 local)
 	return NULL;
 }
 
-static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t)
+static struct ip_tunnel **__ipip6_bucket(struct ip_tunnel_parm *parms)
 {
-	__be32 remote = t->parms.iph.daddr;
-	__be32 local = t->parms.iph.saddr;
+	__be32 remote = parms->iph.daddr;
+	__be32 local = parms->iph.saddr;
 	unsigned h = 0;
 	int prio = 0;
 
@@ -117,6 +117,11 @@ static struct ip_tunnel ** ipip6_bucket(struct ip_tunnel *t)
 	return &tunnels[prio][h];
 }
 
+static inline struct ip_tunnel **ipip6_bucket(struct ip_tunnel *t)
+{
+	return __ipip6_bucket(&t->parms);
+}
+
 static void ipip6_tunnel_unlink(struct ip_tunnel *t)
 {
 	struct ip_tunnel **tp;
@@ -147,19 +152,9 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct ip_tunnel_parm *parms, int
 	__be32 local = parms->iph.saddr;
 	struct ip_tunnel *t, **tp, *nt;
 	struct net_device *dev;
-	unsigned h = 0;
-	int prio = 0;
 	char name[IFNAMSIZ];
 
-	if (remote) {
-		prio |= 2;
-		h ^= HASH(remote);
-	}
-	if (local) {
-		prio |= 1;
-		h ^= HASH(local);
-	}
-	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+	for (tp = __ipip6_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 			return t;
 	}
-- 
cgit v1.2.3


From 87d1a164df0b5e297cda698724ea7984d8392b06 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 24 Apr 2007 20:44:47 +0900
Subject: [IPV4] IPIP: Unify code path to get hash array index.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv4/ipip.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 37ab3917017..ebd2f2d532f 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -157,10 +157,10 @@ static struct ip_tunnel * ipip_tunnel_lookup(__be32 remote, __be32 local)
 	return NULL;
 }
 
-static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
+static struct ip_tunnel **__ipip_bucket(struct ip_tunnel_parm *parms)
 {
-	__be32 remote = t->parms.iph.daddr;
-	__be32 local = t->parms.iph.saddr;
+	__be32 remote = parms->iph.daddr;
+	__be32 local = parms->iph.saddr;
 	unsigned h = 0;
 	int prio = 0;
 
@@ -175,6 +175,10 @@ static struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
 	return &tunnels[prio][h];
 }
 
+static inline struct ip_tunnel **ipip_bucket(struct ip_tunnel *t)
+{
+	return __ipip_bucket(&t->parms);
+}
 
 static void ipip_tunnel_unlink(struct ip_tunnel *t)
 {
@@ -206,19 +210,9 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
 	__be32 local = parms->iph.saddr;
 	struct ip_tunnel *t, **tp, *nt;
 	struct net_device *dev;
-	unsigned h = 0;
-	int prio = 0;
 	char name[IFNAMSIZ];
 
-	if (remote) {
-		prio |= 2;
-		h ^= HASH(remote);
-	}
-	if (local) {
-		prio |= 1;
-		h ^= HASH(local);
-	}
-	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+	for (tp = __ipip_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
 			return t;
 	}
-- 
cgit v1.2.3


From 5056a1ef9e2597cff7b15904fbc74193f316fc40 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 24 Apr 2007 20:44:48 +0900
Subject: [IPV4] IP_GRE: Unify code path to get hash array index.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv4/ip_gre.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index f49afaa8129..63282934725 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -191,11 +191,11 @@ static struct ip_tunnel * ipgre_tunnel_lookup(__be32 remote, __be32 local, __be3
 	return NULL;
 }
 
-static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
+static struct ip_tunnel **__ipgre_bucket(struct ip_tunnel_parm *parms)
 {
-	__be32 remote = t->parms.iph.daddr;
-	__be32 local = t->parms.iph.saddr;
-	__be32 key = t->parms.i_key;
+	__be32 remote = parms->iph.daddr;
+	__be32 local = parms->iph.saddr;
+	__be32 key = parms->i_key;
 	unsigned h = HASH(key);
 	int prio = 0;
 
@@ -209,6 +209,11 @@ static struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
 	return &tunnels[prio][h];
 }
 
+static inline struct ip_tunnel **ipgre_bucket(struct ip_tunnel *t)
+{
+	return __ipgre_bucket(&t->parms);
+}
+
 static void ipgre_tunnel_link(struct ip_tunnel *t)
 {
 	struct ip_tunnel **tp = ipgre_bucket(t);
@@ -240,17 +245,9 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
 	__be32 key = parms->i_key;
 	struct ip_tunnel *t, **tp, *nt;
 	struct net_device *dev;
-	unsigned h = HASH(key);
-	int prio = 0;
 	char name[IFNAMSIZ];
 
-	if (local)
-		prio |= 1;
-	if (remote && !MULTICAST(remote)) {
-		prio |= 2;
-		h ^= HASH(remote);
-	}
-	for (tp = &tunnels[prio][h]; (t = *tp) != NULL; tp = &t->next) {
+	for (tp = __ipgre_bucket(parms); (t = *tp) != NULL; tp = &t->next) {
 		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
 			if (key == t->parms.i_key)
 				return t;
-- 
cgit v1.2.3


From df8981dc1928f3a231d91f27c2b3dc373fb4d410 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 24 Apr 2007 20:44:49 +0900
Subject: [IPV6]: Export in6addr_any for future use.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/linux/in6.h | 2 --
 net/ipv6/addrconf.c | 2 --
 2 files changed, 4 deletions(-)

diff --git a/include/linux/in6.h b/include/linux/in6.h
index d559fac4a26..2a61c82af11 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -44,10 +44,8 @@ struct in6_addr
  * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined
  * in network byte order, not in host byte order as are the IPv4 equivalents
  */
-#if 0
 extern const struct in6_addr in6addr_any;
 #define IN6ADDR_ANY_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } }
-#endif
 extern const struct in6_addr in6addr_loopback;
 #define IN6ADDR_LOOPBACK_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } }
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ea86bf4bfe0..e04e4937350 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -209,9 +209,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 };
 
 /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
-#if 0
 const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
-#endif
 const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
 
 static void addrconf_del_timer(struct inet6_ifaddr *ifp)
-- 
cgit v1.2.3


From c53b3590bb294a42121b640e8309379752482b38 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 24 Apr 2007 20:44:50 +0900
Subject: [IPV6] XFRM: Use ip6addr_any where applicable.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/xfrm6_input.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index f30ef16072f..d7ed8aa56ec 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -147,14 +147,12 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 {
 	struct xfrm_state *x = NULL;
 	int wildcard = 0;
-	struct in6_addr any;
 	xfrm_address_t *xany;
 	struct xfrm_state *xfrm_vec_one = NULL;
 	int nh = 0;
 	int i = 0;
 
-	ipv6_addr_set(&any, 0, 0, 0, 0);
-	xany = (xfrm_address_t *)&any;
+	xany = (xfrm_address_t *)&in6addr_any;
 
 	for (i = 0; i < 3; i++) {
 		xfrm_address_t *dst, *src;
-- 
cgit v1.2.3


From e1ec7842df5db897516d73c76bd2a568b4abc33b Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 24 Apr 2007 20:44:52 +0900
Subject: [IPV6] NDISC: Unify main process of sending ND messages.

Because ndisc_send_na(), ndisc_send_ns() and ndisc_send_rs()
are almost identical, so let's unify their common part.

With gcc (GCC) 3.3.5 (Debian 1:3.3.5-13) on i386,
	Before:
	   text	   data	    bss	    dec	    hex	filename
	  14689	    364	     24	  15077	   3ae5	net/ipv6/ndisc.o
	After:
	   text	   data	    bss	    dec	    hex	filename
	  12317	    364	     24	  12705	   31a1	net/ipv6/ndisc.o

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/ndisc.c | 283 +++++++++++++++++--------------------------------------
 1 file changed, 84 insertions(+), 199 deletions(-)

diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 4ee1216f801..d8b36451bad 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -427,38 +427,23 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type,
 	security_sk_classify_flow(ndisc_socket->sk, fl);
 }
 
-static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
-		   struct in6_addr *daddr, struct in6_addr *solicited_addr,
-		   int router, int solicited, int override, int inc_opt)
+static void __ndisc_send(struct net_device *dev,
+			 struct neighbour *neigh,
+			 struct in6_addr *daddr, struct in6_addr *saddr,
+			 struct icmp6hdr *icmp6h, struct in6_addr *target,
+			 int llinfo, int icmp6_mib_outnd)
 {
-	struct in6_addr tmpaddr;
-	struct inet6_ifaddr *ifp;
-	struct inet6_dev *idev;
 	struct flowi fl;
-	struct dst_entry* dst;
+	struct dst_entry *dst;
 	struct sock *sk = ndisc_socket->sk;
-	struct in6_addr *src_addr;
-	struct nd_msg *msg;
-	int len;
 	struct sk_buff *skb;
+	struct icmp6hdr *hdr;
+	struct inet6_dev *idev;
+	int len;
 	int err;
+	u8 *opt;
 
-	len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
-
-	/* for anycast or proxy, solicited_addr != src_addr */
-	ifp = ipv6_get_ifaddr(solicited_addr, dev, 1);
-	if (ifp) {
-		src_addr = solicited_addr;
-		if (ifp->flags & IFA_F_OPTIMISTIC)
-			override = 0;
-		in6_ifa_put(ifp);
-	} else {
-		if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr))
-			return;
-		src_addr = &tmpaddr;
-	}
-
-	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr,
+	ndisc_flow_init(&fl, icmp6h->icmp6_type, saddr, daddr,
 			dev->ifindex);
 
 	dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
@@ -469,61 +454,57 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 	if (err < 0)
 		return;
 
-	if (inc_opt) {
-		if (dev->addr_len)
-			len += ndisc_opt_addr_space(dev);
-		else
-			inc_opt = 0;
-	}
+	if (!dev->addr_len)
+		llinfo = 0;
+
+	len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
+	if (llinfo)
+		len += ndisc_opt_addr_space(dev);
 
 	skb = sock_alloc_send_skb(sk,
 				  (MAX_HEADER + sizeof(struct ipv6hdr) +
 				   len + LL_RESERVED_SPACE(dev)),
 				  1, &err);
-
-	if (skb == NULL) {
+	if (!skb) {
 		ND_PRINTK0(KERN_ERR
-			   "ICMPv6 NA: %s() failed to allocate an skb.\n",
+			   "ICMPv6 ND: %s() failed to allocate an skb.\n",
 			   __FUNCTION__);
 		dst_release(dst);
 		return;
 	}
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	ip6_nd_hdr(sk, skb, dev, src_addr, daddr, IPPROTO_ICMPV6, len);
+	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
 
 	skb->transport_header = skb->tail;
 	skb_put(skb, len);
-	msg = (struct nd_msg *)skb_transport_header(skb);
 
-	msg->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
-	msg->icmph.icmp6_code = 0;
-	msg->icmph.icmp6_cksum = 0;
+	hdr = (struct icmp6hdr *)skb_transport_header(skb);
+	memcpy(hdr, icmp6h, sizeof(*hdr));
 
-	msg->icmph.icmp6_unused = 0;
-	msg->icmph.icmp6_router    = router;
-	msg->icmph.icmp6_solicited = solicited;
-	msg->icmph.icmp6_override  = override;
-
-	/* Set the target address. */
-	ipv6_addr_copy(&msg->target, solicited_addr);
+	opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
+	if (target) {
+		ipv6_addr_copy((struct in6_addr *)opt, target);
+		opt += sizeof(*target);
+	}
 
-	if (inc_opt)
-		ndisc_fill_addr_option(msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr,
+	if (llinfo)
+		ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
 				       dev->addr_len, dev->type);
 
-	/* checksum */
-	msg->icmph.icmp6_cksum = csum_ipv6_magic(src_addr, daddr, len,
-						 IPPROTO_ICMPV6,
-						 csum_partial((__u8 *) msg,
-							      len, 0));
+	hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
+					   IPPROTO_ICMPV6,
+					   csum_partial((__u8 *) hdr,
+							len, 0));
 
 	skb->dst = dst;
+
 	idev = in6_dev_get(dst->dev);
 	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
+
 	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
 	if (!err) {
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS);
+		ICMP6_INC_STATS(idev, icmp6_mib_outnd);
 		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
 	}
 
@@ -531,20 +512,48 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 		in6_dev_put(idev);
 }
 
+static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
+		   struct in6_addr *daddr, struct in6_addr *solicited_addr,
+		   int router, int solicited, int override, int inc_opt)
+{
+	struct in6_addr tmpaddr;
+	struct inet6_ifaddr *ifp;
+	struct in6_addr *src_addr;
+	struct icmp6hdr icmp6h = {
+		.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
+	};
+
+	/* for anycast or proxy, solicited_addr != src_addr */
+	ifp = ipv6_get_ifaddr(solicited_addr, dev, 1);
+	if (ifp) {
+		src_addr = solicited_addr;
+		if (ifp->flags & IFA_F_OPTIMISTIC)
+			override = 0;
+		in6_ifa_put(ifp);
+	} else {
+		if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr))
+			return;
+		src_addr = &tmpaddr;
+	}
+
+	icmp6h.icmp6_router = router;
+	icmp6h.icmp6_solicited = solicited;
+	icmp6h.icmp6_override = override;
+
+	__ndisc_send(dev, neigh, daddr, src_addr,
+		     &icmp6h, solicited_addr,
+		     inc_opt ? ND_OPT_TARGET_LL_ADDR : 0,
+		     ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS);
+}
+
 void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
 		   struct in6_addr *solicit,
 		   struct in6_addr *daddr, struct in6_addr *saddr)
 {
-	struct flowi fl;
-	struct dst_entry* dst;
-	struct inet6_dev *idev;
-	struct sock *sk = ndisc_socket->sk;
-	struct sk_buff *skb;
-	struct nd_msg *msg;
 	struct in6_addr addr_buf;
-	int len;
-	int err;
-	int send_llinfo;
+	struct icmp6hdr icmp6h = {
+		.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
+	};
 
 	if (saddr == NULL) {
 		if (ipv6_get_lladdr(dev, &addr_buf,
@@ -553,86 +562,19 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
 		saddr = &addr_buf;
 	}
 
-	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr,
-			dev->ifindex);
-
-	dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
-	if (!dst)
-		return;
-
-	err = xfrm_lookup(&dst, &fl, NULL, 0);
-	if (err < 0)
-		return;
-
-	len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
-	send_llinfo = dev->addr_len && !ipv6_addr_any(saddr);
-	if (send_llinfo)
-		len += ndisc_opt_addr_space(dev);
-
-	skb = sock_alloc_send_skb(sk,
-				  (MAX_HEADER + sizeof(struct ipv6hdr) +
-				   len + LL_RESERVED_SPACE(dev)),
-				  1, &err);
-	if (skb == NULL) {
-		ND_PRINTK0(KERN_ERR
-			   "ICMPv6 NA: %s() failed to allocate an skb.\n",
-			   __FUNCTION__);
-		dst_release(dst);
-		return;
-	}
-
-	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
-
-	skb->transport_header = skb->tail;
-	skb_put(skb, len);
-	msg = (struct nd_msg *)skb_transport_header(skb);
-	msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION;
-	msg->icmph.icmp6_code = 0;
-	msg->icmph.icmp6_cksum = 0;
-	msg->icmph.icmp6_unused = 0;
-
-	/* Set the target address. */
-	ipv6_addr_copy(&msg->target, solicit);
-
-	if (send_llinfo)
-		ndisc_fill_addr_option(msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
-				       dev->addr_len, dev->type);
-
-	/* checksum */
-	msg->icmph.icmp6_cksum = csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
-						 daddr, len,
-						 IPPROTO_ICMPV6,
-						 csum_partial((__u8 *) msg,
-							      len, 0));
-	/* send it! */
-	skb->dst = dst;
-	idev = in6_dev_get(dst->dev);
-	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
-	if (!err) {
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORSOLICITS);
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
-	}
-
-	if (likely(idev != NULL))
-		in6_dev_put(idev);
+	__ndisc_send(dev, neigh, daddr, saddr,
+		     &icmp6h, solicit,
+		     !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0,
+		     ICMP6_MIB_OUTNEIGHBORSOLICITS);
 }
 
 void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
 		   struct in6_addr *daddr)
 {
-	struct flowi fl;
-	struct dst_entry* dst;
-	struct inet6_dev *idev;
-	struct sock *sk = ndisc_socket->sk;
-	struct sk_buff *skb;
-	struct icmp6hdr *hdr;
-	__u8 * opt;
+	struct icmp6hdr icmp6h = {
+		.icmp6_type = NDISC_ROUTER_SOLICITATION,
+	};
 	int send_sllao = dev->addr_len;
-	int len;
-	int err;
-
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 	/*
@@ -655,67 +597,10 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
 		}
 	}
 #endif
-	ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr,
-			dev->ifindex);
-
-	dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output);
-	if (!dst)
-		return;
-
-	err = xfrm_lookup(&dst, &fl, NULL, 0);
-	if (err < 0)
-		return;
-
-	len = sizeof(struct icmp6hdr);
-	if (send_sllao)
-		len += ndisc_opt_addr_space(dev);
-
-	skb = sock_alloc_send_skb(sk,
-				  (MAX_HEADER + sizeof(struct ipv6hdr) +
-				   len + LL_RESERVED_SPACE(dev)),
-				  1, &err);
-	if (skb == NULL) {
-		ND_PRINTK0(KERN_ERR
-			   "ICMPv6 RS: %s() failed to allocate an skb.\n",
-			   __FUNCTION__);
-		dst_release(dst);
-		return;
-	}
-
-	skb_reserve(skb, LL_RESERVED_SPACE(dev));
-	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
-
-	skb->transport_header = skb->tail;
-	skb_put(skb, len);
-	hdr = icmp6_hdr(skb);
-	hdr->icmp6_type = NDISC_ROUTER_SOLICITATION;
-	hdr->icmp6_code = 0;
-	hdr->icmp6_cksum = 0;
-	hdr->icmp6_unused = 0;
-
-	opt = (u8*) (hdr + 1);
-
-	if (send_sllao)
-		ndisc_fill_addr_option(opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
-				       dev->addr_len, dev->type);
-
-	/* checksum */
-	hdr->icmp6_cksum = csum_ipv6_magic(&ipv6_hdr(skb)->saddr, daddr, len,
-					   IPPROTO_ICMPV6,
-					   csum_partial((__u8 *) hdr, len, 0));
-
-	/* send it! */
-	skb->dst = dst;
-	idev = in6_dev_get(dst->dev);
-	IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
-	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
-	if (!err) {
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTROUTERSOLICITS);
-		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
-	}
-
-	if (likely(idev != NULL))
-		in6_dev_put(idev);
+	__ndisc_send(dev, NULL, daddr, saddr,
+		     &icmp6h, NULL,
+		     send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0,
+		     ICMP6_MIB_OUTROUTERSOLICITS);
 }
 
 
-- 
cgit v1.2.3


From 83aa0938ff59e8ef6d0b99260063ebe84fc84a16 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Wed, 25 Apr 2007 22:03:10 -0700
Subject: [BRIDGE]: don't change packet type

The change to forward STP bpdu's (for usermode STP) through normal path,
changed the packet type in the process. Since link local stuff is multicast, it
should stay pkt_type = PACKET_MULTICAST.  The code was probably copy/pasted
incorrectly from the bridge pseudo-device receive path.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_input.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 8a552763209..364e0ba4415 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -131,12 +131,9 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto drop;
 
-	if (unlikely(is_link_local(dest))) {
-		skb->pkt_type = PACKET_HOST;
-
+	if (unlikely(is_link_local(dest)))
 		return (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
 				NULL, br_handle_local_finish) == 0) ? skb : NULL;
-	}
 
 	switch (p->state) {
 	case BR_STATE_FORWARDING:
-- 
cgit v1.2.3


From 2111f8b9e58fd04b87b8b07d66485f255a57b0bb Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Wed, 25 Apr 2007 22:05:55 -0700
Subject: [BRIDGE]: drop PAUSE frames

Pause frames should never make it out of the network device into
the stack. But if a device was misconfigured, it might happen.
So drop pause frames in bridge.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h | 1 +
 net/bridge/br_input.c    | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index f6863fbcf33..1db774cf9dc 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -61,6 +61,7 @@
 #define ETH_P_8021Q	0x8100          /* 802.1Q VLAN Extended Header  */
 #define ETH_P_IPX	0x8137		/* IPX over DIX			*/
 #define ETH_P_IPV6	0x86DD		/* IPv6 over bluebook		*/
+#define ETH_P_PAUSE	0x8808		/* IEEE Pause frames. See 802.3 31B */
 #define ETH_P_SLOW	0x8809		/* Slow Protocol. See 802.3ad 43B */
 #define ETH_P_WCCP	0x883E		/* Web-cache coordination protocol
 					 * defined in draft-wilson-wrec-wccp-v2-00.txt */
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 364e0ba4415..5662567c8ae 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -131,9 +131,14 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto drop;
 
-	if (unlikely(is_link_local(dest)))
+	if (unlikely(is_link_local(dest))) {
+		/* Pause frames shouldn't be passed up by driver anyway */
+		if (skb->protocol == htons(ETH_P_PAUSE))
+			goto drop;
+
 		return (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
 				NULL, br_handle_local_finish) == 0) ? skb : NULL;
+	}
 
 	switch (p->state) {
 	case BR_STATE_FORWARDING:
-- 
cgit v1.2.3


From c2886d6259b8faac4c05ffd9c3c401ac84478de0 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Wed, 25 Apr 2007 22:07:58 -0700
Subject: [BRIDGE]: if no STP then forward all BPDUs

If a bridge is not running STP, then it has no way to detect a cycle
in the network. But if it is not running STP and some other machine
or device is running STP, then if STP BPDU's get forwarded to it can
detect the cycle.

This is how the old 2.4 and early 2.6 code worked.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_input.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 5662567c8ae..420bbb9955e 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -136,8 +136,14 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 		if (skb->protocol == htons(ETH_P_PAUSE))
 			goto drop;
 
-		return (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
-				NULL, br_handle_local_finish) == 0) ? skb : NULL;
+		/* Process STP BPDU's through normal netif_receive_skb() path */
+		if (p->br->stp_enabled != BR_NO_STP) {
+			if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
+				    NULL, br_handle_local_finish))
+				return NULL;
+			else
+				return skb;
+		}
 	}
 
 	switch (p->state) {
-- 
cgit v1.2.3


From 98486fa2f4894e2b01e325c659635596bdec1614 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Wed, 25 Apr 2007 22:08:46 -0700
Subject: [BRIDGE]: Missing rtnl.

Writing to /sys/class/net/brX/bridge/stp_state causes a warning because
RTNL is not held when call br_stp_if.c

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_sysfs_br.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 7ec0b76cdd2..33c6c4a7c68 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -149,9 +149,11 @@ static ssize_t show_stp_state(struct device *d,
 
 static void set_stp_state(struct net_bridge *br, unsigned long val)
 {
+	rtnl_lock();
 	spin_unlock_bh(&br->lock);
 	br_stp_set_enabled(br, val);
 	spin_lock_bh(&br->lock);
+	rtnl_unlock();
 }
 
 static ssize_t store_stp_state(struct device *d,
-- 
cgit v1.2.3


From 28d8909bc790d936ce33f4402adf7577533bbd4b Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Thu, 26 Apr 2007 00:10:29 -0700
Subject: [XFRM]: Export SAD info.

On a system with a lot of SAs, counting SAD entries chews useful
CPU time since you need to dump the whole SAD to user space;
i.e something like ip xfrm state ls | grep -i src | wc -l
I have seen taking literally minutes on a 40K SAs when the system
is swapping.
With this patch, some of the SAD info (that was already being tracked)
is exposed to user space. i.e you do:
ip xfrm state count
And you get the count; you can also pass -s to the command line and
get the hash info.

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/xfrm.h  | 25 +++++++++++++++++++++++
 include/net/xfrm.h    |  8 ++++++++
 net/xfrm/xfrm_state.c | 10 +++++++++
 net/xfrm/xfrm_user.c  | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 99 insertions(+)

diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 15ca89e9961..9c656a5cf84 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -181,6 +181,10 @@ enum {
 	XFRM_MSG_MIGRATE,
 #define XFRM_MSG_MIGRATE XFRM_MSG_MIGRATE
 
+	XFRM_MSG_NEWSADINFO,
+#define XFRM_MSG_NEWSADINFO XFRM_MSG_NEWSADINFO
+	XFRM_MSG_GETSADINFO,
+#define XFRM_MSG_GETSADINFO XFRM_MSG_GETSADINFO
 	__XFRM_MSG_MAX
 };
 #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1)
@@ -234,6 +238,17 @@ enum xfrm_ae_ftype_t {
 #define XFRM_AE_MAX (__XFRM_AE_MAX - 1)
 };
 
+/* SAD Table filter flags  */
+enum xfrm_sad_ftype_t {
+	XFRM_SAD_UNSPEC,
+	XFRM_SAD_HMASK=1,
+	XFRM_SAD_HMAX=2,
+	XFRM_SAD_CNT=4,
+	__XFRM_SAD_MAX
+
+#define XFRM_SAD_MAX (__XFRM_SAD_MAX - 1)
+};
+
 struct xfrm_userpolicy_type {
 	__u8		type;
 	__u16		reserved1;
@@ -265,6 +280,16 @@ enum xfrm_attr_type_t {
 #define XFRMA_MAX (__XFRMA_MAX - 1)
 };
 
+enum xfrm_sadattr_type_t {
+	XFRMA_SAD_UNSPEC,
+	XFRMA_SADHMASK,
+	XFRMA_SADHMAX,
+	XFRMA_SADCNT,
+	__XFRMA_SAD_MAX
+
+#define XFRMA_SAD_MAX (__XFRMA_SAD_MAX - 1)
+};
+
 struct xfrm_usersa_info {
 	struct xfrm_selector		sel;
 	struct xfrm_id			id;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index e144a25814b..8287081d77f 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -416,6 +416,13 @@ struct xfrm_audit
 	u32	secid;
 };
 
+/* SAD metadata, add more later */
+struct xfrm_sadinfo
+{
+	u32 sadhcnt; /* current hash bkts */
+	u32 sadhmcnt; /* max allowed hash bkts */
+	u32 sadcnt; /* current running count */
+};
 #ifdef CONFIG_AUDITSYSCALL
 extern void xfrm_audit_log(uid_t auid, u32 secid, int type, int result,
 		    struct xfrm_policy *xp, struct xfrm_state *x);
@@ -938,6 +945,7 @@ static inline int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **s
 extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq);
 extern int xfrm_state_delete(struct xfrm_state *x);
 extern void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info);
+extern void xfrm_sad_getinfo(struct xfrm_sadinfo *si);
 extern int xfrm_replay_check(struct xfrm_state *x, __be32 seq);
 extern void xfrm_replay_advance(struct xfrm_state *x, __be32 seq);
 extern void xfrm_replay_notify(struct xfrm_state *x, int event);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 743f07e7f69..f3a61ebd8d6 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -421,6 +421,16 @@ restart:
 }
 EXPORT_SYMBOL(xfrm_state_flush);
 
+void xfrm_sad_getinfo(struct xfrm_sadinfo *si)
+{
+	spin_lock_bh(&xfrm_state_lock);
+	si->sadcnt = xfrm_state_num;
+	si->sadhcnt = xfrm_state_hmask;
+	si->sadhmcnt = xfrm_state_hashmax;
+	spin_unlock_bh(&xfrm_state_lock);
+}
+EXPORT_SYMBOL(xfrm_sad_getinfo);
+
 static int
 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
 		  struct xfrm_tmpl *tmpl,
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index f91521d5f2a..cb4cc1bde5d 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -672,6 +672,61 @@ static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
 	return skb;
 }
 
+static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags)
+{
+	struct xfrm_sadinfo si;
+	struct nlmsghdr *nlh;
+	u32 *f;
+
+	nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0);
+	if (nlh == NULL) /* shouldnt really happen ... */
+		return -EMSGSIZE;
+
+	f = nlmsg_data(nlh);
+	*f = flags;
+	xfrm_sad_getinfo(&si);
+
+	if (flags & XFRM_SAD_HMASK)
+		NLA_PUT_U32(skb, XFRMA_SADHMASK, si.sadhcnt);
+	if (flags & XFRM_SAD_HMAX)
+		NLA_PUT_U32(skb, XFRMA_SADHMAX, si.sadhmcnt);
+	if (flags & XFRM_SAD_CNT)
+		NLA_PUT_U32(skb, XFRMA_SADCNT, si.sadcnt);
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh,
+		struct rtattr **xfrma)
+{
+	struct sk_buff *r_skb;
+	u32 *flags = NLMSG_DATA(nlh);
+	u32 spid = NETLINK_CB(skb).pid;
+	u32 seq = nlh->nlmsg_seq;
+	int len = NLMSG_LENGTH(sizeof(u32));
+
+	if (*flags & XFRM_SAD_HMASK)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SAD_HMAX)
+		len += RTA_SPACE(sizeof(u32));
+	if (*flags & XFRM_SAD_CNT)
+		len += RTA_SPACE(sizeof(u32));
+
+	r_skb = alloc_skb(len, GFP_ATOMIC);
+
+	if (r_skb == NULL)
+		return -ENOMEM;
+
+	if (build_sadinfo(r_skb, spid, seq, *flags) < 0)
+		BUG();
+
+	return nlmsg_unicast(xfrm_nl, r_skb, spid);
+}
+
 static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 		struct rtattr **xfrma)
 {
@@ -1850,6 +1905,7 @@ static struct xfrm_link {
 	[XFRM_MSG_NEWAE       - XFRM_MSG_BASE] = { .doit = xfrm_new_ae  },
 	[XFRM_MSG_GETAE       - XFRM_MSG_BASE] = { .doit = xfrm_get_ae  },
 	[XFRM_MSG_MIGRATE     - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate    },
+	[XFRM_MSG_GETSADINFO  - XFRM_MSG_BASE] = { .doit = xfrm_get_sadinfo    },
 };
 
 static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
-- 
cgit v1.2.3


From eefa3906283a2b60a6d02a2cda593a7d7d7946c5 Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Thu, 26 Apr 2007 00:44:22 -0700
Subject: [NET]: Clean up sk_buff walkers.

I noticed recently that, in skb_checksum(), "offset" and "start" are
essentially the same thing and have the same value throughout the
function, despite being computed differently. Using a single variable
allows some cleanups and makes the skb_checksum() function smaller,
more readable, and presumably marginally faster.

We appear to have many other "sk_buff walker" functions built on the
exact same model, so the cleanup applies to them, too. Here is a list
of the functions I found to be affected:

net/appletalk/ddp.c:atalk_sum_skb()
net/core/datagram.c:skb_copy_datagram_iovec()
net/core/datagram.c:skb_copy_and_csum_datagram()
net/core/skbuff.c:skb_copy_bits()
net/core/skbuff.c:skb_store_bits()
net/core/skbuff.c:skb_checksum()
net/core/skbuff.c:skb_copy_and_csum_bit()
net/core/user_dma.c:dma_skb_copy_datagram_iovec()
net/xfrm/xfrm_algo.c:skb_icv_walk()
net/xfrm/xfrm_algo.c:skb_to_sgvec()

OTOH, I admit I'm a bit surprised, the cleanup is rather obvious so I'm
really wondering if I am missing something. Can anyone please comment
on this?

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/appletalk/ddp.c  |  25 ++++-------
 net/core/datagram.c  |  50 +++++++--------------
 net/core/skbuff.c    | 122 ++++++++++++++++++---------------------------------
 net/core/user_dma.c  |  25 ++++-------
 net/xfrm/xfrm_algo.c |  22 ++++------
 5 files changed, 86 insertions(+), 158 deletions(-)

diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index f6a92a0b7aa..16eda21fb38 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -937,11 +937,11 @@ static unsigned long atalk_sum_partial(const unsigned char *data,
 static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
 				   int len, unsigned long sum)
 {
-	int start = skb_headlen(skb);
+	int end = skb_headlen(skb);
 	int i, copy;
 
 	/* checksum stuff in header space */
-	if ( (copy = start - offset) > 0) {
+	if ((copy = end - offset) > 0) {
 		if (copy > len)
 			copy = len;
 		sum = atalk_sum_partial(skb->data + offset, copy, sum);
@@ -953,11 +953,9 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
 
 	/* checksum stuff in frags */
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
+		BUG_TRAP(len >= 0);
 
-		BUG_TRAP(start <= offset + len);
-
-		end = start + skb_shinfo(skb)->frags[i].size;
+		end = offset + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
 			u8 *vaddr;
 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -965,36 +963,31 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset,
 			if (copy > len)
 				copy = len;
 			vaddr = kmap_skb_frag(frag);
-			sum = atalk_sum_partial(vaddr + frag->page_offset +
-						  offset - start, copy, sum);
+			sum = atalk_sum_partial(vaddr + frag->page_offset,
+						copy, sum);
 			kunmap_skb_frag(vaddr);
 
 			if (!(len -= copy))
 				return sum;
 			offset += copy;
 		}
-		start = end;
 	}
 
 	if (skb_shinfo(skb)->frag_list) {
 		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 
 		for (; list; list = list->next) {
-			int end;
-
-			BUG_TRAP(start <= offset + len);
+			BUG_TRAP(len >= 0);
 
-			end = start + list->len;
+			end = offset + list->len;
 			if ((copy = end - offset) > 0) {
 				if (copy > len)
 					copy = len;
-				sum = atalk_sum_skb(list, offset - start,
-						    copy, sum);
+				sum = atalk_sum_skb(list, 0, copy, sum);
 				if ((len -= copy) == 0)
 					return sum;
 				offset += copy;
 			}
-			start = end;
 		}
 	}
 
diff --git a/net/core/datagram.c b/net/core/datagram.c
index cb056f47612..e1afa767944 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -247,8 +247,8 @@ EXPORT_SYMBOL(skb_kill_datagram);
 int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 			    struct iovec *to, int len)
 {
-	int start = skb_headlen(skb);
-	int i, copy = start - offset;
+	int end = skb_headlen(skb);
+	int i, copy = end - offset;
 
 	/* Copy header. */
 	if (copy > 0) {
@@ -263,11 +263,9 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 
 	/* Copy paged appendix. Hmm... why does this look so complicated? */
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
+		BUG_TRAP(len >= 0);
 
-		BUG_TRAP(start <= offset + len);
-
-		end = start + skb_shinfo(skb)->frags[i].size;
+		end = offset + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
 			int err;
 			u8  *vaddr;
@@ -277,8 +275,8 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 			if (copy > len)
 				copy = len;
 			vaddr = kmap(page);
-			err = memcpy_toiovec(to, vaddr + frag->page_offset +
-					     offset - start, copy);
+			err = memcpy_toiovec(to, vaddr + frag->page_offset,
+					     copy);
 			kunmap(page);
 			if (err)
 				goto fault;
@@ -286,30 +284,24 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 				return 0;
 			offset += copy;
 		}
-		start = end;
 	}
 
 	if (skb_shinfo(skb)->frag_list) {
 		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 
 		for (; list; list = list->next) {
-			int end;
-
-			BUG_TRAP(start <= offset + len);
+			BUG_TRAP(len >= 0);
 
-			end = start + list->len;
+			end = offset + list->len;
 			if ((copy = end - offset) > 0) {
 				if (copy > len)
 					copy = len;
-				if (skb_copy_datagram_iovec(list,
-							    offset - start,
-							    to, copy))
+				if (skb_copy_datagram_iovec(list, 0, to, copy))
 					goto fault;
 				if ((len -= copy) == 0)
 					return 0;
 				offset += copy;
 			}
-			start = end;
 		}
 	}
 	if (!len)
@@ -323,9 +315,9 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 				      u8 __user *to, int len,
 				      __wsum *csump)
 {
-	int start = skb_headlen(skb);
+	int end = skb_headlen(skb);
 	int pos = 0;
-	int i, copy = start - offset;
+	int i, copy = end - offset;
 
 	/* Copy header. */
 	if (copy > 0) {
@@ -344,11 +336,9 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 	}
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
+		BUG_TRAP(len >= 0);
 
-		BUG_TRAP(start <= offset + len);
-
-		end = start + skb_shinfo(skb)->frags[i].size;
+		end = offset + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
 			__wsum csum2;
 			int err = 0;
@@ -360,8 +350,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 				copy = len;
 			vaddr = kmap(page);
 			csum2 = csum_and_copy_to_user(vaddr +
-							frag->page_offset +
-							offset - start,
+							frag->page_offset,
 						      to, copy, 0, &err);
 			kunmap(page);
 			if (err)
@@ -373,24 +362,20 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 			to += copy;
 			pos += copy;
 		}
-		start = end;
 	}
 
 	if (skb_shinfo(skb)->frag_list) {
 		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 
 		for (; list; list=list->next) {
-			int end;
-
-			BUG_TRAP(start <= offset + len);
+			BUG_TRAP(len >= 0);
 
-			end = start + list->len;
+			end = offset + list->len;
 			if ((copy = end - offset) > 0) {
 				__wsum csum2 = 0;
 				if (copy > len)
 					copy = len;
-				if (skb_copy_and_csum_datagram(list,
-							       offset - start,
+				if (skb_copy_and_csum_datagram(list, 0,
 							       to, copy,
 							       &csum2))
 					goto fault;
@@ -401,7 +386,6 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 				to += copy;
 				pos += copy;
 			}
-			start = end;
 		}
 	}
 	if (!len)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 142257307fa..32f087b5233 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1045,13 +1045,13 @@ pull_pages:
 int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 {
 	int i, copy;
-	int start = skb_headlen(skb);
+	int end = skb_headlen(skb);
 
 	if (offset > (int)skb->len - len)
 		goto fault;
 
 	/* Copy header. */
-	if ((copy = start - offset) > 0) {
+	if ((copy = end - offset) > 0) {
 		if (copy > len)
 			copy = len;
 		skb_copy_from_linear_data_offset(skb, offset, to, copy);
@@ -1062,11 +1062,9 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 	}
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
+		BUG_TRAP(len >= 0);
 
-		BUG_TRAP(start <= offset + len);
-
-		end = start + skb_shinfo(skb)->frags[i].size;
+		end = offset + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
 			u8 *vaddr;
 
@@ -1075,8 +1073,8 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 
 			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
 			memcpy(to,
-			       vaddr + skb_shinfo(skb)->frags[i].page_offset+
-			       offset - start, copy);
+			       vaddr + skb_shinfo(skb)->frags[i].page_offset,
+			       copy);
 			kunmap_skb_frag(vaddr);
 
 			if ((len -= copy) == 0)
@@ -1084,30 +1082,25 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 			offset += copy;
 			to     += copy;
 		}
-		start = end;
 	}
 
 	if (skb_shinfo(skb)->frag_list) {
 		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 
 		for (; list; list = list->next) {
-			int end;
-
-			BUG_TRAP(start <= offset + len);
+			BUG_TRAP(len >= 0);
 
-			end = start + list->len;
+			end = offset + list->len;
 			if ((copy = end - offset) > 0) {
 				if (copy > len)
 					copy = len;
-				if (skb_copy_bits(list, offset - start,
-						  to, copy))
+				if (skb_copy_bits(list, 0, to, copy))
 					goto fault;
 				if ((len -= copy) == 0)
 					return 0;
 				offset += copy;
 				to     += copy;
 			}
-			start = end;
 		}
 	}
 	if (!len)
@@ -1132,12 +1125,12 @@ fault:
 int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
 {
 	int i, copy;
-	int start = skb_headlen(skb);
+	int end = skb_headlen(skb);
 
 	if (offset > (int)skb->len - len)
 		goto fault;
 
-	if ((copy = start - offset) > 0) {
+	if ((copy = end - offset) > 0) {
 		if (copy > len)
 			copy = len;
 		skb_copy_to_linear_data_offset(skb, offset, from, copy);
@@ -1149,11 +1142,9 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-		int end;
-
-		BUG_TRAP(start <= offset + len);
+		BUG_TRAP(len >= 0);
 
-		end = start + frag->size;
+		end = offset + frag->size;
 		if ((copy = end - offset) > 0) {
 			u8 *vaddr;
 
@@ -1161,8 +1152,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
 				copy = len;
 
 			vaddr = kmap_skb_frag(frag);
-			memcpy(vaddr + frag->page_offset + offset - start,
-			       from, copy);
+			memcpy(vaddr + frag->page_offset, from, copy);
 			kunmap_skb_frag(vaddr);
 
 			if ((len -= copy) == 0)
@@ -1170,30 +1160,25 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
 			offset += copy;
 			from += copy;
 		}
-		start = end;
 	}
 
 	if (skb_shinfo(skb)->frag_list) {
 		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 
 		for (; list; list = list->next) {
-			int end;
+			BUG_TRAP(len >= 0);
 
-			BUG_TRAP(start <= offset + len);
-
-			end = start + list->len;
+			end = offset + list->len;
 			if ((copy = end - offset) > 0) {
 				if (copy > len)
 					copy = len;
-				if (skb_store_bits(list, offset - start,
-						   from, copy))
+				if (skb_store_bits(list, 0, from, copy))
 					goto fault;
 				if ((len -= copy) == 0)
 					return 0;
 				offset += copy;
 				from += copy;
 			}
-			start = end;
 		}
 	}
 	if (!len)
@@ -1210,8 +1195,8 @@ EXPORT_SYMBOL(skb_store_bits);
 __wsum skb_checksum(const struct sk_buff *skb, int offset,
 			  int len, __wsum csum)
 {
-	int start = skb_headlen(skb);
-	int i, copy = start - offset;
+	int end = skb_headlen(skb);
+	int i, copy = end - offset;
 	int pos = 0;
 
 	/* Checksum header. */
@@ -1226,11 +1211,9 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 	}
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
-
-		BUG_TRAP(start <= offset + len);
+		BUG_TRAP(len >= 0);
 
-		end = start + skb_shinfo(skb)->frags[i].size;
+		end = offset + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
 			__wsum csum2;
 			u8 *vaddr;
@@ -1239,8 +1222,8 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 			if (copy > len)
 				copy = len;
 			vaddr = kmap_skb_frag(frag);
-			csum2 = csum_partial(vaddr + frag->page_offset +
-					     offset - start, copy, 0);
+			csum2 = csum_partial(vaddr + frag->page_offset,
+					     copy, 0);
 			kunmap_skb_frag(vaddr);
 			csum = csum_block_add(csum, csum2, pos);
 			if (!(len -= copy))
@@ -1248,31 +1231,26 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 			offset += copy;
 			pos    += copy;
 		}
-		start = end;
 	}
 
 	if (skb_shinfo(skb)->frag_list) {
 		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 
 		for (; list; list = list->next) {
-			int end;
+			BUG_TRAP(len >= 0);
 
-			BUG_TRAP(start <= offset + len);
-
-			end = start + list->len;
+			end = offset + list->len;
 			if ((copy = end - offset) > 0) {
 				__wsum csum2;
 				if (copy > len)
 					copy = len;
-				csum2 = skb_checksum(list, offset - start,
-						     copy, 0);
+				csum2 = skb_checksum(list, 0, copy, 0);
 				csum = csum_block_add(csum, csum2, pos);
 				if ((len -= copy) == 0)
 					return csum;
 				offset += copy;
 				pos    += copy;
 			}
-			start = end;
 		}
 	}
 	BUG_ON(len);
@@ -1285,8 +1263,8 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
 __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 				    u8 *to, int len, __wsum csum)
 {
-	int start = skb_headlen(skb);
-	int i, copy = start - offset;
+	int end = skb_headlen(skb);
+	int i, copy = end - offset;
 	int pos = 0;
 
 	/* Copy header. */
@@ -1303,11 +1281,9 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 	}
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
-
-		BUG_TRAP(start <= offset + len);
+		BUG_TRAP(len >= 0);
 
-		end = start + skb_shinfo(skb)->frags[i].size;
+		end = offset + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
 			__wsum csum2;
 			u8 *vaddr;
@@ -1317,9 +1293,8 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 				copy = len;
 			vaddr = kmap_skb_frag(frag);
 			csum2 = csum_partial_copy_nocheck(vaddr +
-							  frag->page_offset +
-							  offset - start, to,
-							  copy, 0);
+							  frag->page_offset,
+							  to, copy, 0);
 			kunmap_skb_frag(vaddr);
 			csum = csum_block_add(csum, csum2, pos);
 			if (!(len -= copy))
@@ -1328,7 +1303,6 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 			to     += copy;
 			pos    += copy;
 		}
-		start = end;
 	}
 
 	if (skb_shinfo(skb)->frag_list) {
@@ -1336,16 +1310,13 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 
 		for (; list; list = list->next) {
 			__wsum csum2;
-			int end;
-
-			BUG_TRAP(start <= offset + len);
+			BUG_TRAP(len >= 0);
 
-			end = start + list->len;
+			end = offset + list->len;
 			if ((copy = end - offset) > 0) {
 				if (copy > len)
 					copy = len;
-				csum2 = skb_copy_and_csum_bits(list,
-							       offset - start,
+				csum2 = skb_copy_and_csum_bits(list, 0,
 							       to, copy, 0);
 				csum = csum_block_add(csum, csum2, pos);
 				if ((len -= copy) == 0)
@@ -1354,7 +1325,6 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 				to     += copy;
 				pos    += copy;
 			}
-			start = end;
 		}
 	}
 	BUG_ON(len);
@@ -2026,8 +1996,8 @@ void __init skb_init(void)
 int
 skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 {
-	int start = skb_headlen(skb);
-	int i, copy = start - offset;
+	int end = skb_headlen(skb);
+	int i, copy = end - offset;
 	int elt = 0;
 
 	if (copy > 0) {
@@ -2043,45 +2013,39 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 	}
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
+		BUG_TRAP(len >= 0);
 
-		BUG_TRAP(start <= offset + len);
-
-		end = start + skb_shinfo(skb)->frags[i].size;
+		end = offset + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
 			if (copy > len)
 				copy = len;
 			sg[elt].page = frag->page;
-			sg[elt].offset = frag->page_offset+offset-start;
+			sg[elt].offset = frag->page_offset;
 			sg[elt].length = copy;
 			elt++;
 			if (!(len -= copy))
 				return elt;
 			offset += copy;
 		}
-		start = end;
 	}
 
 	if (skb_shinfo(skb)->frag_list) {
 		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 
 		for (; list; list = list->next) {
-			int end;
-
-			BUG_TRAP(start <= offset + len);
+			BUG_TRAP(len >= 0);
 
-			end = start + list->len;
+			end = offset + list->len;
 			if ((copy = end - offset) > 0) {
 				if (copy > len)
 					copy = len;
-				elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
+				elt += skb_to_sgvec(list, sg+elt, 0, copy);
 				if ((len -= copy) == 0)
 					return elt;
 				offset += copy;
 			}
-			start = end;
 		}
 	}
 	BUG_ON(len);
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
index 0ad1cd57bc3..89241cdeea3 100644
--- a/net/core/user_dma.c
+++ b/net/core/user_dma.c
@@ -49,8 +49,8 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
 			struct sk_buff *skb, int offset, struct iovec *to,
 			size_t len, struct dma_pinned_list *pinned_list)
 {
-	int start = skb_headlen(skb);
-	int i, copy = start - offset;
+	int end = skb_headlen(skb);
+	int i, copy = end - offset;
 	dma_cookie_t cookie = 0;
 
 	/* Copy header. */
@@ -69,11 +69,9 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
 
 	/* Copy paged appendix. Hmm... why does this look so complicated? */
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
+		BUG_TRAP(len >= 0);
 
-		BUG_TRAP(start <= offset + len);
-
-		end = start + skb_shinfo(skb)->frags[i].size;
+		end = offset + skb_shinfo(skb)->frags[i].size;
 		copy = end - offset;
 		if ((copy = end - offset) > 0) {
 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -82,8 +80,8 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
 			if (copy > len)
 				copy = len;
 
-			cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list, page,
-					frag->page_offset + offset - start, copy);
+			cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list,
+					page, frag->page_offset, copy);
 			if (cookie < 0)
 				goto fault;
 			len -= copy;
@@ -91,25 +89,21 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
 				goto end;
 			offset += copy;
 		}
-		start = end;
 	}
 
 	if (skb_shinfo(skb)->frag_list) {
 		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 
 		for (; list; list = list->next) {
-			int end;
-
-			BUG_TRAP(start <= offset + len);
+			BUG_TRAP(len >= 0);
 
-			end = start + list->len;
+			end = offset + list->len;
 			copy = end - offset;
 			if (copy > 0) {
 				if (copy > len)
 					copy = len;
 				cookie = dma_skb_copy_datagram_iovec(chan, list,
-						offset - start, to, copy,
-						pinned_list);
+						0, to, copy, pinned_list);
 				if (cookie < 0)
 					goto fault;
 				len -= copy;
@@ -117,7 +111,6 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
 					goto end;
 				offset += copy;
 			}
-			start = end;
 		}
 	}
 
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 6249a9405bb..be529c4241a 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -532,8 +532,8 @@ EXPORT_SYMBOL_GPL(xfrm_count_enc_supported);
 int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
 		 int offset, int len, icv_update_fn_t icv_update)
 {
-	int start = skb_headlen(skb);
-	int i, copy = start - offset;
+	int end = skb_headlen(skb);
+	int i, copy = end - offset;
 	int err;
 	struct scatterlist sg;
 
@@ -556,11 +556,9 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
 	}
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
+		BUG_TRAP(len >= 0);
 
-		BUG_TRAP(start <= offset + len);
-
-		end = start + skb_shinfo(skb)->frags[i].size;
+		end = offset + skb_shinfo(skb)->frags[i].size;
 		if ((copy = end - offset) > 0) {
 			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
@@ -568,7 +566,7 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
 				copy = len;
 
 			sg.page = frag->page;
-			sg.offset = frag->page_offset + offset-start;
+			sg.offset = frag->page_offset;
 			sg.length = copy;
 
 			err = icv_update(desc, &sg, copy);
@@ -579,22 +577,19 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
 				return 0;
 			offset += copy;
 		}
-		start = end;
 	}
 
 	if (skb_shinfo(skb)->frag_list) {
 		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 
 		for (; list; list = list->next) {
-			int end;
-
-			BUG_TRAP(start <= offset + len);
+			BUG_TRAP(len >= 0);
 
-			end = start + list->len;
+			end = offset + list->len;
 			if ((copy = end - offset) > 0) {
 				if (copy > len)
 					copy = len;
-				err = skb_icv_walk(list, desc, offset-start,
+				err = skb_icv_walk(list, desc, 0,
 						   copy, icv_update);
 				if (unlikely(err))
 					return err;
@@ -602,7 +597,6 @@ int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc,
 					return 0;
 				offset += copy;
 			}
-			start = end;
 		}
 	}
 	BUG_ON(len);
-- 
cgit v1.2.3


From 55404bca6c45595fee1a546f1a0cc616aeef0b00 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 26 Apr 2007 00:55:53 -0700
Subject: [NET]: Fix yam.c

drivers/net/hamradio/yam.c: In function `yam_tx_byte':
drivers/net/hamradio/yam.c:643: warning: passing arg 1 of `skb_copy_from_linear_data_offset' from incompatible pointer type

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hamradio/yam.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index ac2d6dd9dbe..467559debfd 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -638,7 +638,7 @@ static void yam_tx_byte(struct net_device *dev, struct yam_port *yp)
         			dev_kfree_skb_any(skb);
 				break;
 			}
-			skb_copy_from_linear_data_offset(skb->data, 1,
+			skb_copy_from_linear_data_offset(skb, 1,
 							 yp->tx_buf,
 							 yp->tx_len);
 			dev_kfree_skb_any(skb);
-- 
cgit v1.2.3


From 42bad1da506cafa7041a02ab84033a724afe88ac Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 26 Apr 2007 00:57:41 -0700
Subject: [NETLINK]: Possible cleanups.

- make the following needlessly global variables static:
  - core/rtnetlink.c: struct rtnl_msg_handlers[]
  - netfilter/nf_conntrack_proto.c: struct nf_ct_protos[]
- make the following needlessly global functions static:
  - core/rtnetlink.c: rtnl_dump_all()
  - netlink/af_netlink.c: netlink_queue_skip()

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 1 -
 include/net/netlink.h                        | 2 --
 include/net/rtnetlink.h                      | 1 -
 net/core/rtnetlink.c                         | 6 ++----
 net/netfilter/nf_conntrack_proto.c           | 2 +-
 net/netlink/af_netlink.c                     | 4 ++--
 6 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 8415182ec12..f46cb930414 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -97,7 +97,6 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
 extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
 
 #define MAX_NF_CT_PROTO 256
-extern struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX];
 
 extern struct nf_conntrack_l4proto *
 __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto);
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 2e4c90a98a7..0bf325c29af 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -217,8 +217,6 @@ struct nl_info {
 extern void		netlink_run_queue(struct sock *sk, unsigned int *qlen,
 					  int (*cb)(struct sk_buff *,
 						    struct nlmsghdr *));
-extern void		netlink_queue_skip(struct nlmsghdr *nlh,
-					   struct sk_buff *skb);
 extern int		nlmsg_notify(struct sock *sk, struct sk_buff *skb,
 				     u32 pid, unsigned int group, int report,
 				     gfp_t flags);
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 086fa9e8950..3b3d4745618 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -13,7 +13,6 @@ extern void	rtnl_register(int protocol, int msgtype,
 			      rtnl_doit_func, rtnl_dumpit_func);
 extern int	rtnl_unregister(int protocol, int msgtype);
 extern void	rtnl_unregister_all(int protocol);
-extern int	rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb);
 
 static inline int rtnl_msg_family(struct nlmsghdr *nlh)
 {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4fe0f4b3a34..cec11110915 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -97,7 +97,7 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len)
 	return 0;
 }
 
-struct rtnl_link *rtnl_msg_handlers[NPROTO];
+static struct rtnl_link *rtnl_msg_handlers[NPROTO];
 
 static inline int rtm_msgindex(int msgtype)
 {
@@ -765,7 +765,7 @@ errout:
 	return err;
 }
 
-int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
+static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int idx;
 	int s_idx = cb->family;
@@ -789,8 +789,6 @@ int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
-EXPORT_SYMBOL_GPL(rtnl_dump_all);
-
 void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
 {
 	struct sk_buff *skb;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index e4aad2087f4..6d947068c58 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -28,7 +28,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_core.h>
 
-struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly;
+static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly;
 struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly;
 EXPORT_SYMBOL_GPL(nf_ct_l3protos);
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 64d4b27f25a..42d2fb94eff 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -121,6 +121,7 @@ static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
 
 static int netlink_dump(struct sock *sk);
 static void netlink_destroy_callback(struct netlink_callback *cb);
+static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb);
 
 static DEFINE_RWLOCK(nl_table_lock);
 static atomic_t nl_table_users = ATOMIC_INIT(0);
@@ -1568,7 +1569,7 @@ void netlink_run_queue(struct sock *sk, unsigned int *qlen,
  * Pulls the given netlink message off the socket buffer so the next
  * call to netlink_queue_run() will not reconsider the message.
  */
-void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
+static void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
 {
 	int msglen = NLMSG_ALIGN(nlh->nlmsg_len);
 
@@ -1851,7 +1852,6 @@ core_initcall(netlink_proto_init);
 
 EXPORT_SYMBOL(netlink_ack);
 EXPORT_SYMBOL(netlink_run_queue);
-EXPORT_SYMBOL(netlink_queue_skip);
 EXPORT_SYMBOL(netlink_broadcast);
 EXPORT_SYMBOL(netlink_dump_start);
 EXPORT_SYMBOL(netlink_kernel_create);
-- 
cgit v1.2.3


From c1a068f6b0c38665c079e8d4ca241e24020eff36 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Thu, 26 Apr 2007 00:58:39 -0700
Subject: [NET]: Delete unused header file linux/sdla_fr.h.

Delete the unreferenced header file include/linux/sdla_fr.h.

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/sdla_fr.h | 638 ------------------------------------------------
 1 file changed, 638 deletions(-)
 delete mode 100644 include/linux/sdla_fr.h

diff --git a/include/linux/sdla_fr.h b/include/linux/sdla_fr.h
deleted file mode 100644
index cdfa77fcb06..00000000000
--- a/include/linux/sdla_fr.h
+++ /dev/null
@@ -1,638 +0,0 @@
-/*****************************************************************************
-* sdla_fr.h	Sangoma frame relay firmware API definitions.
-*
-* Author:       Gideon Hack  	
-*		Nenad Corbic <ncorbic@sangoma.com> 	
-*
-* Copyright:	(c) 1995-2000 Sangoma Technologies Inc.
-*
-*		This program is free software; you can redistribute it and/or
-*		modify it under the terms of the GNU General Public License
-*		as published by the Free Software Foundation; either version
-*		2 of the License, or (at your option) any later version.
-* ============================================================================
-* Oct 04, 1999  Gideon Hack     Updated API structures
-* Jun 02, 1999  Gideon Hack 	Modifications for S514 support
-* Oct 12, 1997	Jaspreet Singh	Added FR_READ_DLCI_IB_MAPPING
-* Jul 21, 1997 	Jaspreet Singh	Changed FRRES_TOO_LONG and FRRES_TOO_MANY to 
-*				0x05 and 0x06 respectively.
-* Dec 23, 1996	Gene Kozin	v2.0
-* Apr 29, 1996	Gene Kozin	v1.0 (merged version S502 & S508 definitions).
-* Sep 26, 1995	Gene Kozin	Initial version.
-*****************************************************************************/
-#ifndef	_SDLA_FR_H
-#define	_SDLA_FR_H
-
-/*----------------------------------------------------------------------------
- * Notes:
- * ------
- * 1. All structures defined in this file are byte-alined.  
- *
- *	Compiler	Platform
- *	--------	--------
- *	GNU C		Linux
- */
-
-#ifndef	PACKED
-#    define	PACKED	__attribute__((packed))
-#endif	/* PACKED */
-
-/* Adapter memory layout */
-#define	FR_MB_VECTOR	0xE000	/* mailbox window vector */
-#define	FR502_RX_VECTOR	0xA000	/* S502 direct receive window vector */
-#define	FR502_MBOX_OFFS	0xF60	/* S502 mailbox offset */
-#define	FR508_MBOX_OFFS	0	/* S508 mailbox offset */
-#define	FR502_FLAG_OFFS	0x1FF0	/* S502 status flags offset */
-#define	FR508_FLAG_OFFS	0x1000	/* S508 status flags offset */
-#define	FR502_RXMB_OFFS	0x900	/* S502 direct receive mailbox offset */
-#define	FR508_TXBC_OFFS	0x1100	/* S508 Tx buffer info offset */
-#define	FR508_RXBC_OFFS	0x1120	/* S508 Rx buffer info offset */
-
-/* Important constants */
-#define FR502_MAX_DATA	4096	/* maximum data buffer length */
-#define FR508_MAX_DATA	4080	/* maximum data buffer length */
-#define MIN_LGTH_FR_DATA_CFG         300     /* min Information frame length
-(for configuration purposes) */
-#define FR_MAX_NO_DATA_BYTES_IN_FRAME  15354 	/* max Information frame length */
- 
-#define HIGHEST_VALID_DLCI	991
-
-/****** Data Structures *****************************************************/
-
-/*----------------------------------------------------------------------------
- * Frame relay command block.
- */
-typedef struct fr_cmd
-{
-	unsigned char  command	PACKED;	/* command code */
-	unsigned short length	PACKED;	/* length of data buffer */
-	unsigned char  result	PACKED;	/* return code */
-	unsigned short dlci	PACKED;	/* DLCI number */
-	unsigned char  attr	PACKED;	/* FECN, BECN, DE and C/R bits */
-	unsigned short rxlost1	PACKED;	/* frames discarded at int. level */
-	unsigned long  rxlost2	PACKED;	/* frames discarded at app. level */
-	unsigned char  rsrv[2]	PACKED;	/* reserved for future use */
-} fr_cmd_t;
-
-/* 'command' field defines */
-#define	FR_WRITE		0x01
-#define	FR_READ			0x02
-#define	FR_ISSUE_IS_FRAME	0x03
-#define FR_SET_CONFIG		0x10
-#define FR_READ_CONFIG		0x11
-#define FR_COMM_DISABLE		0x12
-#define FR_COMM_ENABLE		0x13
-#define FR_READ_STATUS		0x14
-#define FR_READ_STATISTICS	0x15
-#define FR_FLUSH_STATISTICS	0x16
-#define	FR_LIST_ACTIVE_DLCI	0x17
-#define FR_FLUSH_DATA_BUFFERS	0x18
-#define FR_READ_ADD_DLC_STATS	0x19
-#define	FR_ADD_DLCI		0x20
-#define	FR_DELETE_DLCI		0x21
-#define	FR_ACTIVATE_DLCI	0x22
-#define	FR_DEACTIVATE_DLCI	0x22
-#define FR_READ_MODEM_STATUS	0x30
-#define FR_SET_MODEM_STATUS	0x31
-#define FR_READ_ERROR_STATS	0x32
-#define FR_FLUSH_ERROR_STATS	0x33
-#define FR_READ_DLCI_IB_MAPPING 0x34
-#define FR_READ_CODE_VERSION	0x40
-#define	FR_SET_INTR_MODE	0x50
-#define	FR_READ_INTR_MODE	0x51
-#define FR_SET_TRACE_CONFIG	0x60
-#define FR_FT1_STATUS_CTRL 	0x80
-#define FR_SET_FT1_MODE		0x81
-
-/* Special UDP drivers management commands */
-#define FPIPE_ENABLE_TRACING          	0x41
-#define FPIPE_DISABLE_TRACING		0x42
-#define FPIPE_GET_TRACE_INFO            0x43
-#define FPIPE_FT1_READ_STATUS           0x44
-#define FPIPE_DRIVER_STAT_IFSEND        0x45
-#define FPIPE_DRIVER_STAT_INTR          0x46
-#define FPIPE_DRIVER_STAT_GEN           0x47
-#define FPIPE_FLUSH_DRIVER_STATS        0x48
-#define FPIPE_ROUTER_UP_TIME            0x49
-
-/* 'result' field defines */
-#define FRRES_OK		0x00	/* command executed successfully */
-#define	FRRES_DISABLED		0x01	/* communications not enabled */
-#define	FRRES_INOPERATIVE	0x02	/* channel inoperative */
-#define	FRRES_DLCI_INACTIVE	0x03	/* DLCI is inactive */
-#define	FRRES_DLCI_INVALID	0x04	/* DLCI is not configured */
-#define	FRRES_TOO_LONG		0x05
-#define	FRRES_TOO_MANY		0x06
-#define	FRRES_CIR_OVERFLOW	0x07	/* Tx throughput has exceeded CIR */
-#define	FRRES_BUFFER_OVERFLOW	0x08
-#define	FRRES_MODEM_FAILURE	0x10	/* DCD and/or CTS dropped */
-#define	FRRES_CHANNEL_DOWN	0x11	/* channel became inoperative */
-#define	FRRES_CHANNEL_UP	0x12	/* channel became operative */
-#define	FRRES_DLCI_CHANGE	0x13	/* DLCI status (or number) changed */
-#define	FRRES_DLCI_MISMATCH	0x14
-#define	FRRES_INVALID_CMD	0x1F	/* invalid command */
-
-/* 'attr' field defines */
-#define	FRATTR_
-
-/*----------------------------------------------------------------------------
- * Frame relay mailbox.
- *	This structure is located at offset FR50?_MBOX_OFFS into FR_MB_VECTOR.
- *	For S502 it is also located at offset FR502_RXMB_OFFS into
- *	FR502_RX_VECTOR.
- */
-typedef struct fr_mbox
-{
-	unsigned char opflag	PACKED;	/* 00h: execution flag */
-	fr_cmd_t cmd		PACKED;	/* 01h: command block */
-	unsigned char data[1]	PACKED;	/* 10h: variable length data buffer */
-} fr_mbox_t;
-
-/*----------------------------------------------------------------------------
- * S502 frame relay status flags.
- *	This structure is located at offset FR502_FLAG_OFFS into FR_MB_VECTOR.
- */
-typedef struct	fr502_flags
-{	
-	unsigned char rsrv1[1]	PACKED;	/* 00h: */
-	unsigned char tx_ready	PACKED;	/* 01h: Tx buffer available */
-	unsigned char rx_ready	PACKED;	/* 02h: Rx frame available */
-	unsigned char event	PACKED;	/* 03h: asynchronous event */
-	unsigned char mstatus	PACKED;	/* 04h: modem status */
-	unsigned char rsrv2[8]	PACKED;	/* 05h: */
-	unsigned char iflag	PACKED;	/* 0Dh: interrupt flag */
-	unsigned char imask	PACKED;	/* 0Eh: interrupt mask */
-} fr502_flags_t;
-
-/*----------------------------------------------------------------------------
- * S508 frame relay status flags.
- *	This structure is located at offset FR508_FLAG_OFFS into FR_MB_VECTOR.
- */
-typedef struct	fr508_flags
-{
-	unsigned char rsrv1[3]	PACKED;	/* 00h: reserved */
-	unsigned char event	PACKED;	/* 03h: asynchronous event */
-	unsigned char mstatus	PACKED;	/* 04h: modem status */
-	unsigned char rsrv2[11]	PACKED;	/* 05h: reserved */
-	unsigned char iflag	PACKED;	/* 10h: interrupt flag */
-	unsigned char imask	PACKED;	/* 11h: interrupt mask */
-	unsigned long tse_offs	PACKED;	/* 12h: Tx status element */
-	unsigned short dlci	PACKED; /* 16h: DLCI NUMBER */
-} fr508_flags_t;
-
-/* 'event' field defines */
-#define	FR_EVENT_STATUS		0x01	/* channel status change */
-#define	FR_EVENT_DLC_STATUS	0x02	/* DLC status change */
-#define	FR_EVENT_BAD_DLCI	0x04	/* FSR included wrong DLCI */
-#define	FR_EVENT_LINK_DOWN	0x40	/* DCD or CTS low */
-
-/* 'mstatus' field defines */
-#define	FR_MDM_DCD		0x08	/* mdm_status: DCD */
-#define	FR_MDM_CTS		0x20	/* mdm_status: CTS */
-
-/* 'iflag' & 'imask' fields defines */
-#define	FR_INTR_RXRDY		0x01	/* Rx ready */
-#define	FR_INTR_TXRDY		0x02	/* Tx ready */
-#define	FR_INTR_MODEM		0x04	/* modem status change (DCD, CTS) */
-#define	FR_INTR_READY		0x08	/* interface command completed */
-#define	FR_INTR_DLC		0x10	/* DLC status change */
-#define	FR_INTR_TIMER		0x20	/* millisecond timer */
-#define FR_INTR_TX_MULT_DLCIs	0x80	/* Tx interrupt on multiple DLCIs */
-
-
-/*----------------------------------------------------------------------------
- * Receive Buffer Configuration Info. S508 only!
- *	This structure is located at offset FR508_RXBC_OFFS into FR_MB_VECTOR.
- */
-typedef struct	fr_buf_info
-{
-	unsigned short rse_num	PACKED;	/* 00h: number of status elements */
-	unsigned long rse_base	PACKED;	/* 02h: receive status array base */
-	unsigned long rse_next	PACKED;	/* 06h: next status element */
-	unsigned long buf_base	PACKED;	/* 0Ah: rotational buffer base */
-	unsigned short reserved	PACKED;	/* 0Eh:  */
-	unsigned long buf_top	PACKED;	/* 10h: rotational buffer top */
-} fr_buf_info_t;
-
-/*----------------------------------------------------------------------------
- * Buffer Status Element. S508 only!
- *	Array of structures of this type is located at offset defined by the
- *	'rse_base' field of the frBufInfo_t structure into absolute adapter
- *	memory address space.
- */
-typedef struct	fr_rx_buf_ctl
-{
-	unsigned char flag	PACKED;	/* 00h: ready flag */
-	unsigned short length	PACKED;	/* 01h: frame length */
-	unsigned short dlci	PACKED;	/* 03h: DLCI */
-	unsigned char attr	PACKED;	/* 05h: FECN/BECN/DE/CR */
-	unsigned short tmstamp	PACKED;	/* 06h: time stamp */
-	unsigned short rsrv[2]	PACKED; /* 08h:  */
-	unsigned long offset	PACKED;	/* 0Ch: buffer absolute address */
-} fr_rx_buf_ctl_t;
-
-typedef struct  fr_tx_buf_ctl
-{
-        unsigned char flag      PACKED; /* 00h: ready flag */
-	unsigned short rsrv0[2]	PACKED;	/* 01h: */
-        unsigned short length   PACKED; /* 05h: frame length */
-        unsigned short dlci     PACKED; /* 07h: DLCI */
-        unsigned char attr      PACKED; /* 09h: FECN/BECN/DE/CR */
-        unsigned short rsrv1 	PACKED; /* 0Ah:  */
-        unsigned long offset    PACKED; /* 0Ch: buffer absolute address */
-} fr_tx_buf_ctl_t;
-
-/*----------------------------------------------------------------------------
- * Global Configuration Block. Passed to FR_SET_CONFIG command when dlci == 0.
- */
-typedef struct	fr_conf
-{
-	unsigned short station	PACKED;	/* 00h: CPE/Node */
-	unsigned short options	PACKED;	/* 02h: configuration options */
-	unsigned short kbps	PACKED;	/* 04h: baud rate in kbps */
-	unsigned short port	PACKED;	/* 06h: RS-232/V.35 */
-	unsigned short mtu	PACKED;	/* 08h: max. transmit length */
-	unsigned short t391	PACKED;	/* 0Ah:  */
-	unsigned short t392	PACKED;	/* 0Ch:  */
-	unsigned short n391	PACKED;	/* 0Eh:  */
-	unsigned short n392	PACKED;	/* 10h:  */
-	unsigned short n393	PACKED;	/* 12h:  */
-	unsigned short cir_fwd	PACKED;	/* 14h:  */
-	unsigned short bc_fwd	PACKED;	/* 16h:  */
-	unsigned short be_fwd	PACKED;	/* 18h:  */
-	unsigned short cir_bwd	PACKED;	/* 1Ah:  */
-	unsigned short bc_bwd	PACKED;	/* 1Ch:  */
-	unsigned short be_bwd	PACKED;	/* 1Eh:  */
-	unsigned short dlci[0]	PACKED;	/* 20h:  */
-} fr_conf_t;
-
-/* 'station_type' defines */
-#define	FRCFG_STATION_CPE	0
-#define	FRCFG_STATION_NODE	1
-
-/* 'conf_flags' defines */
-#define	FRCFG_IGNORE_TX_CIR	0x0001
-#define	FRCFG_IGNORE_RX_CIR	0x0002
-#define	FRCFG_DONT_RETRANSMIT	0x0004
-#define	FRCFG_IGNORE_CBS	0x0008
-#define	FRCFG_THROUGHPUT	0x0010	/* enable throughput calculation */
-#define	FRCFG_DIRECT_RX		0x0080	/* enable direct receive buffer */
-#define	FRCFG_AUTO_CONFIG	0x8000	/* enable  auto DLCI configuration */
-
-/* 'baud_rate' defines */
-#define	FRCFG_BAUD_1200		12
-#define	FRCFG_BAUD_2400		24
-#define	FRCFG_BAUD_4800		48
-#define	FRCFG_BAUD_9600		96
-#define	FRCFG_BAUD_19200	19
-#define	FRCFG_BAUD_38400	38
-#define	FRCFG_BAUD_56000	56
-#define	FRCFG_BAUD_64000	64
-#define	FRCFG_BAUD_128000	128
-
-/* 'port_mode' defines */
-#define	FRCFG_MODE_EXT_CLK	0x0000
-#define	FRCFG_MODE_INT_CLK	0x0001
-#define	FRCFG_MODE_V35		0x0000	/* S508 only */
-#define	FRCFG_MODE_RS232	0x0002	/* S508 only */
-
-/* defines for line tracing */
-
-/* the line trace status element presented by the frame relay code */
-typedef struct {
-        unsigned char flag      PACKED; /* ready flag */
-        unsigned short length   PACKED; /* trace length */
-        unsigned char rsrv0[2]  PACKED; /* reserved */
-        unsigned char attr      PACKED; /* trace attributes */
-        unsigned short tmstamp  PACKED; /* time stamp */
-        unsigned char rsrv1[4]  PACKED; /* reserved */
-        unsigned long offset    PACKED; /* buffer absolute address */
-} fr_trc_el_t;
-
-typedef struct {
-        unsigned char status    	PACKED; /* status flag */
-	unsigned char data_passed	PACKED;	/* 0 if no data passed, 1 if */
-						/* data passed */
-        unsigned short length   	PACKED; /* frame length */
-        unsigned short tmstamp  	PACKED; /* time stamp */
-} fpipemon_trc_hdr_t;
-
-typedef struct {
-	fpipemon_trc_hdr_t fpipemon_trc_hdr			PACKED;
-        unsigned char data[FR_MAX_NO_DATA_BYTES_IN_FRAME]	PACKED;
-} fpipemon_trc_t;
-
-/* bit settings for the 'status' byte  - note that bits 1, 2 and 3 are used */
-/* for returning the number of frames being passed to fpipemon */
-#define TRC_OUTGOING_FRM	0x01
-#define TRC_ABORT_ERROR         0x10
-#define TRC_CRC_ERROR           0x20
-#define TRC_OVERRUN_ERROR       0x40
-#define MORE_TRC_DATA		0x80
-
-#define MAX_FRMS_TRACED		0x07
-
-#define NO_TRC_ELEMENTS_OFF		0x9000
-#define BASE_TRC_ELEMENTS_OFF		0x9002
-#define TRC_ACTIVE			0x01
-#define FLUSH_TRC_BUFFERS 		0x02
-#define FLUSH_TRC_STATISTICS		0x04
-#define TRC_SIGNALLING_FRMS		0x10
-#define TRC_INFO_FRMS			0x20
-#define ACTIVATE_TRC	(TRC_ACTIVE | TRC_SIGNALLING_FRMS | TRC_INFO_FRMS)
-#define RESET_TRC	(FLUSH_TRC_BUFFERS | FLUSH_TRC_STATISTICS)
-
-/*----------------------------------------------------------------------------
- * Channel configuration.
- *	This structure is passed to the FR_SET_CONFIG command when dlci != 0.
- */
-typedef struct	fr_dlc_conf
-{
-	unsigned short conf_flags	PACKED;	/* 00h: configuration bits */
-	unsigned short cir_fwd		PACKED;	/* 02h:  */
-	unsigned short bc_fwd		PACKED;	/* 04h:  */
-	unsigned short be_fwd		PACKED;	/* 06h:  */
-	unsigned short cir_bwd		PACKED;	/* 08h:  */
-	unsigned short bc_bwd		PACKED;	/* 0Ah:  */
-	unsigned short be_bwd		PACKED;	/* 0Ch:  */
-} fr_dlc_conf_t;
-
-/*----------------------------------------------------------------------------
- * S502 interrupt mode control block.
- *	This structure is passed to the FR_SET_INTR_FLAGS and returned by the
- *	FR_READ_INTR_FLAGS commands.
- */
-typedef struct fr502_intr_ctl
-{
-	unsigned char mode	PACKED;	/* 00h: interrupt enable flags */
-	unsigned short tx_len	PACKED;	/* 01h: required Tx buffer size */
-} fr502_intr_ctl_t;
-
-/*----------------------------------------------------------------------------
- * S508 interrupt mode control block.
- *	This structure is passed to the FR_SET_INTR_FLAGS and returned by the
- *	FR_READ_INTR_FLAGS commands.
- */
-typedef struct fr508_intr_ctl
-{
-	unsigned char mode	PACKED;	/* 00h: interrupt enable flags */
-	unsigned short tx_len	PACKED;	/* 01h: required Tx buffer size */
-	unsigned char irq	PACKED;	/* 03h: IRQ level to activate */
-	unsigned char flags	PACKED;	/* 04h: ?? */
-	unsigned short timeout	PACKED;	/* 05h: ms, for timer interrupt */
-} fr508_intr_ctl_t;
-
-/*----------------------------------------------------------------------------
- * Channel status.
- *	This structure is returned by the FR_READ_STATUS command.
- */
-typedef struct	fr_dlc_Status
-{
-	unsigned char status		PACKED;	/* 00h: link/DLCI status */
-	struct
-	{
-		unsigned short dlci	PACKED;	/* 01h: DLCI number */
-		unsigned char status	PACKED;	/* 03h: DLCI status */
-	} circuit[1]			PACKED;
-} fr_dlc_status_t;
-
-/* 'status' defines */
-#define	FR_LINK_INOPER	0x00		/* for global status (DLCI == 0) */
-#define	FR_LINK_OPER	0x01
-#define	FR_DLCI_DELETED	0x01		/* for circuit status (DLCI != 0) */
-#define	FR_DLCI_ACTIVE	0x02
-#define	FR_DLCI_WAITING	0x04
-#define	FR_DLCI_NEW	0x08
-#define	FR_DLCI_REPORT	0x40
-
-/*----------------------------------------------------------------------------
- * Global Statistics Block.
- *	This structure is returned by the FR_READ_STATISTICS command when
- *	dcli == 0.
- */
-typedef struct	fr_link_stat
-{
-	unsigned short rx_too_long	PACKED;	/* 00h:  */
-	unsigned short rx_dropped	PACKED;	/* 02h:  */
-	unsigned short rx_dropped2	PACKED;	/* 04h:  */
-	unsigned short rx_bad_dlci	PACKED;	/* 06h:  */
-	unsigned short rx_bad_format	PACKED;	/* 08h:  */
-	unsigned short retransmitted	PACKED;	/* 0Ah:  */
-	unsigned short cpe_tx_FSE	PACKED;	/* 0Ch:  */
-	unsigned short cpe_tx_LIV	PACKED;	/* 0Eh:  */
-	unsigned short cpe_rx_FSR	PACKED;	/* 10h:  */
-	unsigned short cpe_rx_LIV	PACKED;	/* 12h:  */
-	unsigned short node_rx_FSE	PACKED;	/* 14h:  */
-	unsigned short node_rx_LIV	PACKED;	/* 16h:  */
-	unsigned short node_tx_FSR	PACKED;	/* 18h:  */
-	unsigned short node_tx_LIV	PACKED;	/* 1Ah:  */
-	unsigned short rx_ISF_err	PACKED;	/* 1Ch:  */
-	unsigned short rx_unsolicited	PACKED;	/* 1Eh:  */
-	unsigned short rx_SSN_err	PACKED;	/* 20h:  */
-	unsigned short rx_RSN_err	PACKED;	/* 22h:  */
-	unsigned short T391_timeouts	PACKED;	/* 24h:  */
-	unsigned short T392_timeouts	PACKED;	/* 26h:  */
-	unsigned short N392_reached	PACKED;	/* 28h:  */
-	unsigned short cpe_SSN_RSN	PACKED;	/* 2Ah:  */
-	unsigned short current_SSN	PACKED;	/* 2Ch:  */
-	unsigned short current_RSN	PACKED;	/* 2Eh:  */
-	unsigned short curreny_T391	PACKED;	/* 30h:  */
-	unsigned short current_T392	PACKED;	/* 32h:  */
-	unsigned short current_N392	PACKED;	/* 34h:  */
-	unsigned short current_N393	PACKED;	/* 36h:  */
-} fr_link_stat_t;
-
-/*----------------------------------------------------------------------------
- * DLCI statistics.
- *	This structure is returned by the FR_READ_STATISTICS command when
- *	dlci != 0.
- */
-typedef struct	fr_dlci_stat
-{
-	unsigned long tx_frames		PACKED;	/* 00h:  */
-	unsigned long tx_bytes		PACKED;	/* 04h:  */
-	unsigned long rx_frames		PACKED;	/* 08h:  */
-	unsigned long rx_bytes		PACKED;	/* 0Ch:  */
-	unsigned long rx_dropped	PACKED;	/* 10h:  */
-	unsigned long rx_inactive	PACKED;	/* 14h:  */
-	unsigned long rx_exceed_CIR	PACKED;	/* 18h:  */
-	unsigned long rx_DE_set		PACKED;	/* 1Ch:  */
-	unsigned long tx_throughput	PACKED;	/* 20h:  */
-	unsigned long tx_calc_timer	PACKED;	/* 24h:  */
-	unsigned long rx_throughput	PACKED;	/* 28h:  */
-	unsigned long rx_calc_timer	PACKED;	/* 2Ch:  */
-} fr_dlci_stat_t;
-
-/*----------------------------------------------------------------------------
- * Communications error statistics.
- *	This structure is returned by the FR_READ_ERROR_STATS command.
- */
-typedef struct	fr_comm_stat
-{
-	unsigned char rx_overruns	PACKED;	/* 00h:  */
-	unsigned char rx_bad_crc	PACKED;	/* 01h:  */
-	unsigned char rx_aborts		PACKED;	/* 02h:  */
-	unsigned char rx_too_long	PACKED;	/* 03h:  */
-	unsigned char tx_aborts		PACKED;	/* 04h:  */
-	unsigned char tx_underruns	PACKED;	/* 05h:  */
-	unsigned char tx_missed_undr	PACKED;	/* 06h:  */
-	unsigned char dcd_dropped	PACKED;	/* 07h:  */
-	unsigned char cts_dropped	PACKED;	/* 08h:  */
-} fr_comm_stat_t;
-
-/*----------------------------------------------------------------------------
- * Defines for the FR_ISSUE_IS_FRAME command.
- */
-#define	FR_ISF_LVE	2		/* issue Link Verification Enquiry */
-#define	FR_ISF_FSE	3		/* issue Full Status Enquiry */
-
-/*----------------------------------------------------------------------------
- * Frame Relay ARP Header -- Used for Dynamic route creation with InvARP 
- */
-
-typedef struct arphdr_fr
-	{
-	unsigned short ar_hrd PACKED;		/* format of hardware addr */
-	unsigned short ar_pro PACKED;		/* format of protocol addr */
-	unsigned char  ar_hln PACKED;		/* length of hardware addr */	
-	unsigned char  ar_pln PACKED;		/* length of protocol addr */
-	unsigned short ar_op  PACKED;		/* ARP opcode		   */
-	unsigned short ar_sha PACKED;		/* Sender DLCI addr 2 bytes */
-	unsigned long  ar_sip PACKED;		/* Sender IP   addr 4 bytes */
-	unsigned short ar_tha PACKED;		/* Target DLCI addr 2 bytes */
-	unsigned long  ar_tip PACKED;		/* Target IP   addr 4 bytes */
-	} arphdr_fr_t;
-
-/*----------------------------------------------------------------------------
- * Frame Relay RFC 1490 SNAP Header -- Used to check for ARP packets
- */
-typedef struct arphdr_1490
-	{
-	unsigned char control PACKED;		/* UI, etc...  */
-	unsigned char pad     PACKED;		/* Pad */
-	unsigned char NLPID   PACKED;		/* SNAP */
-	unsigned char OUI[3]  PACKED;		/* Ethertype, etc... */
-	unsigned short PID    PACKED;		/* ARP, IP, etc... */
-	}  arphdr_1490_t;
-
-/* UDP/IP packet (for UDP management) layout */
-
-/* The embedded control block for UDP mgmt
-   This is essentially a mailbox structure, without the large data field */
-
-typedef struct {
-        unsigned char  opp_flag PACKED; /* the opp flag */
-        unsigned char  command  PACKED; /* command code */
-        unsigned short length   PACKED; /* length of data buffer */
-        unsigned char  result   PACKED; /* return code */
-        unsigned short dlci     PACKED; /* DLCI number */
-        unsigned char  attr     PACKED; /* FECN, BECN, DE and C/R bits */
-        unsigned short rxlost1  PACKED; /* frames discarded at int. level */
-        unsigned long  rxlost2  PACKED; /* frames discarded at app. level */
-        unsigned char  rsrv[2]  PACKED; /* reserved for future use */
-} cblock_t;
-
-
-/* UDP management packet layout (data area of ip packet) */
-
-typedef struct {
-        unsigned char   control                 PACKED;
-        unsigned char   NLPID                   PACKED;
-} fr_encap_hdr_t;
-
-typedef struct {
-//	fr_encap_hdr_t 		fr_encap_hdr	PACKED;
-	ip_pkt_t 		ip_pkt		PACKED;
-	udp_pkt_t		udp_pkt		PACKED;
-	wp_mgmt_t 		wp_mgmt       	PACKED;
-        cblock_t                cblock          PACKED;
-        unsigned char           data[4080]      PACKED;
-} fr_udp_pkt_t;
-
-
-/* valid ip_protocol for UDP management */
-#define UDPMGMT_UDP_PROTOCOL 0x11
-
-#define UDPMGMT_FPIPE_SIGNATURE         "FPIPE8ND"
-#define UDPMGMT_DRVRSTATS_SIGNATURE     "DRVSTATS"
-
-/* values for request/reply byte */
-#define UDPMGMT_REQUEST	0x01
-#define UDPMGMT_REPLY	0x02
-#define UDP_OFFSET	12
-
-typedef struct {
-        unsigned long if_send_entry;
-        unsigned long if_send_skb_null;
-        unsigned long if_send_broadcast;
-        unsigned long if_send_multicast;
-        unsigned long if_send_critical_ISR;
-        unsigned long if_send_critical_non_ISR;
-        unsigned long if_send_busy;
-        unsigned long if_send_busy_timeout;
-	unsigned long if_send_DRVSTATS_request;
-        unsigned long if_send_FPIPE_request;
-        unsigned long if_send_wan_disconnected;
-        unsigned long if_send_dlci_disconnected;
-        unsigned long if_send_no_bfrs;
-        unsigned long if_send_adptr_bfrs_full;
-        unsigned long if_send_bfrs_passed_to_adptr;
-	unsigned long if_send_consec_send_fail;
-} drvstats_if_send_t; 
-
-typedef struct {
-        unsigned long rx_intr_no_socket;
-        unsigned long rx_intr_dev_not_started;
-        unsigned long rx_intr_DRVSTATS_request;
-        unsigned long rx_intr_FPIPE_request;
-        unsigned long rx_intr_bfr_not_passed_to_stack;
-        unsigned long rx_intr_bfr_passed_to_stack;
- } drvstats_rx_intr_t;
-
-typedef struct {
-        unsigned long UDP_FPIPE_mgmt_kmalloc_err;
-        unsigned long UDP_FPIPE_mgmt_direction_err;
-        unsigned long UDP_FPIPE_mgmt_adptr_type_err;
-        unsigned long UDP_FPIPE_mgmt_adptr_cmnd_OK;
-        unsigned long UDP_FPIPE_mgmt_adptr_cmnd_timeout;
-        unsigned long UDP_FPIPE_mgmt_adptr_send_passed;
-        unsigned long UDP_FPIPE_mgmt_adptr_send_failed;
-        unsigned long UDP_FPIPE_mgmt_not_passed_to_stack;
-        unsigned long UDP_FPIPE_mgmt_passed_to_stack;
-        unsigned long UDP_FPIPE_mgmt_no_socket;
-        unsigned long UDP_DRVSTATS_mgmt_kmalloc_err;
-        unsigned long UDP_DRVSTATS_mgmt_adptr_cmnd_OK;
-        unsigned long UDP_DRVSTATS_mgmt_adptr_cmnd_timeout;
-        unsigned long UDP_DRVSTATS_mgmt_adptr_send_passed;
-        unsigned long UDP_DRVSTATS_mgmt_adptr_send_failed;
-        unsigned long UDP_DRVSTATS_mgmt_not_passed_to_stack;
-        unsigned long UDP_DRVSTATS_mgmt_passed_to_stack;
-        unsigned long UDP_DRVSTATS_mgmt_no_socket;
-} drvstats_gen_t;
-
-typedef struct {
-        unsigned char   attr      	PACKED;
-        unsigned short  time_stamp      PACKED;
-        unsigned char   reserved[13]    PACKED;
-} api_rx_hdr_t;
-
-typedef struct {
-        api_rx_hdr_t    api_rx_hdr      PACKED;
-        void *          data            PACKED;
-} api_rx_element_t;
-
-typedef struct {
-        unsigned char   attr            PACKED;
-        unsigned char   reserved[15]    PACKED;
-} api_tx_hdr_t;
-
-typedef struct {
-        api_tx_hdr_t    api_tx_hdr      PACKED;
-        void *          data            PACKED;
-} api_tx_element_t;
-
-#ifdef		_MSC_
-#  pragma	pack()
-#endif
-#endif	/* _SDLA_FR_H */
-
-- 
cgit v1.2.3


From 48491e6bdb8fa73751cc95f740175ec799db5d55 Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@mindspring.com>
Date: Thu, 26 Apr 2007 00:59:27 -0700
Subject: [NET]: Delete unused header file linux/if_wanpipe_common.h

Delete the unreferenced header file include/linux/if_wanpipe_common.h,
as well as the reference to it in the Doc file.

Signed-off-by: Robert P. J. Day <rpjday@mindspring.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/wan-router.txt |  1 -
 include/linux/if_wanpipe_common.h       | 58 ---------------------------------
 2 files changed, 59 deletions(-)
 delete mode 100644 include/linux/if_wanpipe_common.h

diff --git a/Documentation/networking/wan-router.txt b/Documentation/networking/wan-router.txt
index 653978dcea7..07dd6d9930a 100644
--- a/Documentation/networking/wan-router.txt
+++ b/Documentation/networking/wan-router.txt
@@ -250,7 +250,6 @@ PRODUCT COMPONENTS AND RELATED FILES
 	sdladrv.h	SDLA support module API definitions
 	sdlasfm.h	SDLA firmware module definitions
 	if_wanpipe.h	WANPIPE Socket definitions
-	if_wanpipe_common.h	WANPIPE Socket/Driver common definitions.
 	sdlapci.h	WANPIPE PCI definitions
 	
 
diff --git a/include/linux/if_wanpipe_common.h b/include/linux/if_wanpipe_common.h
deleted file mode 100644
index 6e5461d69fd..00000000000
--- a/include/linux/if_wanpipe_common.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*****************************************************************************
-* if_wanipe_common.h   Sangoma Driver/Socket common area definitions.
-*
-* Author:       Nenad Corbic <ncorbic@sangoma.com>
-*
-* Copyright:    (c) 2000 Sangoma Technologies Inc.
-*
-*               This program is free software; you can redistribute it and/or
-*               modify it under the terms of the GNU General Public License
-*               as published by the Free Software Foundation; either version
-*               2 of the License, or (at your option) any later version.
-* ============================================================================
-* Jan 13, 2000  Nenad Corbic      Initial version
-*****************************************************************************/
-
-
-#ifndef _WANPIPE_SOCK_DRIVER_COMMON_H
-#define _WANPIPE_SOCK_DRIVER_COMMON_H
-
-typedef struct {
-	struct net_device *slave;
-	atomic_t packet_sent;
-	atomic_t receive_block;
-	atomic_t command;
-	atomic_t disconnect;
-	atomic_t driver_busy;
-	long common_critical;
-	struct timer_list *tx_timer;
-	struct sock *sk;		/* Wanpipe Sock bind's here */ 
-	int (*func)(struct sk_buff *skb, struct net_device *dev, 
-		    struct sock *sk);
-
-	struct work_struct wanpipe_work;    /* deferred keventd work */
-	unsigned char rw_bind;			  /* Sock bind state */
-	unsigned char usedby;
-	unsigned char state;
-	unsigned char svc;
-	unsigned short lcn;
-	void *mbox;
-} wanpipe_common_t;
-
-
-enum {
-	WANSOCK_UNCONFIGURED,	/* link/channel is not configured */
-	WANSOCK_DISCONNECTED,	/* link/channel is disconnected */
-	WANSOCK_CONNECTING,		/* connection is in progress */
-	WANSOCK_CONNECTED,		/* link/channel is operational */
-	WANSOCK_LIMIT,		/* for verification only */
-	WANSOCK_DUALPORT,		/* for Dual Port cards */
-	WANSOCK_DISCONNECTING,
-	WANSOCK_BINDED,
-	WANSOCK_BIND_LISTEN,
-	WANSOCK_LISTEN
-};
-
-#endif
-
-
-- 
cgit v1.2.3


From 36226a8ded46b89a94f9de5976f554bb5e02d84c Mon Sep 17 00:00:00 2001
From: Brian Braunstein <linuxkernel@bristyle.com>
Date: Thu, 26 Apr 2007 01:00:55 -0700
Subject: [NET] tun/tap: fixed hw address handling

Fixed tun/tap driver's handling of hw addresses.  The hw address is stored
in both the net_device.dev_addr and tun.dev_addr fields.  These fields were
not kept synchronized, and in fact weren't even initialized to the same
value.  Now during both init and when performing SIOCSIFHWADDR on the tun
device these values are both updated.  However, if SIOCSIFHWADDR is
performed on the net device directly (for instance, setting the hw address
using ifconfig), the tun device does not get updated.  Perhaps the
tun.dev_addr field should be removed completely at some point, as it is
redundant and net_device.dev_addr can be used anywhere it is used.

Signed-off-by: Brian Braunstein <linuxkernel@bristyle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 38 +++++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 4d461595406..a2c6caaaae9 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -18,6 +18,10 @@
 /*
  *  Changes:
  *
+ *  Brian Braunstein <linuxkernel@bristyle.com> 2007/03/23
+ *    Fixed hw address handling.  Now net_device.dev_addr is kept consistent
+ *    with tun.dev_addr when the address is set by this module.
+ *
  *  Mike Kershaw <dragorn@kismetwireless.net> 2005/08/14
  *    Add TUNSETLINK ioctl to set the link encapsulation
  *
@@ -196,7 +200,10 @@ static void tun_net_init(struct net_device *dev)
 		dev->set_multicast_list = tun_net_mclist;
 
 		ether_setup(dev);
-		random_ether_addr(dev->dev_addr);
+
+		/* random address already created for us by tun_set_iff, use it */
+		memcpy(dev->dev_addr, tun->dev_addr, min(sizeof(tun->dev_addr), sizeof(dev->dev_addr)) );
+
 		dev->tx_queue_len = TUN_READQ_SIZE;  /* We prefer our own queue length */
 		break;
 	}
@@ -636,6 +643,7 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
 		return 0;
 
 	case SIOCGIFHWADDR:
+		/* Note: the actual net device's address may be different */
 		memcpy(ifr.ifr_hwaddr.sa_data, tun->dev_addr,
 				min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun->dev_addr));
 		if (copy_to_user( argp, &ifr, sizeof ifr))
@@ -643,16 +651,24 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
 		return 0;
 
 	case SIOCSIFHWADDR:
-		/** Set the character device's hardware address. This is used when
-		 * filtering packets being sent from the network device to the character
-		 * device. */
-		memcpy(tun->dev_addr, ifr.ifr_hwaddr.sa_data,
-				min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun->dev_addr));
-		DBG(KERN_DEBUG "%s: set hardware address: %x:%x:%x:%x:%x:%x\n",
-				tun->dev->name,
-				tun->dev_addr[0], tun->dev_addr[1], tun->dev_addr[2],
-				tun->dev_addr[3], tun->dev_addr[4], tun->dev_addr[5]);
-		return 0;
+	{
+		/* try to set the actual net device's hw address */
+		int ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr);
+
+		if (ret == 0) {
+			/** Set the character device's hardware address. This is used when
+			 * filtering packets being sent from the network device to the character
+			 * device. */
+			memcpy(tun->dev_addr, ifr.ifr_hwaddr.sa_data,
+					min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun->dev_addr));
+			DBG(KERN_DEBUG "%s: set hardware address: %x:%x:%x:%x:%x:%x\n",
+					tun->dev->name,
+					tun->dev_addr[0], tun->dev_addr[1], tun->dev_addr[2],
+					tun->dev_addr[3], tun->dev_addr[4], tun->dev_addr[5]);
+		}
+
+		return  ret;
+	}
 
 	case SIOCADDMULTI:
 		/** Add the specified group to the character device's multicast filter
-- 
cgit v1.2.3


From 57cd5f754e04240ee587c51b7be8d3b7793542ae Mon Sep 17 00:00:00 2001
From: Milind Arun Choudhary <milindchoudhary@gmail.com>
Date: Thu, 26 Apr 2007 01:01:53 -0700
Subject: [NET]: ROUND_UP macro cleanup in drivers/net/ppp_generic.c

ROUND_UP macro cleanup use DIV_ROUND_UP

Signed-off-by: Milind Arun Choudhary <milindchoudhary@gmail.com>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp_generic.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 18f1790aab9..6d596ca50cf 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -88,8 +88,6 @@ struct ppp_file {
 #define PF_TO_PPP(pf)		PF_TO_X(pf, struct ppp)
 #define PF_TO_CHANNEL(pf)	PF_TO_X(pf, struct channel)
 
-#define ROUNDUP(n, x)		(((n) + (x) - 1) / (x))
-
 /*
  * Data structure describing one ppp unit.
  * A ppp unit corresponds to a ppp network interface device
@@ -1297,7 +1295,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
 	 */
 	fragsize = len;
 	if (nfree > 1)
-		fragsize = ROUNDUP(fragsize, nfree);
+		fragsize = DIV_ROUND_UP(fragsize, nfree);
 	/* nbigger channels get fragsize bytes, the rest get fragsize-1,
 	   except if nbigger==0, then they all get fragsize. */
 	nbigger = len % nfree;
-- 
cgit v1.2.3


From f8a6d97043f9adc25889876b681998b77f543bfa Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 26 Apr 2007 01:02:51 -0700
Subject: [AF_IUCV]: Fix compilation on s390-up

  CC [M]  net/iucv/iucv.o
net/iucv/iucv.c: In function 'iucv_init':
net/iucv/iucv.c:1556: error: 'iucv_cpu_notifier' undeclared (first use in this function)

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/iucv.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 1b10d576f22..60f293842a3 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -519,7 +519,6 @@ static void iucv_disable(void)
 	kfree(iucv_path_table);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
 static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
 				     unsigned long action, void *hcpu)
 {
@@ -565,7 +564,6 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
 static struct notifier_block iucv_cpu_notifier = {
 	.notifier_call = iucv_cpu_notify,
 };
-#endif
 
 /**
  * iucv_sever_pathid
-- 
cgit v1.2.3


From 14690fc649f4c59712f497135f7323eb8ceceaaf Mon Sep 17 00:00:00 2001
From: Martin Peschke <mp3@de.ibm.com>
Date: Thu, 26 Apr 2007 01:03:43 -0700
Subject: [SUNRPC]: cleanup: use seq_release_private() where appropriate

We can save some lines of code by using seq_release_private().

Signed-off-by: Martin Peschke <mp3@de.ibm.com>
Acked-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sunrpc/cache.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index f02f24ae946..543b085ae2c 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1237,20 +1237,12 @@ static int content_open(struct inode *inode, struct file *file)
 
 	return res;
 }
-static int content_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *m = (struct seq_file *)file->private_data;
-	struct handle *han = m->private;
-	kfree(han);
-	m->private = NULL;
-	return seq_release(inode, file);
-}
 
 static const struct file_operations content_file_operations = {
 	.open		= content_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= content_release,
+	.release	= seq_release_private,
 };
 
 static ssize_t read_flush(struct file *file, char __user *buf,
-- 
cgit v1.2.3


From 1c8ea5aee0b16409295d96a5e8984bd902f06a77 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 26 Apr 2007 01:36:49 -0700
Subject: [IRDA] irda_device_dongle_init: fix kzalloc(GFP_KERNEL) in spinlock

Fix http://bugzilla.kernel.org/show_bug.cgi?id=8343

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Samuel Ortiz <samuel@sortiz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irda_device.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index e717801b38f..7b5def1ea63 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -375,7 +375,7 @@ EXPORT_SYMBOL(alloc_irdadev);
 dongle_t *irda_device_dongle_init(struct net_device *dev, int type)
 {
 	struct dongle_reg *reg;
-	dongle_t *dongle = NULL;
+	dongle_t *dongle = kzalloc(sizeof(dongle_t), GFP_KERNEL);
 
 	might_sleep();
 
@@ -397,19 +397,14 @@ dongle_t *irda_device_dongle_init(struct net_device *dev, int type)
 	if (!reg || !try_module_get(reg->owner) ) {
 		IRDA_ERROR("IrDA: Unable to find requested dongle type %x\n",
 			   type);
-		goto out;
+		kfree(dongle);
+		dongle = NULL;
+	}
+	if (dongle) {
+		/* Bind the registration info to this particular instance */
+		dongle->issue = reg;
+		dongle->dev = dev;
 	}
-
-	/* Allocate dongle info for this instance */
-	dongle = kzalloc(sizeof(dongle_t), GFP_KERNEL);
-	if (!dongle)
-		goto out;
-
-	/* Bind the registration info to this particular instance */
-	dongle->issue = reg;
-	dongle->dev = dev;
-
- out:
 	spin_unlock(&dongles->hb_spinlock);
 	return dongle;
 }
-- 
cgit v1.2.3


From 4ef8d0aeafda8388dd51f2671b7059192b1e5a5f Mon Sep 17 00:00:00 2001
From: Milind Arun Choudhary <milindchoudhary@gmail.com>
Date: Thu, 26 Apr 2007 01:37:44 -0700
Subject: [NET]: SPIN_LOCK_UNLOCKED cleanup in drivers/atm, net

SPIN_LOCK_UNLOCKED cleanup,use __SPIN_LOCK_UNLOCKED instead

Signed-off-by: Milind Arun Choudhary <milindchoudhary@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/atmtcp.c | 2 +-
 net/atm/clip.c       | 2 +-
 net/atm/lec.c        | 2 +-
 net/atm/mpc.c        | 2 +-
 net/atm/signaling.c  | 2 +-
 net/dccp/minisocks.c | 2 +-
 net/ipv6/mip6.c      | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c
index 1b9493a16ac..02ad83d6b56 100644
--- a/drivers/atm/atmtcp.c
+++ b/drivers/atm/atmtcp.c
@@ -352,7 +352,7 @@ static struct atm_dev atmtcp_control_dev = {
 	.ops		= &atmtcp_c_dev_ops,
 	.type		= "atmtcp",
 	.number		= 999,
-	.lock		= SPIN_LOCK_UNLOCKED
+	.lock		= __SPIN_LOCK_UNLOCKED(atmtcp_control_dev.lock)
 };
 
 
diff --git a/net/atm/clip.c b/net/atm/clip.c
index ccba24ffb96..876b77f1474 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -702,7 +702,7 @@ static struct atm_dev atmarpd_dev = {
 	.ops =			&atmarpd_dev_ops,
 	.type =			"arpd",
 	.number = 		999,
-	.lock =			SPIN_LOCK_UNLOCKED
+	.lock =			__SPIN_LOCK_UNLOCKED(atmarpd_dev.lock)
 };
 
 
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 6d63afa5764..4dc5f2b8c43 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -630,7 +630,7 @@ static struct atm_dev lecatm_dev = {
 	.ops = &lecdev_ops,
 	.type = "lec",
 	.number = 999,		/* dummy device number */
-	.lock = SPIN_LOCK_UNLOCKED
+	.lock = __SPIN_LOCK_UNLOCKED(lecatm_dev.lock)
 };
 
 /*
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 813e08d6dc7..7c85aa551d5 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -737,7 +737,7 @@ static struct atm_dev mpc_dev = {
 	.ops	= &mpc_ops,
 	.type	= "mpc",
 	.number	= 42,
-	.lock	= SPIN_LOCK_UNLOCKED
+	.lock	= __SPIN_LOCK_UNLOCKED(mpc_dev.lock)
 	/* members not explicitly initialised will be 0 */
 };
 
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 31d98b57e1d..d14baaf1f4c 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -256,7 +256,7 @@ static struct atm_dev sigd_dev = {
 	.ops =		&sigd_dev_ops,
 	.type =		"sig",
 	.number =	999,
-	.lock =		SPIN_LOCK_UNLOCKED
+	.lock =		__SPIN_LOCK_UNLOCKED(sigd_dev.lock)
 };
 
 
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 6d235b3013d..e18e249ac49 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -27,7 +27,7 @@
 struct inet_timewait_death_row dccp_death_row = {
 	.sysctl_max_tw_buckets = NR_FILE * 2,
 	.period		= DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
-	.death_lock	= SPIN_LOCK_UNLOCKED,
+	.death_lock	= __SPIN_LOCK_UNLOCKED(dccp_death_row.death_lock),
 	.hashinfo	= &dccp_hashinfo,
 	.tw_timer	= TIMER_INITIALIZER(inet_twdr_hangman, 0,
 					    (unsigned long)&dccp_death_row),
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 6ed763ee678..13b7160fb89 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -125,7 +125,7 @@ struct mip6_report_rate_limiter {
 };
 
 static struct mip6_report_rate_limiter mip6_report_rl = {
-	.lock = SPIN_LOCK_UNLOCKED
+	.lock = __SPIN_LOCK_UNLOCKED(mip6_report_rl.lock)
 };
 
 static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb)
-- 
cgit v1.2.3


From 9198d2220d29b87ac3a05a3b791c50bb8a014d63 Mon Sep 17 00:00:00 2001
From: "Alexandra N. Kossovsky" <Alexandra.Kossovsky@oktetlabs.ru>
Date: Thu, 26 Apr 2007 01:40:13 -0700
Subject: [NET]: bonding documentation fix for multiple bonding interfaces

Fix bonding driver documentation for the case of multiple bonding interfaces.

Signed-off-by: "Alexandra N. Kossovsky" <Alexandra.Kossovsky@oktetlabs.ru>
Acked-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/bonding.txt | 35 ++---------------------------------
 1 file changed, 2 insertions(+), 33 deletions(-)

diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index de809e58092..1da56663083 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -920,40 +920,9 @@ options, you may wish to use the "max_bonds" module parameter,
 documented above.
 
 	To create multiple bonding devices with differing options, it
-is necessary to load the bonding driver multiple times.  Note that
-current versions of the sysconfig network initialization scripts
-handle this automatically; if your distro uses these scripts, no
-special action is needed.  See the section Configuring Bonding
-Devices, above, if you're not sure about your network initialization
-scripts.
-
-	To load multiple instances of the module, it is necessary to
-specify a different name for each instance (the module loading system
-requires that every loaded module, even multiple instances of the same
-module, have a unique name).  This is accomplished by supplying
-multiple sets of bonding options in /etc/modprobe.conf, for example:
-	
-alias bond0 bonding
-options bond0 -o bond0 mode=balance-rr miimon=100
-
-alias bond1 bonding
-options bond1 -o bond1 mode=balance-alb miimon=50
-
-	will load the bonding module two times.  The first instance is
-named "bond0" and creates the bond0 device in balance-rr mode with an
-miimon of 100.  The second instance is named "bond1" and creates the
-bond1 device in balance-alb mode with an miimon of 50.
-
-	In some circumstances (typically with older distributions),
-the above does not work, and the second bonding instance never sees
-its options.  In that case, the second options line can be substituted
-as follows:
-
-install bond1 /sbin/modprobe --ignore-install bonding -o bond1 \
-	mode=balance-alb miimon=50
+is necessary to use bonding parameters exported by sysfs, documented
+in the section below.
 
-	This may be repeated any number of times, specifying a new and
-unique name in place of bond1 for each subsequent instance.
 
 3.4 Configuring Bonding Manually via Sysfs
 ------------------------------------------
-- 
cgit v1.2.3


From 74da9d88bf5ffd31aed61a0b19519684ad744ded Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 26 Apr 2007 01:41:01 -0700
Subject: [BLUETOOTH] rfcomm_worker(): fix wakeup race

Set TASK_INTERRUPTIBLE prior to testing the flag to avoid missed wakeups.

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/rfcomm/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 10cc13cfae6..fe7df90eb70 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1851,18 +1851,18 @@ static void rfcomm_worker(void)
 	BT_DBG("");
 
 	while (!atomic_read(&terminate)) {
+		set_current_state(TASK_INTERRUPTIBLE);
 		if (!test_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event)) {
 			/* No pending events. Let's sleep.
 			 * Incoming connections and data will wake us up. */
-			set_current_state(TASK_INTERRUPTIBLE);
 			schedule();
 		}
+		set_current_state(TASK_RUNNING);
 
 		/* Process stuff */
 		clear_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event);
 		rfcomm_process_sessions();
 	}
-	set_current_state(TASK_RUNNING);
 	return;
 }
 
-- 
cgit v1.2.3


From bfbf3c0968498f5232c02965cf41695edae1bc4d Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <matthias.kaehlcke@gmail.com>
Date: Thu, 26 Apr 2007 01:41:49 -0700
Subject: [ATM]: Use mutex instead of binary semaphore in FORE Systems
 200E-series driver

(akpm: remove CVS control string too)

Signed-off-by: Matthias Kaehlcke <matthias.kaehlcke@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/fore200e.c | 20 +++++++++-----------
 drivers/atm/fore200e.h |  2 +-
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index a7c0ed3107e..405ee5e0922 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -1,6 +1,4 @@
 /*
-  $Id: fore200e.c,v 1.5 2000/04/14 10:10:34 davem Exp $
-
   A FORE Systems 200E-series driver for ATM on Linux.
   Christophe Lizzi (lizzi@cnam.fr), October 1999-March 2003.
 
@@ -1502,9 +1500,9 @@ fore200e_open(struct atm_vcc *vcc)
     /* pseudo-CBR bandwidth requested? */
     if ((vcc->qos.txtp.traffic_class == ATM_CBR) && (vcc->qos.txtp.max_pcr > 0)) {
 	
-	down(&fore200e->rate_sf);
+	mutex_lock(&fore200e->rate_mtx);
 	if (fore200e->available_cell_rate < vcc->qos.txtp.max_pcr) {
-	    up(&fore200e->rate_sf);
+	    mutex_unlock(&fore200e->rate_mtx);
 
 	    kfree(fore200e_vcc);
 	    vc_map->vcc = NULL;
@@ -1513,7 +1511,7 @@ fore200e_open(struct atm_vcc *vcc)
 
 	/* reserve bandwidth */
 	fore200e->available_cell_rate -= vcc->qos.txtp.max_pcr;
-	up(&fore200e->rate_sf);
+	mutex_unlock(&fore200e->rate_mtx);
     }
     
     vcc->itf = vcc->dev->number;
@@ -1599,9 +1597,9 @@ fore200e_close(struct atm_vcc* vcc)
     /* release reserved bandwidth, if any */
     if ((vcc->qos.txtp.traffic_class == ATM_CBR) && (vcc->qos.txtp.max_pcr > 0)) {
 
-	down(&fore200e->rate_sf);
+	mutex_lock(&fore200e->rate_mtx);
 	fore200e->available_cell_rate += vcc->qos.txtp.max_pcr;
-	up(&fore200e->rate_sf);
+	mutex_unlock(&fore200e->rate_mtx);
 
 	clear_bit(ATM_VF_HASQOS, &vcc->flags);
     }
@@ -2064,16 +2062,16 @@ fore200e_change_qos(struct atm_vcc* vcc,struct atm_qos* qos, int flags)
 
     if ((qos->txtp.traffic_class == ATM_CBR) && (qos->txtp.max_pcr > 0)) {
 
-	down(&fore200e->rate_sf);
+	mutex_lock(&fore200e->rate_mtx);
 	if (fore200e->available_cell_rate + vcc->qos.txtp.max_pcr < qos->txtp.max_pcr) {
-	    up(&fore200e->rate_sf);
+	    mutex_unlock(&fore200e->rate_mtx);
 	    return -EAGAIN;
 	}
 
 	fore200e->available_cell_rate += vcc->qos.txtp.max_pcr;
 	fore200e->available_cell_rate -= qos->txtp.max_pcr;
 
-	up(&fore200e->rate_sf);
+	mutex_unlock(&fore200e->rate_mtx);
 	
 	memcpy(&vcc->qos, qos, sizeof(struct atm_qos));
 	
@@ -2459,7 +2457,7 @@ fore200e_initialize(struct fore200e* fore200e)
 
     DPRINTK(2, "device %s being initialized\n", fore200e->name);
 
-    init_MUTEX(&fore200e->rate_sf);
+    mutex_init(&fore200e->rate_mtx);
     spin_lock_init(&fore200e->q_lock);
 
     cpq = fore200e->cp_queues = fore200e->virt_base + FORE200E_CP_QUEUES_OFFSET;
diff --git a/drivers/atm/fore200e.h b/drivers/atm/fore200e.h
index f9abfdac33e..b85a54613de 100644
--- a/drivers/atm/fore200e.h
+++ b/drivers/atm/fore200e.h
@@ -869,7 +869,7 @@ typedef struct fore200e {
 
     struct stats*              stats;                  /* last snapshot of the stats         */
     
-    struct semaphore           rate_sf;                /* protects rate reservation ops      */
+    struct mutex               rate_mtx;               /* protects rate reservation ops      */
     spinlock_t                 q_lock;                 /* protects queue ops                 */
 #ifdef FORE200E_USE_TASKLET
     struct tasklet_struct      tx_tasklet;             /* performs tx interrupt work         */
-- 
cgit v1.2.3


From 566ec03448052c096dc3982fbe573522dc0ba479 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <hadi@cyberus.ca>
Date: Thu, 26 Apr 2007 14:12:15 -0700
Subject: [XFRM]: Missing bits to SAD info.

This brings the SAD info in sync with net-2.6.22/net-2.6

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_user.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index cb4cc1bde5d..69110fed64b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1878,6 +1878,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = {
 	[XFRM_MSG_GETAE       - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id),
 	[XFRM_MSG_REPORT      - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report),
 	[XFRM_MSG_MIGRATE     - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id),
+	[XFRM_MSG_GETSADINFO  - XFRM_MSG_BASE] = NLMSG_LENGTH(sizeof(u32)),
 };
 
 #undef XMSGSIZE
@@ -1905,7 +1906,7 @@ static struct xfrm_link {
 	[XFRM_MSG_NEWAE       - XFRM_MSG_BASE] = { .doit = xfrm_new_ae  },
 	[XFRM_MSG_GETAE       - XFRM_MSG_BASE] = { .doit = xfrm_get_ae  },
 	[XFRM_MSG_MIGRATE     - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate    },
-	[XFRM_MSG_GETSADINFO  - XFRM_MSG_BASE] = { .doit = xfrm_get_sadinfo    },
+	[XFRM_MSG_GETSADINFO  - XFRM_MSG_BASE] = { .doit = xfrm_get_sadinfo   },
 };
 
 static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
-- 
cgit v1.2.3


From 071b638689464c6b39407025eedd810d5b5e6f5d Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Thu, 26 Apr 2007 15:45:32 -0700
Subject: [WORKQUEUE]: cancel_delayed_work: use del_timer() instead of
 del_timer_sync()

del_timer_sync() buys nothing for cancel_delayed_work(), but it is less
efficient since it locks the timer unconditionally, and may wait for the
completion of the delayed_work_timer_fn().

cancel_delayed_work() == 0 means:

	before this patch:
		work->func may still be running or queued

	after this patch:
		work->func may still be running or queued, or
		delayed_work_timer_fn->__queue_work() in progress.

		The latter doesn't differ from the caller's POV,
		delayed_work_timer_fn() is called with _PENDING
		bit set.

cancel_delayed_work() == 1 with this patch adds a new possibility:

	delayed_work->work was cancelled, but delayed_work_timer_fn
	is still running (this is only possible for the re-arming
	works on single-threaded workqueue).

	In this case the timer was re-started by work->func(), nobody
	else can do this. This in turn means that delayed_work_timer_fn
	has already passed __queue_work() (and wont't touch delayed_work)
	because nobody else can queue delayed_work->work.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/workqueue.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 2a7b38d8701..b8abfc74d03 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -191,14 +191,15 @@ int execute_in_process_context(work_func_t fn, struct execute_work *);
 
 /*
  * Kill off a pending schedule_delayed_work().  Note that the work callback
- * function may still be running on return from cancel_delayed_work().  Run
- * flush_scheduled_work() to wait on it.
+ * function may still be running on return from cancel_delayed_work(), unless
+ * it returns 1 and the work doesn't re-arm itself. Run flush_workqueue() or
+ * cancel_work_sync() to wait on it.
  */
 static inline int cancel_delayed_work(struct delayed_work *work)
 {
 	int ret;
 
-	ret = del_timer_sync(&work->timer);
+	ret = del_timer(&work->timer);
 	if (ret)
 		work_release(&work->work);
 	return ret;
-- 
cgit v1.2.3


From 7318226ea2931a627f3572e5f4804c91ca19ecbc Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 26 Apr 2007 15:46:23 -0700
Subject: [AF_RXRPC]: Key facility changes for AF_RXRPC

Export the keyring key type definition and document its availability.

Add alternative types into the key's type_data union to make it more useful.
Not all users necessarily want to use it as a list_head (AF_RXRPC doesn't, for
example), so make it clear that it can be used in other ways.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/keys.txt  | 12 ++++++++++++
 include/linux/key.h     |  2 ++
 security/keys/keyring.c |  2 ++
 3 files changed, 16 insertions(+)

diff --git a/Documentation/keys.txt b/Documentation/keys.txt
index 60c665d9cfa..81d9aa09729 100644
--- a/Documentation/keys.txt
+++ b/Documentation/keys.txt
@@ -859,6 +859,18 @@ payload contents" for more information.
 	void unregister_key_type(struct key_type *type);
 
 
+Under some circumstances, it may be desirable to desirable to deal with a
+bundle of keys.  The facility provides access to the keyring type for managing
+such a bundle:
+
+	struct key_type key_type_keyring;
+
+This can be used with a function such as request_key() to find a specific
+keyring in a process's keyrings.  A keyring thus found can then be searched
+with keyring_search().  Note that it is not possible to use request_key() to
+search a specific keyring, so using keyrings in this way is of limited utility.
+
+
 ===================================
 NOTES ON ACCESSING PAYLOAD CONTENTS
 ===================================
diff --git a/include/linux/key.h b/include/linux/key.h
index 169f05e4863..a9220e75782 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -160,6 +160,8 @@ struct key {
 	 */
 	union {
 		struct list_head	link;
+		unsigned long		x[2];
+		void			*p[2];
 	} type_data;
 
 	/* key data
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index ad45ce73964..88292e3dee9 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -66,6 +66,8 @@ struct key_type key_type_keyring = {
 	.read		= keyring_read,
 };
 
+EXPORT_SYMBOL(key_type_keyring);
+
 /*
  * semaphore to serialise link/link calls to prevent two link calls in parallel
  * introducing a cycle
-- 
cgit v1.2.3


From e19dff1fdd99a25819af74cf0710e147fff4fd3a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 26 Apr 2007 15:46:56 -0700
Subject: [AF_RXRPC]: Make it possible to merely try to cancel timers from a
 module

Export try_to_del_timer_sync() for use by the AF_RXRPC module.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/timer.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/timer.c b/kernel/timer.c
index dd6c2c1c561..b22bd39740d 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -505,6 +505,8 @@ out:
 	return ret;
 }
 
+EXPORT_SYMBOL(try_to_del_timer_sync);
+
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
-- 
cgit v1.2.3


From 17926a79320afa9b95df6b977b40cca6d8713cea Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 26 Apr 2007 15:48:28 -0700
Subject: [AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and
 kernel both

Provide AF_RXRPC sockets that can be used to talk to AFS servers, or serve
answers to AFS clients.  KerberosIV security is fully supported.  The patches
and some example test programs can be found in:

	http://people.redhat.com/~dhowells/rxrpc/

This will eventually replace the old implementation of kernel-only RxRPC
currently resident in net/rxrpc/.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/rxrpc.txt |  663 +++++++++++++++++++
 include/keys/rxrpc-type.h          |   22 +
 include/linux/net.h                |    2 +-
 include/linux/rxrpc.h              |   62 ++
 include/linux/socket.h             |    5 +-
 include/net/af_rxrpc.h             |   17 +
 include/rxrpc/packet.h             |   85 ++-
 net/Kconfig                        |    1 +
 net/Makefile                       |    1 +
 net/core/sock.c                    |    6 +-
 net/rxrpc/Kconfig                  |   37 ++
 net/rxrpc/Makefile                 |   31 +-
 net/rxrpc/af_rxrpc.c               |  754 ++++++++++++++++++++++
 net/rxrpc/ar-accept.c              |  399 ++++++++++++
 net/rxrpc/ar-ack.c                 | 1250 ++++++++++++++++++++++++++++++++++++
 net/rxrpc/ar-call.c                |  787 +++++++++++++++++++++++
 net/rxrpc/ar-connection.c          |  895 ++++++++++++++++++++++++++
 net/rxrpc/ar-connevent.c           |  387 +++++++++++
 net/rxrpc/ar-error.c               |  253 ++++++++
 net/rxrpc/ar-input.c               |  791 +++++++++++++++++++++++
 net/rxrpc/ar-internal.h            |  842 ++++++++++++++++++++++++
 net/rxrpc/ar-key.c                 |  334 ++++++++++
 net/rxrpc/ar-local.c               |  309 +++++++++
 net/rxrpc/ar-output.c              |  658 +++++++++++++++++++
 net/rxrpc/ar-peer.c                |  273 ++++++++
 net/rxrpc/ar-proc.c                |  247 +++++++
 net/rxrpc/ar-recvmsg.c             |  366 +++++++++++
 net/rxrpc/ar-security.c            |  258 ++++++++
 net/rxrpc/ar-skbuff.c              |  118 ++++
 net/rxrpc/ar-transport.c           |  276 ++++++++
 net/rxrpc/rxkad.c                  | 1153 +++++++++++++++++++++++++++++++++
 31 files changed, 11275 insertions(+), 7 deletions(-)
 create mode 100644 Documentation/networking/rxrpc.txt
 create mode 100644 include/keys/rxrpc-type.h
 create mode 100644 include/linux/rxrpc.h
 create mode 100644 include/net/af_rxrpc.h
 create mode 100644 net/rxrpc/Kconfig
 create mode 100644 net/rxrpc/af_rxrpc.c
 create mode 100644 net/rxrpc/ar-accept.c
 create mode 100644 net/rxrpc/ar-ack.c
 create mode 100644 net/rxrpc/ar-call.c
 create mode 100644 net/rxrpc/ar-connection.c
 create mode 100644 net/rxrpc/ar-connevent.c
 create mode 100644 net/rxrpc/ar-error.c
 create mode 100644 net/rxrpc/ar-input.c
 create mode 100644 net/rxrpc/ar-internal.h
 create mode 100644 net/rxrpc/ar-key.c
 create mode 100644 net/rxrpc/ar-local.c
 create mode 100644 net/rxrpc/ar-output.c
 create mode 100644 net/rxrpc/ar-peer.c
 create mode 100644 net/rxrpc/ar-proc.c
 create mode 100644 net/rxrpc/ar-recvmsg.c
 create mode 100644 net/rxrpc/ar-security.c
 create mode 100644 net/rxrpc/ar-skbuff.c
 create mode 100644 net/rxrpc/ar-transport.c
 create mode 100644 net/rxrpc/rxkad.c

diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
new file mode 100644
index 00000000000..fb809b738a0
--- /dev/null
+++ b/Documentation/networking/rxrpc.txt
@@ -0,0 +1,663 @@
+			    ======================
+			    RxRPC NETWORK PROTOCOL
+			    ======================
+
+The RxRPC protocol driver provides a reliable two-phase transport on top of UDP
+that can be used to perform RxRPC remote operations.  This is done over sockets
+of AF_RXRPC family, using sendmsg() and recvmsg() with control data to send and
+receive data, aborts and errors.
+
+Contents of this document:
+
+ (*) Overview.
+
+ (*) RxRPC protocol summary.
+
+ (*) AF_RXRPC driver model.
+
+ (*) Control messages.
+
+ (*) Socket options.
+
+ (*) Security.
+
+ (*) Example client usage.
+
+ (*) Example server usage.
+
+
+========
+OVERVIEW
+========
+
+RxRPC is a two-layer protocol.  There is a session layer which provides
+reliable virtual connections using UDP over IPv4 (or IPv6) as the transport
+layer, but implements a real network protocol; and there's the presentation
+layer which renders structured data to binary blobs and back again using XDR
+(as does SunRPC):
+
+		+-------------+
+		| Application |
+		+-------------+
+		|     XDR     |		Presentation
+		+-------------+
+		|    RxRPC    |		Session
+		+-------------+
+		|     UDP     |		Transport
+		+-------------+
+
+
+AF_RXRPC provides:
+
+ (1) Part of an RxRPC facility for both kernel and userspace applications by
+     making the session part of it a Linux network protocol (AF_RXRPC).
+
+ (2) A two-phase protocol.  The client transmits a blob (the request) and then
+     receives a blob (the reply), and the server receives the request and then
+     transmits the reply.
+
+ (3) Retention of the reusable bits of the transport system set up for one call
+     to speed up subsequent calls.
+
+ (4) A secure protocol, using the Linux kernel's key retention facility to
+     manage security on the client end.  The server end must of necessity be
+     more active in security negotiations.
+
+AF_RXRPC does not provide XDR marshalling/presentation facilities.  That is
+left to the application.  AF_RXRPC only deals in blobs.  Even the operation ID
+is just the first four bytes of the request blob, and as such is beyond the
+kernel's interest.
+
+
+Sockets of AF_RXRPC family are:
+
+ (1) created as type SOCK_DGRAM;
+
+ (2) provided with a protocol of the type of underlying transport they're going
+     to use - currently only PF_INET is supported.
+
+
+The Andrew File System (AFS) is an example of an application that uses this and
+that has both kernel (filesystem) and userspace (utility) components.
+
+
+======================
+RXRPC PROTOCOL SUMMARY
+======================
+
+An overview of the RxRPC protocol:
+
+ (*) RxRPC sits on top of another networking protocol (UDP is the only option
+     currently), and uses this to provide network transport.  UDP ports, for
+     example, provide transport endpoints.
+
+ (*) RxRPC supports multiple virtual "connections" from any given transport
+     endpoint, thus allowing the endpoints to be shared, even to the same
+     remote endpoint.
+
+ (*) Each connection goes to a particular "service".  A connection may not go
+     to multiple services.  A service may be considered the RxRPC equivalent of
+     a port number.  AF_RXRPC permits multiple services to share an endpoint.
+
+ (*) Client-originating packets are marked, thus a transport endpoint can be
+     shared between client and server connections (connections have a
+     direction).
+
+ (*) Up to a billion connections may be supported concurrently between one
+     local transport endpoint and one service on one remote endpoint.  An RxRPC
+     connection is described by seven numbers:
+
+	Local address	}
+	Local port	} Transport (UDP) address
+	Remote address	}
+	Remote port	}
+	Direction
+	Connection ID
+	Service ID
+
+ (*) Each RxRPC operation is a "call".  A connection may make up to four
+     billion calls, but only up to four calls may be in progress on a
+     connection at any one time.
+
+ (*) Calls are two-phase and asymmetric: the client sends its request data,
+     which the service receives; then the service transmits the reply data
+     which the client receives.
+
+ (*) The data blobs are of indefinite size, the end of a phase is marked with a
+     flag in the packet.  The number of packets of data making up one blob may
+     not exceed 4 billion, however, as this would cause the sequence number to
+     wrap.
+
+ (*) The first four bytes of the request data are the service operation ID.
+
+ (*) Security is negotiated on a per-connection basis.  The connection is
+     initiated by the first data packet on it arriving.  If security is
+     requested, the server then issues a "challenge" and then the client
+     replies with a "response".  If the response is successful, the security is
+     set for the lifetime of that connection, and all subsequent calls made
+     upon it use that same security.  In the event that the server lets a
+     connection lapse before the client, the security will be renegotiated if
+     the client uses the connection again.
+
+ (*) Calls use ACK packets to handle reliability.  Data packets are also
+     explicitly sequenced per call.
+
+ (*) There are two types of positive acknowledgement: hard-ACKs and soft-ACKs.
+     A hard-ACK indicates to the far side that all the data received to a point
+     has been received and processed; a soft-ACK indicates that the data has
+     been received but may yet be discarded and re-requested.  The sender may
+     not discard any transmittable packets until they've been hard-ACK'd.
+
+ (*) Reception of a reply data packet implicitly hard-ACK's all the data
+     packets that make up the request.
+
+ (*) An call is complete when the request has been sent, the reply has been
+     received and the final hard-ACK on the last packet of the reply has
+     reached the server.
+
+ (*) An call may be aborted by either end at any time up to its completion.
+
+
+=====================
+AF_RXRPC DRIVER MODEL
+=====================
+
+About the AF_RXRPC driver:
+
+ (*) The AF_RXRPC protocol transparently uses internal sockets of the transport
+     protocol to represent transport endpoints.
+
+ (*) AF_RXRPC sockets map onto RxRPC connection bundles.  Actual RxRPC
+     connections are handled transparently.  One client socket may be used to
+     make multiple simultaneous calls to the same service.  One server socket
+     may handle calls from many clients.
+
+ (*) Additional parallel client connections will be initiated to support extra
+     concurrent calls, up to a tunable limit.
+
+ (*) Each connection is retained for a certain amount of time [tunable] after
+     the last call currently using it has completed in case a new call is made
+     that could reuse it.
+
+ (*) Each internal UDP socket is retained [tunable] for a certain amount of
+     time [tunable] after the last connection using it discarded, in case a new
+     connection is made that could use it.
+
+ (*) A client-side connection is only shared between calls if they have have
+     the same key struct describing their security (and assuming the calls
+     would otherwise share the connection).  Non-secured calls would also be
+     able to share connections with each other.
+
+ (*) A server-side connection is shared if the client says it is.
+
+ (*) ACK'ing is handled by the protocol driver automatically, including ping
+     replying.
+
+ (*) SO_KEEPALIVE automatically pings the other side to keep the connection
+     alive [TODO].
+
+ (*) If an ICMP error is received, all calls affected by that error will be
+     aborted with an appropriate network error passed through recvmsg().
+
+
+Interaction with the user of the RxRPC socket:
+
+ (*) A socket is made into a server socket by binding an address with a
+     non-zero service ID.
+
+ (*) In the client, sending a request is achieved with one or more sendmsgs,
+     followed by the reply being received with one or more recvmsgs.
+
+ (*) The first sendmsg for a request to be sent from a client contains a tag to
+     be used in all other sendmsgs or recvmsgs associated with that call.  The
+     tag is carried in the control data.
+
+ (*) connect() is used to supply a default destination address for a client
+     socket.  This may be overridden by supplying an alternate address to the
+     first sendmsg() of a call (struct msghdr::msg_name).
+
+ (*) If connect() is called on an unbound client, a random local port will
+     bound before the operation takes place.
+
+ (*) A server socket may also be used to make client calls.  To do this, the
+     first sendmsg() of the call must specify the target address.  The server's
+     transport endpoint is used to send the packets.
+
+ (*) Once the application has received the last message associated with a call,
+     the tag is guaranteed not to be seen again, and so it can be used to pin
+     client resources.  A new call can then be initiated with the same tag
+     without fear of interference.
+
+ (*) In the server, a request is received with one or more recvmsgs, then the
+     the reply is transmitted with one or more sendmsgs, and then the final ACK
+     is received with a last recvmsg.
+
+ (*) When sending data for a call, sendmsg is given MSG_MORE if there's more
+     data to come on that call.
+
+ (*) When receiving data for a call, recvmsg flags MSG_MORE if there's more
+     data to come for that call.
+
+ (*) When receiving data or messages for a call, MSG_EOR is flagged by recvmsg
+     to indicate the terminal message for that call.
+
+ (*) A call may be aborted by adding an abort control message to the control
+     data.  Issuing an abort terminates the kernel's use of that call's tag.
+     Any messages waiting in the receive queue for that call will be discarded.
+
+ (*) Aborts, busy notifications and challenge packets are delivered by recvmsg,
+     and control data messages will be set to indicate the context.  Receiving
+     an abort or a busy message terminates the kernel's use of that call's tag.
+
+ (*) The control data part of the msghdr struct is used for a number of things:
+
+     (*) The tag of the intended or affected call.
+
+     (*) Sending or receiving errors, aborts and busy notifications.
+
+     (*) Notifications of incoming calls.
+
+     (*) Sending debug requests and receiving debug replies [TODO].
+
+ (*) When the kernel has received and set up an incoming call, it sends a
+     message to server application to let it know there's a new call awaiting
+     its acceptance [recvmsg reports a special control message].  The server
+     application then uses sendmsg to assign a tag to the new call.  Once that
+     is done, the first part of the request data will be delivered by recvmsg.
+
+ (*) The server application has to provide the server socket with a keyring of
+     secret keys corresponding to the security types it permits.  When a secure
+     connection is being set up, the kernel looks up the appropriate secret key
+     in the keyring and then sends a challenge packet to the client and
+     receives a response packet.  The kernel then checks the authorisation of
+     the packet and either aborts the connection or sets up the security.
+
+ (*) The name of the key a client will use to secure its communications is
+     nominated by a socket option.
+
+
+Notes on recvmsg:
+
+ (*) If there's a sequence of data messages belonging to a particular call on
+     the receive queue, then recvmsg will keep working through them until:
+
+     (a) it meets the end of that call's received data,
+
+     (b) it meets a non-data message,
+
+     (c) it meets a message belonging to a different call, or
+
+     (d) it fills the user buffer.
+
+     If recvmsg is called in blocking mode, it will keep sleeping, awaiting the
+     reception of further data, until one of the above four conditions is met.
+
+ (2) MSG_PEEK operates similarly, but will return immediately if it has put any
+     data in the buffer rather than sleeping until it can fill the buffer.
+
+ (3) If a data message is only partially consumed in filling a user buffer,
+     then the remainder of that message will be left on the front of the queue
+     for the next taker.  MSG_TRUNC will never be flagged.
+
+ (4) If there is more data to be had on a call (it hasn't copied the last byte
+     of the last data message in that phase yet), then MSG_MORE will be
+     flagged.
+
+
+================
+CONTROL MESSAGES
+================
+
+AF_RXRPC makes use of control messages in sendmsg() and recvmsg() to multiplex
+calls, to invoke certain actions and to report certain conditions.  These are:
+
+	MESSAGE ID		SRT DATA	MEANING
+	=======================	=== ===========	===============================
+	RXRPC_USER_CALL_ID	sr- User ID	App's call specifier
+	RXRPC_ABORT		srt Abort code	Abort code to issue/received
+	RXRPC_ACK		-rt n/a		Final ACK received
+	RXRPC_NET_ERROR		-rt error num	Network error on call
+	RXRPC_BUSY		-rt n/a		Call rejected (server busy)
+	RXRPC_LOCAL_ERROR	-rt error num	Local error encountered
+	RXRPC_NEW_CALL		-r- n/a		New call received
+	RXRPC_ACCEPT		s-- n/a		Accept new call
+
+	(SRT = usable in Sendmsg / delivered by Recvmsg / Terminal message)
+
+ (*) RXRPC_USER_CALL_ID
+
+     This is used to indicate the application's call ID.  It's an unsigned long
+     that the app specifies in the client by attaching it to the first data
+     message or in the server by passing it in association with an RXRPC_ACCEPT
+     message.  recvmsg() passes it in conjunction with all messages except
+     those of the RXRPC_NEW_CALL message.
+
+ (*) RXRPC_ABORT
+
+     This is can be used by an application to abort a call by passing it to
+     sendmsg, or it can be delivered by recvmsg to indicate a remote abort was
+     received.  Either way, it must be associated with an RXRPC_USER_CALL_ID to
+     specify the call affected.  If an abort is being sent, then error EBADSLT
+     will be returned if there is no call with that user ID.
+
+ (*) RXRPC_ACK
+
+     This is delivered to a server application to indicate that the final ACK
+     of a call was received from the client.  It will be associated with an
+     RXRPC_USER_CALL_ID to indicate the call that's now complete.
+
+ (*) RXRPC_NET_ERROR
+
+     This is delivered to an application to indicate that an ICMP error message
+     was encountered in the process of trying to talk to the peer.  An
+     errno-class integer value will be included in the control message data
+     indicating the problem, and an RXRPC_USER_CALL_ID will indicate the call
+     affected.
+
+ (*) RXRPC_BUSY
+
+     This is delivered to a client application to indicate that a call was
+     rejected by the server due to the server being busy.  It will be
+     associated with an RXRPC_USER_CALL_ID to indicate the rejected call.
+
+ (*) RXRPC_LOCAL_ERROR
+
+     This is delivered to an application to indicate that a local error was
+     encountered and that a call has been aborted because of it.  An
+     errno-class integer value will be included in the control message data
+     indicating the problem, and an RXRPC_USER_CALL_ID will indicate the call
+     affected.
+
+ (*) RXRPC_NEW_CALL
+
+     This is delivered to indicate to a server application that a new call has
+     arrived and is awaiting acceptance.  No user ID is associated with this,
+     as a user ID must subsequently be assigned by doing an RXRPC_ACCEPT.
+
+ (*) RXRPC_ACCEPT
+
+     This is used by a server application to attempt to accept a call and
+     assign it a user ID.  It should be associated with an RXRPC_USER_CALL_ID
+     to indicate the user ID to be assigned.  If there is no call to be
+     accepted (it may have timed out, been aborted, etc.), then sendmsg will
+     return error ENODATA.  If the user ID is already in use by another call,
+     then error EBADSLT will be returned.
+
+
+==============
+SOCKET OPTIONS
+==============
+
+AF_RXRPC sockets support a few socket options at the SOL_RXRPC level:
+
+ (*) RXRPC_SECURITY_KEY
+
+     This is used to specify the description of the key to be used.  The key is
+     extracted from the calling process's keyrings with request_key() and
+     should be of "rxrpc" type.
+
+     The optval pointer points to the description string, and optlen indicates
+     how long the string is, without the NUL terminator.
+
+ (*) RXRPC_SECURITY_KEYRING
+
+     Similar to above but specifies a keyring of server secret keys to use (key
+     type "keyring").  See the "Security" section.
+
+ (*) RXRPC_EXCLUSIVE_CONNECTION
+
+     This is used to request that new connections should be used for each call
+     made subsequently on this socket.  optval should be NULL and optlen 0.
+
+ (*) RXRPC_MIN_SECURITY_LEVEL
+
+     This is used to specify the minimum security level required for calls on
+     this socket.  optval must point to an int containing one of the following
+     values:
+
+     (a) RXRPC_SECURITY_PLAIN
+
+	 Encrypted checksum only.
+
+     (b) RXRPC_SECURITY_AUTH
+
+	 Encrypted checksum plus packet padded and first eight bytes of packet
+	 encrypted - which includes the actual packet length.
+
+     (c) RXRPC_SECURITY_ENCRYPTED
+
+	 Encrypted checksum plus entire packet padded and encrypted, including
+	 actual packet length.
+
+
+========
+SECURITY
+========
+
+Currently, only the kerberos 4 equivalent protocol has been implemented
+(security index 2 - rxkad).  This requires the rxkad module to be loaded and,
+on the client, tickets of the appropriate type to be obtained from the AFS
+kaserver or the kerberos server and installed as "rxrpc" type keys.  This is
+normally done using the klog program.  An example simple klog program can be
+found at:
+
+	http://people.redhat.com/~dhowells/rxrpc/klog.c
+
+The payload provided to add_key() on the client should be of the following
+form:
+
+	struct rxrpc_key_sec2_v1 {
+		uint16_t	security_index;	/* 2 */
+		uint16_t	ticket_length;	/* length of ticket[] */
+		uint32_t	expiry;		/* time at which expires */
+		uint8_t		kvno;		/* key version number */
+		uint8_t		__pad[3];
+		uint8_t		session_key[8];	/* DES session key */
+		uint8_t		ticket[0];	/* the encrypted ticket */
+	};
+
+Where the ticket blob is just appended to the above structure.
+
+
+For the server, keys of type "rxrpc_s" must be made available to the server.
+They have a description of "<serviceID>:<securityIndex>" (eg: "52:2" for an
+rxkad key for the AFS VL service).  When such a key is created, it should be
+given the server's secret key as the instantiation data (see the example
+below).
+
+	add_key("rxrpc_s", "52:2", secret_key, 8, keyring);
+
+A keyring is passed to the server socket by naming it in a sockopt.  The server
+socket then looks the server secret keys up in this keyring when secure
+incoming connections are made.  This can be seen in an example program that can
+be found at:
+
+	http://people.redhat.com/~dhowells/rxrpc/listen.c
+
+
+====================
+EXAMPLE CLIENT USAGE
+====================
+
+A client would issue an operation by:
+
+ (1) An RxRPC socket is set up by:
+
+	client = socket(AF_RXRPC, SOCK_DGRAM, PF_INET);
+
+     Where the third parameter indicates the protocol family of the transport
+     socket used - usually IPv4 but it can also be IPv6 [TODO].
+
+ (2) A local address can optionally be bound:
+
+	struct sockaddr_rxrpc srx = {
+		.srx_family	= AF_RXRPC,
+		.srx_service	= 0,  /* we're a client */
+		.transport_type	= SOCK_DGRAM,	/* type of transport socket */
+		.transport.sin_family	= AF_INET,
+		.transport.sin_port	= htons(7000), /* AFS callback */
+		.transport.sin_address	= 0,  /* all local interfaces */
+	};
+	bind(client, &srx, sizeof(srx));
+
+     This specifies the local UDP port to be used.  If not given, a random
+     non-privileged port will be used.  A UDP port may be shared between
+     several unrelated RxRPC sockets.  Security is handled on a basis of
+     per-RxRPC virtual connection.
+
+ (3) The security is set:
+
+	const char *key = "AFS:cambridge.redhat.com";
+	setsockopt(client, SOL_RXRPC, RXRPC_SECURITY_KEY, key, strlen(key));
+
+     This issues a request_key() to get the key representing the security
+     context.  The minimum security level can be set:
+
+	unsigned int sec = RXRPC_SECURITY_ENCRYPTED;
+	setsockopt(client, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
+		   &sec, sizeof(sec));
+
+ (4) The server to be contacted can then be specified (alternatively this can
+     be done through sendmsg):
+
+	struct sockaddr_rxrpc srx = {
+		.srx_family	= AF_RXRPC,
+		.srx_service	= VL_SERVICE_ID,
+		.transport_type	= SOCK_DGRAM,	/* type of transport socket */
+		.transport.sin_family	= AF_INET,
+		.transport.sin_port	= htons(7005), /* AFS volume manager */
+		.transport.sin_address	= ...,
+	};
+	connect(client, &srx, sizeof(srx));
+
+ (5) The request data should then be posted to the server socket using a series
+     of sendmsg() calls, each with the following control message attached:
+
+	RXRPC_USER_CALL_ID	- specifies the user ID for this call
+
+     MSG_MORE should be set in msghdr::msg_flags on all but the last part of
+     the request.  Multiple requests may be made simultaneously.
+
+     If a call is intended to go to a destination other then the default
+     specified through connect(), then msghdr::msg_name should be set on the
+     first request message of that call.
+
+ (6) The reply data will then be posted to the server socket for recvmsg() to
+     pick up.  MSG_MORE will be flagged by recvmsg() if there's more reply data
+     for a particular call to be read.  MSG_EOR will be set on the terminal
+     read for a call.
+
+     All data will be delivered with the following control message attached:
+
+	RXRPC_USER_CALL_ID	- specifies the user ID for this call
+
+     If an abort or error occurred, this will be returned in the control data
+     buffer instead, and MSG_EOR will be flagged to indicate the end of that
+     call.
+
+
+====================
+EXAMPLE SERVER USAGE
+====================
+
+A server would be set up to accept operations in the following manner:
+
+ (1) An RxRPC socket is created by:
+
+	server = socket(AF_RXRPC, SOCK_DGRAM, PF_INET);
+
+     Where the third parameter indicates the address type of the transport
+     socket used - usually IPv4.
+
+ (2) Security is set up if desired by giving the socket a keyring with server
+     secret keys in it:
+
+	keyring = add_key("keyring", "AFSkeys", NULL, 0,
+			  KEY_SPEC_PROCESS_KEYRING);
+
+	const char secret_key[8] = {
+		0xa7, 0x83, 0x8a, 0xcb, 0xc7, 0x83, 0xec, 0x94 };
+	add_key("rxrpc_s", "52:2", secret_key, 8, keyring);
+
+	setsockopt(server, SOL_RXRPC, RXRPC_SECURITY_KEYRING, "AFSkeys", 7);
+
+     The keyring can be manipulated after it has been given to the socket. This
+     permits the server to add more keys, replace keys, etc. whilst it is live.
+
+ (2) A local address must then be bound:
+
+	struct sockaddr_rxrpc srx = {
+		.srx_family	= AF_RXRPC,
+		.srx_service	= VL_SERVICE_ID, /* RxRPC service ID */
+		.transport_type	= SOCK_DGRAM,	/* type of transport socket */
+		.transport.sin_family	= AF_INET,
+		.transport.sin_port	= htons(7000), /* AFS callback */
+		.transport.sin_address	= 0,  /* all local interfaces */
+	};
+	bind(server, &srx, sizeof(srx));
+
+ (3) The server is then set to listen out for incoming calls:
+
+	listen(server, 100);
+
+ (4) The kernel notifies the server of pending incoming connections by sending
+     it a message for each.  This is received with recvmsg() on the server
+     socket.  It has no data, and has a single dataless control message
+     attached:
+
+	RXRPC_NEW_CALL
+
+     The address that can be passed back by recvmsg() at this point should be
+     ignored since the call for which the message was posted may have gone by
+     the time it is accepted - in which case the first call still on the queue
+     will be accepted.
+
+ (5) The server then accepts the new call by issuing a sendmsg() with two
+     pieces of control data and no actual data:
+
+	RXRPC_ACCEPT		- indicate connection acceptance
+	RXRPC_USER_CALL_ID	- specify user ID for this call
+
+ (6) The first request data packet will then be posted to the server socket for
+     recvmsg() to pick up.  At that point, the RxRPC address for the call can
+     be read from the address fields in the msghdr struct.
+
+     Subsequent request data will be posted to the server socket for recvmsg()
+     to collect as it arrives.  All but the last piece of the request data will
+     be delivered with MSG_MORE flagged.
+
+     All data will be delivered with the following control message attached:
+
+	RXRPC_USER_CALL_ID	- specifies the user ID for this call
+
+ (8) The reply data should then be posted to the server socket using a series
+     of sendmsg() calls, each with the following control messages attached:
+
+	RXRPC_USER_CALL_ID	- specifies the user ID for this call
+
+     MSG_MORE should be set in msghdr::msg_flags on all but the last message
+     for a particular call.
+
+ (9) The final ACK from the client will be posted for retrieval by recvmsg()
+     when it is received.  It will take the form of a dataless message with two
+     control messages attached:
+
+	RXRPC_USER_CALL_ID	- specifies the user ID for this call
+	RXRPC_ACK		- indicates final ACK (no data)
+
+     MSG_EOR will be flagged to indicate that this is the final message for
+     this call.
+
+(10) Up to the point the final packet of reply data is sent, the call can be
+     aborted by calling sendmsg() with a dataless message with the following
+     control messages attached:
+
+	RXRPC_USER_CALL_ID	- specifies the user ID for this call
+	RXRPC_ABORT		- indicates abort code (4 byte data)
+
+     Any packets waiting in the socket's receive queue will be discarded if
+     this is issued.
+
+Note that all the communications for a particular service take place through
+the one server socket, using control messages on sendmsg() and recvmsg() to
+determine the call affected.
diff --git a/include/keys/rxrpc-type.h b/include/keys/rxrpc-type.h
new file mode 100644
index 00000000000..e2ee73aef0e
--- /dev/null
+++ b/include/keys/rxrpc-type.h
@@ -0,0 +1,22 @@
+/* RxRPC key type
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _KEYS_RXRPC_TYPE_H
+#define _KEYS_RXRPC_TYPE_H
+
+#include <linux/key.h>
+
+/*
+ * key type for AF_RXRPC keys
+ */
+extern struct key_type key_type_rxrpc;
+
+#endif /* _KEYS_USER_TYPE_H */
diff --git a/include/linux/net.h b/include/linux/net.h
index 4db21e63d8d..efc45177b50 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -24,7 +24,7 @@
 struct poll_table_struct;
 struct inode;
 
-#define NPROTO		33		/* should be enough for now..	*/
+#define NPROTO		34		/* should be enough for now..	*/
 
 #define SYS_SOCKET	1		/* sys_socket(2)		*/
 #define SYS_BIND	2		/* sys_bind(2)			*/
diff --git a/include/linux/rxrpc.h b/include/linux/rxrpc.h
new file mode 100644
index 00000000000..f7b826b565c
--- /dev/null
+++ b/include/linux/rxrpc.h
@@ -0,0 +1,62 @@
+/* AF_RXRPC parameters
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_RXRPC_H
+#define _LINUX_RXRPC_H
+
+#include <linux/in.h>
+#include <linux/in6.h>
+
+/*
+ * RxRPC socket address
+ */
+struct sockaddr_rxrpc {
+	sa_family_t	srx_family;	/* address family */
+	u16		srx_service;	/* service desired */
+	u16		transport_type;	/* type of transport socket (SOCK_DGRAM) */
+	u16		transport_len;	/* length of transport address */
+	union {
+		sa_family_t family;		/* transport address family */
+		struct sockaddr_in sin;		/* IPv4 transport address */
+		struct sockaddr_in6 sin6;	/* IPv6 transport address */
+	} transport;
+};
+
+/*
+ * RxRPC socket options
+ */
+#define RXRPC_SECURITY_KEY		1	/* [clnt] set client security key */
+#define RXRPC_SECURITY_KEYRING		2	/* [srvr] set ring of server security keys */
+#define RXRPC_EXCLUSIVE_CONNECTION	3	/* [clnt] use exclusive RxRPC connection */
+#define RXRPC_MIN_SECURITY_LEVEL	4	/* minimum security level */
+
+/*
+ * RxRPC control messages
+ * - terminal messages mean that a user call ID tag can be recycled
+ */
+#define RXRPC_USER_CALL_ID	1	/* user call ID specifier */
+#define RXRPC_ABORT		2	/* abort request / notification [terminal] */
+#define RXRPC_ACK		3	/* [Server] RPC op final ACK received [terminal] */
+#define RXRPC_NET_ERROR		5	/* network error received [terminal] */
+#define RXRPC_BUSY		6	/* server busy received [terminal] */
+#define RXRPC_LOCAL_ERROR	7	/* local error generated [terminal] */
+#define RXRPC_NEW_CALL		8	/* [Server] new incoming call notification */
+#define RXRPC_ACCEPT		9	/* [Server] accept request */
+
+/*
+ * RxRPC security levels
+ */
+#define RXRPC_SECURITY_PLAIN	0	/* plain secure-checksummed packets only */
+#define RXRPC_SECURITY_AUTH	1	/* authenticated packets */
+#define RXRPC_SECURITY_ENCRYPT	2	/* encrypted packets */
+
+
+#endif /* _LINUX_RXRPC_H */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index fcd35a210e7..6e7c9483a6a 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -188,7 +188,8 @@ struct ucred {
 #define AF_TIPC		30	/* TIPC sockets			*/
 #define AF_BLUETOOTH	31	/* Bluetooth sockets 		*/
 #define AF_IUCV		32	/* IUCV sockets			*/
-#define AF_MAX		33	/* For now.. */
+#define AF_RXRPC	33	/* RxRPC sockets 		*/
+#define AF_MAX		34	/* For now.. */
 
 /* Protocol families, same as address families. */
 #define PF_UNSPEC	AF_UNSPEC
@@ -222,6 +223,7 @@ struct ucred {
 #define PF_TIPC		AF_TIPC
 #define PF_BLUETOOTH	AF_BLUETOOTH
 #define PF_IUCV		AF_IUCV
+#define PF_RXRPC	AF_RXRPC
 #define PF_MAX		AF_MAX
 
 /* Maximum queue length specifiable by listen.  */
@@ -284,6 +286,7 @@ struct ucred {
 #define SOL_DCCP	269
 #define SOL_NETLINK	270
 #define SOL_TIPC	271
+#define SOL_RXRPC	272
 
 /* IPX options */
 #define IPX_TYPE	1
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
new file mode 100644
index 00000000000..b01ca2589d6
--- /dev/null
+++ b/include/net/af_rxrpc.h
@@ -0,0 +1,17 @@
+/* RxRPC definitions
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_RXRPC_H
+#define _NET_RXRPC_H
+
+#include <linux/rxrpc.h>
+
+#endif /* _NET_RXRPC_H */
diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h
index 1447f0aaa0e..452a9bb02d4 100644
--- a/include/rxrpc/packet.h
+++ b/include/rxrpc/packet.h
@@ -33,7 +33,8 @@ struct rxrpc_header
 #define RXRPC_MAXCALLS		4			/* max active calls per conn */
 #define RXRPC_CHANNELMASK	(RXRPC_MAXCALLS-1)	/* mask for channel ID */
 #define RXRPC_CIDMASK		(~RXRPC_CHANNELMASK)	/* mask for connection ID */
-#define RXRPC_CIDSHIFT		2			/* shift for connection ID */
+#define RXRPC_CIDSHIFT		ilog2(RXRPC_MAXCALLS)	/* shift for connection ID */
+#define RXRPC_CID_INC		(1 << RXRPC_CIDSHIFT)	/* connection ID increment */
 
 	__be32		callNumber;	/* call ID (0 for connection-level packets) */
 #define RXRPC_PROCESS_MAXCALLS	(1<<2)	/* maximum number of active calls per conn (power of 2) */
@@ -62,7 +63,10 @@ struct rxrpc_header
 
 	uint8_t		userStatus;	/* app-layer defined status */
 	uint8_t		securityIndex;	/* security protocol ID */
-	__be16		_rsvd;		/* reserved (used by kerberos security as cksum) */
+	union {
+		__be16	_rsvd;		/* reserved */
+		__be16	cksum;		/* kerberos security checksum */
+	};
 	__be16		serviceId;	/* service ID */
 
 } __attribute__((packed));
@@ -124,4 +128,81 @@ struct rxrpc_ackpacket
 
 } __attribute__((packed));
 
+/*
+ * ACK packets can have a further piece of information tagged on the end
+ */
+struct rxrpc_ackinfo {
+	__be32		rxMTU;		/* maximum Rx MTU size (bytes) [AFS 3.3] */
+	__be32		maxMTU;		/* maximum interface MTU size (bytes) [AFS 3.3] */
+	__be32		rwind;		/* Rx window size (packets) [AFS 3.4] */
+	__be32		jumbo_max;	/* max packets to stick into a jumbo packet [AFS 3.5] */
+};
+
+/*****************************************************************************/
+/*
+ * Kerberos security type-2 challenge packet
+ */
+struct rxkad_challenge {
+	__be32		version;	/* version of this challenge type */
+	__be32		nonce;		/* encrypted random number */
+	__be32		min_level;	/* minimum security level */
+	__be32		__padding;	/* padding to 8-byte boundary */
+} __attribute__((packed));
+
+/*****************************************************************************/
+/*
+ * Kerberos security type-2 response packet
+ */
+struct rxkad_response {
+	__be32		version;	/* version of this reponse type */
+	__be32		__pad;
+
+	/* encrypted bit of the response */
+	struct {
+		__be32		epoch;		/* current epoch */
+		__be32		cid;		/* parent connection ID */
+		__be32		checksum;	/* checksum */
+		__be32		securityIndex;	/* security type */
+		__be32		call_id[4];	/* encrypted call IDs */
+		__be32		inc_nonce;	/* challenge nonce + 1 */
+		__be32		level;		/* desired level */
+	} encrypted;
+
+	__be32		kvno;		/* Kerberos key version number */
+	__be32		ticket_len;	/* Kerberos ticket length  */
+} __attribute__((packed));
+
+/*****************************************************************************/
+/*
+ * RxRPC-level abort codes
+ */
+#define RX_CALL_DEAD		-1	/* call/conn has been inactive and is shut down */
+#define RX_INVALID_OPERATION	-2	/* invalid operation requested / attempted */
+#define RX_CALL_TIMEOUT		-3	/* call timeout exceeded */
+#define RX_EOF			-4	/* unexpected end of data on read op */
+#define RX_PROTOCOL_ERROR	-5	/* low-level protocol error */
+#define RX_USER_ABORT		-6	/* generic user abort */
+#define RX_ADDRINUSE		-7	/* UDP port in use */
+#define RX_DEBUGI_BADTYPE	-8	/* bad debugging packet type */
+
+/*
+ * Rx kerberos security abort codes
+ * - unfortunately we have no generalised security abort codes to say things
+ *   like "unsupported security", so we have to use these instead and hope the
+ *   other side understands
+ */
+#define RXKADINCONSISTENCY	19270400	/* security module structure inconsistent */
+#define RXKADPACKETSHORT	19270401	/* packet too short for security challenge */
+#define RXKADLEVELFAIL		19270402	/* security level negotiation failed */
+#define RXKADTICKETLEN		19270403	/* ticket length too short or too long */
+#define RXKADOUTOFSEQUENCE	19270404	/* packet had bad sequence number */
+#define RXKADNOAUTH		19270405	/* caller not authorised */
+#define RXKADBADKEY		19270406	/* illegal key: bad parity or weak */
+#define RXKADBADTICKET		19270407	/* security object was passed a bad ticket */
+#define RXKADUNKNOWNKEY		19270408	/* ticket contained unknown key version number */
+#define RXKADEXPIRED		19270409	/* authentication expired */
+#define RXKADSEALEDINCON	19270410	/* sealed data inconsistent */
+#define RXKADDATALEN		19270411	/* user data too long */
+#define RXKADILLEGALLEVEL	19270412	/* caller not authorised to use encrypted conns */
+
 #endif /* _LINUX_RXRPC_PACKET_H */
diff --git a/net/Kconfig b/net/Kconfig
index ae1817dc51b..2fc8e77b1e6 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -212,6 +212,7 @@ endmenu
 source "net/ax25/Kconfig"
 source "net/irda/Kconfig"
 source "net/bluetooth/Kconfig"
+source "net/rxrpc/Kconfig"
 
 config FIB_RULES
 	bool
diff --git a/net/Makefile b/net/Makefile
index 29bbe19d87f..6b74d4118c5 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_IRDA)		+= irda/
 obj-$(CONFIG_BT)		+= bluetooth/
 obj-$(CONFIG_SUNRPC)		+= sunrpc/
 obj-$(CONFIG_RXRPC)		+= rxrpc/
+obj-$(CONFIG_AF_RXRPC)		+= rxrpc/
 obj-$(CONFIG_ATM)		+= atm/
 obj-$(CONFIG_DECNET)		+= decnet/
 obj-$(CONFIG_ECONET)		+= econet/
diff --git a/net/core/sock.c b/net/core/sock.c
index 043bdc05d21..22183c2ef28 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -154,7 +154,8 @@ static const char *af_family_key_strings[AF_MAX+1] = {
   "sk_lock-21"       , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
   "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
   "sk_lock-27"       , "sk_lock-28"          , "sk_lock-29"          ,
-  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_MAX"
+  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
+  "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX"
 };
 static const char *af_family_slock_key_strings[AF_MAX+1] = {
   "slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
@@ -167,7 +168,8 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
   "slock-21"       , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
   "slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
   "slock-27"       , "slock-28"          , "slock-29"          ,
-  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_MAX"
+  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
+  "slock-AF_RXRPC" , "slock-AF_MAX"
 };
 #endif
 
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
new file mode 100644
index 00000000000..d72380e304a
--- /dev/null
+++ b/net/rxrpc/Kconfig
@@ -0,0 +1,37 @@
+#
+# RxRPC session sockets
+#
+
+config AF_RXRPC
+	tristate "RxRPC session sockets"
+	depends on EXPERIMENTAL
+	help
+	  Say Y or M here to include support for RxRPC session sockets (just
+	  the transport part, not the presentation part: (un)marshalling is
+	  left to the application).
+
+	  These are used for AFS kernel filesystem and userspace utilities.
+
+	  This module at the moment only supports client operations and is
+	  currently incomplete.
+
+	  See Documentation/networking/rxrpc.txt.
+
+
+config AF_RXRPC_DEBUG
+	bool "RxRPC dynamic debugging"
+	depends on AF_RXRPC
+	help
+	  Say Y here to make runtime controllable debugging messages appear.
+
+	  See Documentation/networking/rxrpc.txt.
+
+
+config RXKAD
+	tristate "RxRPC Kerberos security"
+	depends on AF_RXRPC && KEYS
+	help
+	  Provide kerberos 4 and AFS kaserver security handling for AF_RXRPC
+	  through the use of the key retention service.
+
+	  See Documentation/networking/rxrpc.txt.
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 6efcb6f162a..07bf82ffec6 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -4,6 +4,35 @@
 
 #CFLAGS += -finstrument-functions
 
+af-rxrpc-objs := \
+	af_rxrpc.o \
+	ar-accept.o \
+	ar-ack.o \
+	ar-call.o \
+	ar-connection.o \
+	ar-connevent.o \
+	ar-error.o \
+	ar-input.o \
+	ar-key.o \
+	ar-local.o \
+	ar-output.o \
+	ar-peer.o \
+	ar-recvmsg.o \
+	ar-security.o \
+	ar-skbuff.o \
+	ar-transport.o
+
+ifeq ($(CONFIG_PROC_FS),y)
+af-rxrpc-objs += ar-proc.o
+endif
+
+obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o
+
+obj-$(CONFIG_RXKAD) += rxkad.o
+
+#
+# obsolete RxRPC interface, still used by fs/afs/
+#
 rxrpc-objs := \
 	call.o \
 	connection.o \
@@ -22,4 +51,4 @@ ifeq ($(CONFIG_SYSCTL),y)
 rxrpc-objs += sysctl.o
 endif
 
-obj-$(CONFIG_RXRPC) := rxrpc.o
+obj-$(CONFIG_RXRPC) += rxrpc.o
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
new file mode 100644
index 00000000000..bfa8822e228
--- /dev/null
+++ b/net/rxrpc/af_rxrpc.c
@@ -0,0 +1,754 @@
+/* AF_RXRPC implementation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/poll.h>
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+MODULE_DESCRIPTION("RxRPC network protocol");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_RXRPC);
+
+unsigned rxrpc_debug; // = RXRPC_DEBUG_KPROTO;
+module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(rxrpc_debug, "RxRPC debugging mask");
+
+static int sysctl_rxrpc_max_qlen __read_mostly = 10;
+
+static struct proto rxrpc_proto;
+static const struct proto_ops rxrpc_rpc_ops;
+
+/* local epoch for detecting local-end reset */
+__be32 rxrpc_epoch;
+
+/* current debugging ID */
+atomic_t rxrpc_debug_id;
+
+/* count of skbs currently in use */
+atomic_t rxrpc_n_skbs;
+
+static void rxrpc_sock_destructor(struct sock *);
+
+/*
+ * see if an RxRPC socket is currently writable
+ */
+static inline int rxrpc_writable(struct sock *sk)
+{
+	return atomic_read(&sk->sk_wmem_alloc) < (size_t) sk->sk_sndbuf;
+}
+
+/*
+ * wait for write bufferage to become available
+ */
+static void rxrpc_write_space(struct sock *sk)
+{
+	_enter("%p", sk);
+	read_lock(&sk->sk_callback_lock);
+	if (rxrpc_writable(sk)) {
+		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+			wake_up_interruptible(sk->sk_sleep);
+		sk_wake_async(sk, 2, POLL_OUT);
+	}
+	read_unlock(&sk->sk_callback_lock);
+}
+
+/*
+ * validate an RxRPC address
+ */
+static int rxrpc_validate_address(struct rxrpc_sock *rx,
+				  struct sockaddr_rxrpc *srx,
+				  int len)
+{
+	if (len < sizeof(struct sockaddr_rxrpc))
+		return -EINVAL;
+
+	if (srx->srx_family != AF_RXRPC)
+		return -EAFNOSUPPORT;
+
+	if (srx->transport_type != SOCK_DGRAM)
+		return -ESOCKTNOSUPPORT;
+
+	len -= offsetof(struct sockaddr_rxrpc, transport);
+	if (srx->transport_len < sizeof(sa_family_t) ||
+	    srx->transport_len > len)
+		return -EINVAL;
+
+	if (srx->transport.family != rx->proto)
+		return -EAFNOSUPPORT;
+
+	switch (srx->transport.family) {
+	case AF_INET:
+		_debug("INET: %x @ %u.%u.%u.%u",
+		       ntohs(srx->transport.sin.sin_port),
+		       NIPQUAD(srx->transport.sin.sin_addr));
+		if (srx->transport_len > 8)
+			memset((void *)&srx->transport + 8, 0,
+			       srx->transport_len - 8);
+		break;
+
+	case AF_INET6:
+	default:
+		return -EAFNOSUPPORT;
+	}
+
+	return 0;
+}
+
+/*
+ * bind a local address to an RxRPC socket
+ */
+static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
+{
+	struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) saddr;
+	struct sock *sk = sock->sk;
+	struct rxrpc_local *local;
+	struct rxrpc_sock *rx = rxrpc_sk(sk), *prx;
+	__be16 service_id;
+	int ret;
+
+	_enter("%p,%p,%d", rx, saddr, len);
+
+	ret = rxrpc_validate_address(rx, srx, len);
+	if (ret < 0)
+		goto error;
+
+	lock_sock(&rx->sk);
+
+	if (rx->sk.sk_state != RXRPC_UNCONNECTED) {
+		ret = -EINVAL;
+		goto error_unlock;
+	}
+
+	memcpy(&rx->srx, srx, sizeof(rx->srx));
+
+	/* find a local transport endpoint if we don't have one already */
+	local = rxrpc_lookup_local(&rx->srx);
+	if (IS_ERR(local)) {
+		ret = PTR_ERR(local);
+		goto error_unlock;
+	}
+
+	rx->local = local;
+	if (srx->srx_service) {
+		service_id = htons(srx->srx_service);
+		write_lock_bh(&local->services_lock);
+		list_for_each_entry(prx, &local->services, listen_link) {
+			if (prx->service_id == service_id)
+				goto service_in_use;
+		}
+
+		rx->service_id = service_id;
+		list_add_tail(&rx->listen_link, &local->services);
+		write_unlock_bh(&local->services_lock);
+
+		rx->sk.sk_state = RXRPC_SERVER_BOUND;
+	} else {
+		rx->sk.sk_state = RXRPC_CLIENT_BOUND;
+	}
+
+	release_sock(&rx->sk);
+	_leave(" = 0");
+	return 0;
+
+service_in_use:
+	ret = -EADDRINUSE;
+	write_unlock_bh(&local->services_lock);
+error_unlock:
+	release_sock(&rx->sk);
+error:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * set the number of pending calls permitted on a listening socket
+ */
+static int rxrpc_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	struct rxrpc_sock *rx = rxrpc_sk(sk);
+	int ret;
+
+	_enter("%p,%d", rx, backlog);
+
+	lock_sock(&rx->sk);
+
+	switch (rx->sk.sk_state) {
+	case RXRPC_UNCONNECTED:
+		ret = -EADDRNOTAVAIL;
+		break;
+	case RXRPC_CLIENT_BOUND:
+	case RXRPC_CLIENT_CONNECTED:
+	default:
+		ret = -EBUSY;
+		break;
+	case RXRPC_SERVER_BOUND:
+		ASSERT(rx->local != NULL);
+		sk->sk_max_ack_backlog = backlog;
+		rx->sk.sk_state = RXRPC_SERVER_LISTENING;
+		ret = 0;
+		break;
+	}
+
+	release_sock(&rx->sk);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * find a transport by address
+ */
+static struct rxrpc_transport *rxrpc_name_to_transport(struct socket *sock,
+						       struct sockaddr *addr,
+						       int addr_len, int flags)
+{
+	struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) addr;
+	struct rxrpc_transport *trans;
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	struct rxrpc_peer *peer;
+
+	_enter("%p,%p,%d,%d", rx, addr, addr_len, flags);
+
+	ASSERT(rx->local != NULL);
+	ASSERT(rx->sk.sk_state > RXRPC_UNCONNECTED);
+
+	if (rx->srx.transport_type != srx->transport_type)
+		return ERR_PTR(-ESOCKTNOSUPPORT);
+	if (rx->srx.transport.family != srx->transport.family)
+		return ERR_PTR(-EAFNOSUPPORT);
+
+	/* find a remote transport endpoint from the local one */
+	peer = rxrpc_get_peer(srx, GFP_KERNEL);
+	if (IS_ERR(peer))
+		return ERR_PTR(PTR_ERR(peer));
+
+	/* find a transport */
+	trans = rxrpc_get_transport(rx->local, peer, GFP_KERNEL);
+	rxrpc_put_peer(peer);
+	_leave(" = %p", trans);
+	return trans;
+}
+
+/*
+ * connect an RxRPC socket
+ * - this just targets it at a specific destination; no actual connection
+ *   negotiation takes place
+ */
+static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
+			 int addr_len, int flags)
+{
+	struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) addr;
+	struct sock *sk = sock->sk;
+	struct rxrpc_transport *trans;
+	struct rxrpc_local *local;
+	struct rxrpc_sock *rx = rxrpc_sk(sk);
+	int ret;
+
+	_enter("%p,%p,%d,%d", rx, addr, addr_len, flags);
+
+	ret = rxrpc_validate_address(rx, srx, addr_len);
+	if (ret < 0) {
+		_leave(" = %d [bad addr]", ret);
+		return ret;
+	}
+
+	lock_sock(&rx->sk);
+
+	switch (rx->sk.sk_state) {
+	case RXRPC_UNCONNECTED:
+		/* find a local transport endpoint if we don't have one already */
+		ASSERTCMP(rx->local, ==, NULL);
+		rx->srx.srx_family = AF_RXRPC;
+		rx->srx.srx_service = 0;
+		rx->srx.transport_type = srx->transport_type;
+		rx->srx.transport_len = sizeof(sa_family_t);
+		rx->srx.transport.family = srx->transport.family;
+		local = rxrpc_lookup_local(&rx->srx);
+		if (IS_ERR(local)) {
+			release_sock(&rx->sk);
+			return PTR_ERR(local);
+		}
+		rx->local = local;
+		rx->sk.sk_state = RXRPC_CLIENT_BOUND;
+	case RXRPC_CLIENT_BOUND:
+		break;
+	case RXRPC_CLIENT_CONNECTED:
+		release_sock(&rx->sk);
+		return -EISCONN;
+	default:
+		release_sock(&rx->sk);
+		return -EBUSY; /* server sockets can't connect as well */
+	}
+
+	trans = rxrpc_name_to_transport(sock, addr, addr_len, flags);
+	if (IS_ERR(trans)) {
+		release_sock(&rx->sk);
+		_leave(" = %ld", PTR_ERR(trans));
+		return PTR_ERR(trans);
+	}
+
+	rx->trans = trans;
+	rx->service_id = htons(srx->srx_service);
+	rx->sk.sk_state = RXRPC_CLIENT_CONNECTED;
+
+	release_sock(&rx->sk);
+	return 0;
+}
+
+/*
+ * send a message through an RxRPC socket
+ * - in a client this does a number of things:
+ *   - finds/sets up a connection for the security specified (if any)
+ *   - initiates a call (ID in control data)
+ *   - ends the request phase of a call (if MSG_MORE is not set)
+ *   - sends a call data packet
+ *   - may send an abort (abort code in control data)
+ */
+static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock,
+			 struct msghdr *m, size_t len)
+{
+	struct rxrpc_transport *trans;
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	int ret;
+
+	_enter(",{%d},,%zu", rx->sk.sk_state, len);
+
+	if (m->msg_flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	if (m->msg_name) {
+		ret = rxrpc_validate_address(rx, m->msg_name, m->msg_namelen);
+		if (ret < 0) {
+			_leave(" = %d [bad addr]", ret);
+			return ret;
+		}
+	}
+
+	trans = NULL;
+	lock_sock(&rx->sk);
+
+	if (m->msg_name) {
+		ret = -EISCONN;
+		trans = rxrpc_name_to_transport(sock, m->msg_name,
+						m->msg_namelen, 0);
+		if (IS_ERR(trans)) {
+			ret = PTR_ERR(trans);
+			trans = NULL;
+			goto out;
+		}
+	} else {
+		trans = rx->trans;
+		if (trans)
+			atomic_inc(&trans->usage);
+	}
+
+	switch (rx->sk.sk_state) {
+	case RXRPC_SERVER_LISTENING:
+		if (!m->msg_name) {
+			ret = rxrpc_server_sendmsg(iocb, rx, m, len);
+			break;
+		}
+	case RXRPC_SERVER_BOUND:
+	case RXRPC_CLIENT_BOUND:
+		if (!m->msg_name) {
+			ret = -ENOTCONN;
+			break;
+		}
+	case RXRPC_CLIENT_CONNECTED:
+		ret = rxrpc_client_sendmsg(iocb, rx, trans, m, len);
+		break;
+	default:
+		ret = -ENOTCONN;
+		break;
+	}
+
+out:
+	release_sock(&rx->sk);
+	if (trans)
+		rxrpc_put_transport(trans);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * set RxRPC socket options
+ */
+static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
+			    char __user *optval, int optlen)
+{
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	unsigned min_sec_level;
+	int ret;
+
+	_enter(",%d,%d,,%d", level, optname, optlen);
+
+	lock_sock(&rx->sk);
+	ret = -EOPNOTSUPP;
+
+	if (level == SOL_RXRPC) {
+		switch (optname) {
+		case RXRPC_EXCLUSIVE_CONNECTION:
+			ret = -EINVAL;
+			if (optlen != 0)
+				goto error;
+			ret = -EISCONN;
+			if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+				goto error;
+			set_bit(RXRPC_SOCK_EXCLUSIVE_CONN, &rx->flags);
+			goto success;
+
+		case RXRPC_SECURITY_KEY:
+			ret = -EINVAL;
+			if (rx->key)
+				goto error;
+			ret = -EISCONN;
+			if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+				goto error;
+			ret = rxrpc_request_key(rx, optval, optlen);
+			goto error;
+
+		case RXRPC_SECURITY_KEYRING:
+			ret = -EINVAL;
+			if (rx->key)
+				goto error;
+			ret = -EISCONN;
+			if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+				goto error;
+			ret = rxrpc_server_keyring(rx, optval, optlen);
+			goto error;
+
+		case RXRPC_MIN_SECURITY_LEVEL:
+			ret = -EINVAL;
+			if (optlen != sizeof(unsigned))
+				goto error;
+			ret = -EISCONN;
+			if (rx->sk.sk_state != RXRPC_UNCONNECTED)
+				goto error;
+			ret = get_user(min_sec_level,
+				       (unsigned __user *) optval);
+			if (ret < 0)
+				goto error;
+			ret = -EINVAL;
+			if (min_sec_level > RXRPC_SECURITY_MAX)
+				goto error;
+			rx->min_sec_level = min_sec_level;
+			goto success;
+
+		default:
+			break;
+		}
+	}
+
+success:
+	ret = 0;
+error:
+	release_sock(&rx->sk);
+	return ret;
+}
+
+/*
+ * permit an RxRPC socket to be polled
+ */
+static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
+			       poll_table *wait)
+{
+	unsigned int mask;
+	struct sock *sk = sock->sk;
+
+	poll_wait(file, sk->sk_sleep, wait);
+	mask = 0;
+
+	/* the socket is readable if there are any messages waiting on the Rx
+	 * queue */
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		mask |= POLLIN | POLLRDNORM;
+
+	/* the socket is writable if there is space to add new data to the
+	 * socket; there is no guarantee that any particular call in progress
+	 * on the socket may have space in the Tx ACK window */
+	if (rxrpc_writable(sk))
+		mask |= POLLOUT | POLLWRNORM;
+
+	return mask;
+}
+
+/*
+ * create an RxRPC socket
+ */
+static int rxrpc_create(struct socket *sock, int protocol)
+{
+	struct rxrpc_sock *rx;
+	struct sock *sk;
+
+	_enter("%p,%d", sock, protocol);
+
+	/* we support transport protocol UDP only */
+	if (protocol != PF_INET)
+		return -EPROTONOSUPPORT;
+
+	if (sock->type != SOCK_DGRAM)
+		return -ESOCKTNOSUPPORT;
+
+	sock->ops = &rxrpc_rpc_ops;
+	sock->state = SS_UNCONNECTED;
+
+	sk = sk_alloc(PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1);
+	if (!sk)
+		return -ENOMEM;
+
+	sock_init_data(sock, sk);
+	sk->sk_state		= RXRPC_UNCONNECTED;
+	sk->sk_write_space	= rxrpc_write_space;
+	sk->sk_max_ack_backlog	= sysctl_rxrpc_max_qlen;
+	sk->sk_destruct		= rxrpc_sock_destructor;
+
+	rx = rxrpc_sk(sk);
+	rx->proto = protocol;
+	rx->calls = RB_ROOT;
+
+	INIT_LIST_HEAD(&rx->listen_link);
+	INIT_LIST_HEAD(&rx->secureq);
+	INIT_LIST_HEAD(&rx->acceptq);
+	rwlock_init(&rx->call_lock);
+	memset(&rx->srx, 0, sizeof(rx->srx));
+
+	_leave(" = 0 [%p]", rx);
+	return 0;
+}
+
+/*
+ * RxRPC socket destructor
+ */
+static void rxrpc_sock_destructor(struct sock *sk)
+{
+	_enter("%p", sk);
+
+	rxrpc_purge_queue(&sk->sk_receive_queue);
+
+	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
+	BUG_TRAP(sk_unhashed(sk));
+	BUG_TRAP(!sk->sk_socket);
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		printk("Attempt to release alive rxrpc socket: %p\n", sk);
+		return;
+	}
+}
+
+/*
+ * release an RxRPC socket
+ */
+static int rxrpc_release_sock(struct sock *sk)
+{
+	struct rxrpc_sock *rx = rxrpc_sk(sk);
+
+	_enter("%p{%d,%d}", sk, sk->sk_state, atomic_read(&sk->sk_refcnt));
+
+	/* declare the socket closed for business */
+	sock_orphan(sk);
+	sk->sk_shutdown = SHUTDOWN_MASK;
+
+	spin_lock_bh(&sk->sk_receive_queue.lock);
+	sk->sk_state = RXRPC_CLOSE;
+	spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+	ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1);
+
+	if (!list_empty(&rx->listen_link)) {
+		write_lock_bh(&rx->local->services_lock);
+		list_del(&rx->listen_link);
+		write_unlock_bh(&rx->local->services_lock);
+	}
+
+	/* try to flush out this socket */
+	rxrpc_release_calls_on_socket(rx);
+	flush_scheduled_work();
+	rxrpc_purge_queue(&sk->sk_receive_queue);
+
+	if (rx->conn) {
+		rxrpc_put_connection(rx->conn);
+		rx->conn = NULL;
+	}
+
+	if (rx->bundle) {
+		rxrpc_put_bundle(rx->trans, rx->bundle);
+		rx->bundle = NULL;
+	}
+	if (rx->trans) {
+		rxrpc_put_transport(rx->trans);
+		rx->trans = NULL;
+	}
+	if (rx->local) {
+		rxrpc_put_local(rx->local);
+		rx->local = NULL;
+	}
+
+	key_put(rx->key);
+	rx->key = NULL;
+	key_put(rx->securities);
+	rx->securities = NULL;
+	sock_put(sk);
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * release an RxRPC BSD socket on close() or equivalent
+ */
+static int rxrpc_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+
+	_enter("%p{%p}", sock, sk);
+
+	if (!sk)
+		return 0;
+
+	sock->sk = NULL;
+
+	return rxrpc_release_sock(sk);
+}
+
+/*
+ * RxRPC network protocol
+ */
+static const struct proto_ops rxrpc_rpc_ops = {
+	.family		= PF_UNIX,
+	.owner		= THIS_MODULE,
+	.release	= rxrpc_release,
+	.bind		= rxrpc_bind,
+	.connect	= rxrpc_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= sock_no_accept,
+	.getname	= sock_no_getname,
+	.poll		= rxrpc_poll,
+	.ioctl		= sock_no_ioctl,
+	.listen		= rxrpc_listen,
+	.shutdown	= sock_no_shutdown,
+	.setsockopt	= rxrpc_setsockopt,
+	.getsockopt	= sock_no_getsockopt,
+	.sendmsg	= rxrpc_sendmsg,
+	.recvmsg	= rxrpc_recvmsg,
+	.mmap		= sock_no_mmap,
+	.sendpage	= sock_no_sendpage,
+};
+
+static struct proto rxrpc_proto = {
+	.name		= "RXRPC",
+	.owner		= THIS_MODULE,
+	.obj_size	= sizeof(struct rxrpc_sock),
+	.max_header	= sizeof(struct rxrpc_header),
+};
+
+static struct net_proto_family rxrpc_family_ops = {
+	.family	= PF_RXRPC,
+	.create = rxrpc_create,
+	.owner	= THIS_MODULE,
+};
+
+/*
+ * initialise and register the RxRPC protocol
+ */
+static int __init af_rxrpc_init(void)
+{
+	struct sk_buff *dummy_skb;
+	int ret = -1;
+
+	BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof(dummy_skb->cb));
+
+	rxrpc_epoch = htonl(xtime.tv_sec);
+
+	rxrpc_call_jar = kmem_cache_create(
+		"rxrpc_call_jar", sizeof(struct rxrpc_call), 0,
+		SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (!rxrpc_call_jar) {
+		printk(KERN_NOTICE "RxRPC: Failed to allocate call jar\n");
+		ret = -ENOMEM;
+		goto error_call_jar;
+	}
+
+	ret = proto_register(&rxrpc_proto, 1);
+        if (ret < 0) {
+                printk(KERN_CRIT "RxRPC: Cannot register protocol\n");
+		goto error_proto;
+	}
+
+	ret = sock_register(&rxrpc_family_ops);
+	if (ret < 0) {
+                printk(KERN_CRIT "RxRPC: Cannot register socket family\n");
+		goto error_sock;
+	}
+
+	ret = register_key_type(&key_type_rxrpc);
+	if (ret < 0) {
+                printk(KERN_CRIT "RxRPC: Cannot register client key type\n");
+		goto error_key_type;
+	}
+
+	ret = register_key_type(&key_type_rxrpc_s);
+	if (ret < 0) {
+                printk(KERN_CRIT "RxRPC: Cannot register server key type\n");
+		goto error_key_type_s;
+	}
+
+#ifdef CONFIG_PROC_FS
+	proc_net_fops_create("rxrpc_calls", 0, &rxrpc_call_seq_fops);
+	proc_net_fops_create("rxrpc_conns", 0, &rxrpc_connection_seq_fops);
+#endif
+	return 0;
+
+error_key_type_s:
+	unregister_key_type(&key_type_rxrpc);
+error_key_type:
+	sock_unregister(PF_RXRPC);
+error_sock:
+	proto_unregister(&rxrpc_proto);
+error_proto:
+	kmem_cache_destroy(rxrpc_call_jar);
+error_call_jar:
+	return ret;
+}
+
+/*
+ * unregister the RxRPC protocol
+ */
+static void __exit af_rxrpc_exit(void)
+{
+	_enter("");
+	unregister_key_type(&key_type_rxrpc_s);
+	unregister_key_type(&key_type_rxrpc);
+	sock_unregister(PF_RXRPC);
+	proto_unregister(&rxrpc_proto);
+	rxrpc_destroy_all_calls();
+	rxrpc_destroy_all_connections();
+	rxrpc_destroy_all_transports();
+	rxrpc_destroy_all_peers();
+	rxrpc_destroy_all_locals();
+
+	ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0);
+
+	_debug("flush scheduled work");
+	flush_scheduled_work();
+	proc_net_remove("rxrpc_conns");
+	proc_net_remove("rxrpc_calls");
+	kmem_cache_destroy(rxrpc_call_jar);
+	_leave("");
+}
+
+module_init(af_rxrpc_init);
+module_exit(af_rxrpc_exit);
diff --git a/net/rxrpc/ar-accept.c b/net/rxrpc/ar-accept.c
new file mode 100644
index 00000000000..e7af780cd6f
--- /dev/null
+++ b/net/rxrpc/ar-accept.c
@@ -0,0 +1,399 @@
+/* incoming call handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+/*
+ * generate a connection-level abort
+ */
+static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx,
+		      struct rxrpc_header *hdr)
+{
+	struct msghdr msg;
+	struct kvec iov[1];
+	size_t len;
+	int ret;
+
+	_enter("%d,,", local->debug_id);
+
+	msg.msg_name	= &srx->transport.sin;
+	msg.msg_namelen	= sizeof(srx->transport.sin);
+	msg.msg_control	= NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags	= 0;
+
+	hdr->seq	= 0;
+	hdr->type	= RXRPC_PACKET_TYPE_BUSY;
+	hdr->flags	= 0;
+	hdr->userStatus	= 0;
+	hdr->_rsvd	= 0;
+
+	iov[0].iov_base	= hdr;
+	iov[0].iov_len	= sizeof(*hdr);
+
+	len = iov[0].iov_len;
+
+	hdr->serial = htonl(1);
+	_proto("Tx BUSY %%%u", ntohl(hdr->serial));
+
+	ret = kernel_sendmsg(local->socket, &msg, iov, 1, len);
+	if (ret < 0) {
+		_leave(" = -EAGAIN [sendmsg failed: %d]", ret);
+		return -EAGAIN;
+	}
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * accept an incoming call that needs peer, transport and/or connection setting
+ * up
+ */
+static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
+				      struct rxrpc_sock *rx,
+				      struct sk_buff *skb,
+				      struct sockaddr_rxrpc *srx)
+{
+	struct rxrpc_connection *conn;
+	struct rxrpc_transport *trans;
+	struct rxrpc_skb_priv *sp, *nsp;
+	struct rxrpc_peer *peer;
+	struct rxrpc_call *call;
+	struct sk_buff *notification;
+	int ret;
+
+	_enter("");
+
+	sp = rxrpc_skb(skb);
+
+	/* get a notification message to send to the server app */
+	notification = alloc_skb(0, GFP_NOFS);
+	rxrpc_new_skb(notification);
+	notification->mark = RXRPC_SKB_MARK_NEW_CALL;
+
+	peer = rxrpc_get_peer(srx, GFP_NOIO);
+	if (IS_ERR(peer)) {
+		_debug("no peer");
+		ret = -EBUSY;
+		goto error;
+	}
+
+	trans = rxrpc_get_transport(local, peer, GFP_NOIO);
+	rxrpc_put_peer(peer);
+	if (!trans) {
+		_debug("no trans");
+		ret = -EBUSY;
+		goto error;
+	}
+
+	conn = rxrpc_incoming_connection(trans, &sp->hdr, GFP_NOIO);
+	rxrpc_put_transport(trans);
+	if (IS_ERR(conn)) {
+		_debug("no conn");
+		ret = PTR_ERR(conn);
+		goto error;
+	}
+
+	call = rxrpc_incoming_call(rx, conn, &sp->hdr, GFP_NOIO);
+	rxrpc_put_connection(conn);
+	if (IS_ERR(call)) {
+		_debug("no call");
+		ret = PTR_ERR(call);
+		goto error;
+	}
+
+	/* attach the call to the socket */
+	read_lock_bh(&local->services_lock);
+	if (rx->sk.sk_state == RXRPC_CLOSE)
+		goto invalid_service;
+
+	write_lock(&rx->call_lock);
+	if (!test_and_set_bit(RXRPC_CALL_INIT_ACCEPT, &call->flags)) {
+		rxrpc_get_call(call);
+
+		spin_lock(&call->conn->state_lock);
+		if (sp->hdr.securityIndex > 0 &&
+		    call->conn->state == RXRPC_CONN_SERVER_UNSECURED) {
+			_debug("await conn sec");
+			list_add_tail(&call->accept_link, &rx->secureq);
+			call->conn->state = RXRPC_CONN_SERVER_CHALLENGING;
+			atomic_inc(&call->conn->usage);
+			set_bit(RXRPC_CONN_CHALLENGE, &call->conn->events);
+			schedule_work(&call->conn->processor);
+		} else {
+			_debug("conn ready");
+			call->state = RXRPC_CALL_SERVER_ACCEPTING;
+			list_add_tail(&call->accept_link, &rx->acceptq);
+			rxrpc_get_call(call);
+			nsp = rxrpc_skb(notification);
+			nsp->call = call;
+
+			ASSERTCMP(atomic_read(&call->usage), >=, 3);
+
+			_debug("notify");
+			spin_lock(&call->lock);
+			ret = rxrpc_queue_rcv_skb(call, notification, true,
+						  false);
+			spin_unlock(&call->lock);
+			notification = NULL;
+			if (ret < 0)
+				BUG();
+		}
+		spin_unlock(&call->conn->state_lock);
+
+		_debug("queued");
+	}
+	write_unlock(&rx->call_lock);
+
+	_debug("process");
+	rxrpc_fast_process_packet(call, skb);
+
+	_debug("done");
+	read_unlock_bh(&local->services_lock);
+	rxrpc_free_skb(notification);
+	rxrpc_put_call(call);
+	_leave(" = 0");
+	return 0;
+
+invalid_service:
+	_debug("invalid");
+	read_unlock_bh(&local->services_lock);
+
+	read_lock_bh(&call->state_lock);
+	if (!test_bit(RXRPC_CALL_RELEASE, &call->flags) &&
+	    !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) {
+		rxrpc_get_call(call);
+		schedule_work(&call->processor);
+	}
+	read_unlock_bh(&call->state_lock);
+	rxrpc_put_call(call);
+	ret = -ECONNREFUSED;
+error:
+	rxrpc_free_skb(notification);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * accept incoming calls that need peer, transport and/or connection setting up
+ * - the packets we get are all incoming client DATA packets that have seq == 1
+ */
+void rxrpc_accept_incoming_calls(struct work_struct *work)
+{
+	struct rxrpc_local *local =
+		container_of(work, struct rxrpc_local, acceptor);
+	struct rxrpc_skb_priv *sp;
+	struct sockaddr_rxrpc srx;
+	struct rxrpc_sock *rx;
+	struct sk_buff *skb;
+	__be16 service_id;
+	int ret;
+
+	_enter("%d", local->debug_id);
+
+	read_lock_bh(&rxrpc_local_lock);
+	if (atomic_read(&local->usage) > 0)
+		rxrpc_get_local(local);
+	else
+		local = NULL;
+	read_unlock_bh(&rxrpc_local_lock);
+	if (!local) {
+		_leave(" [local dead]");
+		return;
+	}
+
+process_next_packet:
+	skb = skb_dequeue(&local->accept_queue);
+	if (!skb) {
+		rxrpc_put_local(local);
+		_leave("\n");
+		return;
+	}
+
+	_net("incoming call skb %p", skb);
+
+	sp = rxrpc_skb(skb);
+
+	/* determine the remote address */
+	memset(&srx, 0, sizeof(srx));
+	srx.srx_family = AF_RXRPC;
+	srx.transport.family = local->srx.transport.family;
+	srx.transport_type = local->srx.transport_type;
+	switch (srx.transport.family) {
+	case AF_INET:
+		srx.transport_len = sizeof(struct sockaddr_in);
+		srx.transport.sin.sin_port = udp_hdr(skb)->source;
+		srx.transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+		break;
+	default:
+		goto busy;
+	}
+
+	/* get the socket providing the service */
+	service_id = sp->hdr.serviceId;
+	read_lock_bh(&local->services_lock);
+	list_for_each_entry(rx, &local->services, listen_link) {
+		if (rx->service_id == service_id &&
+		    rx->sk.sk_state != RXRPC_CLOSE)
+			goto found_service;
+	}
+	read_unlock_bh(&local->services_lock);
+	goto invalid_service;
+
+found_service:
+	_debug("found service %hd", ntohs(rx->service_id));
+	if (sk_acceptq_is_full(&rx->sk))
+		goto backlog_full;
+	sk_acceptq_added(&rx->sk);
+	sock_hold(&rx->sk);
+	read_unlock_bh(&local->services_lock);
+
+	ret = rxrpc_accept_incoming_call(local, rx, skb, &srx);
+	if (ret < 0)
+		sk_acceptq_removed(&rx->sk);
+	sock_put(&rx->sk);
+	switch (ret) {
+	case -ECONNRESET: /* old calls are ignored */
+	case -ECONNABORTED: /* aborted calls are reaborted or ignored */
+	case 0:
+		goto process_next_packet;
+	case -ECONNREFUSED:
+		goto invalid_service;
+	case -EBUSY:
+		goto busy;
+	case -EKEYREJECTED:
+		goto security_mismatch;
+	default:
+		BUG();
+	}
+
+backlog_full:
+	read_unlock_bh(&local->services_lock);
+busy:
+	rxrpc_busy(local, &srx, &sp->hdr);
+	rxrpc_free_skb(skb);
+	goto process_next_packet;
+
+invalid_service:
+	skb->priority = RX_INVALID_OPERATION;
+	rxrpc_reject_packet(local, skb);
+	goto process_next_packet;
+
+	/* can't change connection security type mid-flow */
+security_mismatch:
+	skb->priority = RX_PROTOCOL_ERROR;
+	rxrpc_reject_packet(local, skb);
+	goto process_next_packet;
+}
+
+/*
+ * handle acceptance of a call by userspace
+ * - assign the user call ID to the call at the front of the queue
+ */
+int rxrpc_accept_call(struct rxrpc_sock *rx, unsigned long user_call_ID)
+{
+	struct rxrpc_call *call;
+	struct rb_node *parent, **pp;
+	int ret;
+
+	_enter(",%lx", user_call_ID);
+
+	ASSERT(!irqs_disabled());
+
+	write_lock(&rx->call_lock);
+
+	ret = -ENODATA;
+	if (list_empty(&rx->acceptq))
+		goto out;
+
+	/* check the user ID isn't already in use */
+	ret = -EBADSLT;
+	pp = &rx->calls.rb_node;
+	parent = NULL;
+	while (*pp) {
+		parent = *pp;
+		call = rb_entry(parent, struct rxrpc_call, sock_node);
+
+		if (user_call_ID < call->user_call_ID)
+			pp = &(*pp)->rb_left;
+		else if (user_call_ID > call->user_call_ID)
+			pp = &(*pp)->rb_right;
+		else
+			goto out;
+	}
+
+	/* dequeue the first call and check it's still valid */
+	call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link);
+	list_del_init(&call->accept_link);
+	sk_acceptq_removed(&rx->sk);
+
+	write_lock_bh(&call->state_lock);
+	switch (call->state) {
+	case RXRPC_CALL_SERVER_ACCEPTING:
+		call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
+		break;
+	case RXRPC_CALL_REMOTELY_ABORTED:
+	case RXRPC_CALL_LOCALLY_ABORTED:
+		ret = -ECONNABORTED;
+		goto out_release;
+	case RXRPC_CALL_NETWORK_ERROR:
+		ret = call->conn->error;
+		goto out_release;
+	case RXRPC_CALL_DEAD:
+		ret = -ETIME;
+		goto out_discard;
+	default:
+		BUG();
+	}
+
+	/* formalise the acceptance */
+	call->user_call_ID = user_call_ID;
+	rb_link_node(&call->sock_node, parent, pp);
+	rb_insert_color(&call->sock_node, &rx->calls);
+	if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags))
+		BUG();
+	if (test_and_set_bit(RXRPC_CALL_ACCEPTED, &call->events))
+		BUG();
+	schedule_work(&call->processor);
+
+	write_unlock_bh(&call->state_lock);
+	write_unlock(&rx->call_lock);
+	_leave(" = 0");
+	return 0;
+
+	/* if the call is already dying or dead, then we leave the socket's ref
+	 * on it to be released by rxrpc_dead_call_expired() as induced by
+	 * rxrpc_release_call() */
+out_release:
+	_debug("release %p", call);
+	if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+	    !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+		schedule_work(&call->processor);
+out_discard:
+	write_unlock_bh(&call->state_lock);
+	_debug("discard %p", call);
+out:
+	write_unlock(&rx->call_lock);
+	_leave(" = %d", ret);
+	return ret;
+}
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
new file mode 100644
index 00000000000..8f7764eca96
--- /dev/null
+++ b/net/rxrpc/ar-ack.c
@@ -0,0 +1,1250 @@
+/* Management of Tx window, Tx resend, ACKs and out-of-sequence reception
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/circ_buf.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static unsigned rxrpc_ack_defer = 1;
+
+static const char *rxrpc_acks[] = {
+	"---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL",
+	"-?-"
+};
+
+static const s8 rxrpc_ack_priority[] = {
+	[0]				= 0,
+	[RXRPC_ACK_DELAY]		= 1,
+	[RXRPC_ACK_REQUESTED]		= 2,
+	[RXRPC_ACK_IDLE]		= 3,
+	[RXRPC_ACK_PING_RESPONSE]	= 4,
+	[RXRPC_ACK_DUPLICATE]		= 5,
+	[RXRPC_ACK_OUT_OF_SEQUENCE]	= 6,
+	[RXRPC_ACK_EXCEEDS_WINDOW]	= 7,
+	[RXRPC_ACK_NOSPACE]		= 8,
+};
+
+/*
+ * propose an ACK be sent
+ */
+void __rxrpc_propose_ACK(struct rxrpc_call *call, uint8_t ack_reason,
+			 __be32 serial, bool immediate)
+{
+	unsigned long expiry;
+	s8 prior = rxrpc_ack_priority[ack_reason];
+
+	ASSERTCMP(prior, >, 0);
+
+	_enter("{%d},%s,%%%x,%u",
+	       call->debug_id, rxrpc_acks[ack_reason], ntohl(serial),
+	       immediate);
+
+	if (prior < rxrpc_ack_priority[call->ackr_reason]) {
+		if (immediate)
+			goto cancel_timer;
+		return;
+	}
+
+	/* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
+	 * numbers */
+	if (prior == rxrpc_ack_priority[call->ackr_reason]) {
+		if (prior <= 4)
+			call->ackr_serial = serial;
+		if (immediate)
+			goto cancel_timer;
+		return;
+	}
+
+	call->ackr_reason = ack_reason;
+	call->ackr_serial = serial;
+
+	switch (ack_reason) {
+	case RXRPC_ACK_DELAY:
+		_debug("run delay timer");
+		call->ack_timer.expires = jiffies + rxrpc_ack_timeout * HZ;
+		add_timer(&call->ack_timer);
+		return;
+
+	case RXRPC_ACK_IDLE:
+		if (!immediate) {
+			_debug("run defer timer");
+			expiry = 1;
+			goto run_timer;
+		}
+		goto cancel_timer;
+
+	case RXRPC_ACK_REQUESTED:
+		if (!rxrpc_ack_defer)
+			goto cancel_timer;
+		if (!immediate || serial == cpu_to_be32(1)) {
+			_debug("run defer timer");
+			expiry = rxrpc_ack_defer;
+			goto run_timer;
+		}
+
+	default:
+		_debug("immediate ACK");
+		goto cancel_timer;
+	}
+
+run_timer:
+	expiry += jiffies;
+	if (!timer_pending(&call->ack_timer) ||
+	    time_after(call->ack_timer.expires, expiry))
+		mod_timer(&call->ack_timer, expiry);
+	return;
+
+cancel_timer:
+	_debug("cancel timer %%%u", ntohl(serial));
+	try_to_del_timer_sync(&call->ack_timer);
+	read_lock_bh(&call->state_lock);
+	if (call->state <= RXRPC_CALL_COMPLETE &&
+	    !test_and_set_bit(RXRPC_CALL_ACK, &call->events))
+		schedule_work(&call->processor);
+	read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * propose an ACK be sent, locking the call structure
+ */
+void rxrpc_propose_ACK(struct rxrpc_call *call, uint8_t ack_reason,
+		       __be32 serial, bool immediate)
+{
+	s8 prior = rxrpc_ack_priority[ack_reason];
+
+	if (prior > rxrpc_ack_priority[call->ackr_reason]) {
+		spin_lock_bh(&call->lock);
+		__rxrpc_propose_ACK(call, ack_reason, serial, immediate);
+		spin_unlock_bh(&call->lock);
+	}
+}
+
+/*
+ * set the resend timer
+ */
+static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend,
+			     unsigned long resend_at)
+{
+	read_lock_bh(&call->state_lock);
+	if (call->state >= RXRPC_CALL_COMPLETE)
+		resend = 0;
+
+	if (resend & 1) {
+		_debug("SET RESEND");
+		set_bit(RXRPC_CALL_RESEND, &call->events);
+	}
+
+	if (resend & 2) {
+		_debug("MODIFY RESEND TIMER");
+		set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+		mod_timer(&call->resend_timer, resend_at);
+	} else {
+		_debug("KILL RESEND TIMER");
+		del_timer_sync(&call->resend_timer);
+		clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
+		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+	}
+	read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * resend packets
+ */
+static void rxrpc_resend(struct rxrpc_call *call)
+{
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_header *hdr;
+	struct sk_buff *txb;
+	unsigned long *p_txb, resend_at;
+	int loop, stop;
+	u8 resend;
+
+	_enter("{%d,%d,%d,%d},",
+	       call->acks_hard, call->acks_unacked,
+	       atomic_read(&call->sequence),
+	       CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
+
+	stop = 0;
+	resend = 0;
+	resend_at = 0;
+
+	for (loop = call->acks_tail;
+	     loop != call->acks_head || stop;
+	     loop = (loop + 1) &  (call->acks_winsz - 1)
+	     ) {
+		p_txb = call->acks_window + loop;
+		smp_read_barrier_depends();
+		if (*p_txb & 1)
+			continue;
+
+		txb = (struct sk_buff *) *p_txb;
+		sp = rxrpc_skb(txb);
+
+		if (sp->need_resend) {
+			sp->need_resend = 0;
+
+			/* each Tx packet has a new serial number */
+			sp->hdr.serial =
+				htonl(atomic_inc_return(&call->conn->serial));
+
+			hdr = (struct rxrpc_header *) txb->head;
+			hdr->serial = sp->hdr.serial;
+
+			_proto("Tx DATA %%%u { #%d }",
+			       ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
+			if (rxrpc_send_packet(call->conn->trans, txb) < 0) {
+				stop = 0;
+				sp->resend_at = jiffies + 3;
+			} else {
+				sp->resend_at =
+					jiffies + rxrpc_resend_timeout * HZ;
+			}
+		}
+
+		if (time_after_eq(jiffies + 1, sp->resend_at)) {
+			sp->need_resend = 1;
+			resend |= 1;
+		} else if (resend & 2) {
+			if (time_before(sp->resend_at, resend_at))
+				resend_at = sp->resend_at;
+		} else {
+			resend_at = sp->resend_at;
+			resend |= 2;
+		}
+	}
+
+	rxrpc_set_resend(call, resend, resend_at);
+	_leave("");
+}
+
+/*
+ * handle resend timer expiry
+ */
+static void rxrpc_resend_timer(struct rxrpc_call *call)
+{
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *txb;
+	unsigned long *p_txb, resend_at;
+	int loop;
+	u8 resend;
+
+	_enter("%d,%d,%d",
+	       call->acks_tail, call->acks_unacked, call->acks_head);
+
+	resend = 0;
+	resend_at = 0;
+
+	for (loop = call->acks_unacked;
+	     loop != call->acks_head;
+	     loop = (loop + 1) &  (call->acks_winsz - 1)
+	     ) {
+		p_txb = call->acks_window + loop;
+		smp_read_barrier_depends();
+		txb = (struct sk_buff *) (*p_txb & ~1);
+		sp = rxrpc_skb(txb);
+
+		ASSERT(!(*p_txb & 1));
+
+		if (sp->need_resend) {
+			;
+		} else if (time_after_eq(jiffies + 1, sp->resend_at)) {
+			sp->need_resend = 1;
+			resend |= 1;
+		} else if (resend & 2) {
+			if (time_before(sp->resend_at, resend_at))
+				resend_at = sp->resend_at;
+		} else {
+			resend_at = sp->resend_at;
+			resend |= 2;
+		}
+	}
+
+	rxrpc_set_resend(call, resend, resend_at);
+	_leave("");
+}
+
+/*
+ * process soft ACKs of our transmitted packets
+ * - these indicate packets the peer has or has not received, but hasn't yet
+ *   given to the consumer, and so can still be discarded and re-requested
+ */
+static int rxrpc_process_soft_ACKs(struct rxrpc_call *call,
+				   struct rxrpc_ackpacket *ack,
+				   struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *txb;
+	unsigned long *p_txb, resend_at;
+	int loop;
+	u8 sacks[RXRPC_MAXACKS], resend;
+
+	_enter("{%d,%d},{%d},",
+	       call->acks_hard,
+	       CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz),
+	       ack->nAcks);
+
+	if (skb_copy_bits(skb, 0, sacks, ack->nAcks) < 0)
+		goto protocol_error;
+
+	resend = 0;
+	resend_at = 0;
+	for (loop = 0; loop < ack->nAcks; loop++) {
+		p_txb = call->acks_window;
+		p_txb += (call->acks_tail + loop) & (call->acks_winsz - 1);
+		smp_read_barrier_depends();
+		txb = (struct sk_buff *) (*p_txb & ~1);
+		sp = rxrpc_skb(txb);
+
+		switch (sacks[loop]) {
+		case RXRPC_ACK_TYPE_ACK:
+			sp->need_resend = 0;
+			*p_txb |= 1;
+			break;
+		case RXRPC_ACK_TYPE_NACK:
+			sp->need_resend = 1;
+			*p_txb &= ~1;
+			resend = 1;
+			break;
+		default:
+			_debug("Unsupported ACK type %d", sacks[loop]);
+			goto protocol_error;
+		}
+	}
+
+	smp_mb();
+	call->acks_unacked = (call->acks_tail + loop) & (call->acks_winsz - 1);
+
+	/* anything not explicitly ACK'd is implicitly NACK'd, but may just not
+	 * have been received or processed yet by the far end */
+	for (loop = call->acks_unacked;
+	     loop != call->acks_head;
+	     loop = (loop + 1) &  (call->acks_winsz - 1)
+	     ) {
+		p_txb = call->acks_window + loop;
+		smp_read_barrier_depends();
+		txb = (struct sk_buff *) (*p_txb & ~1);
+		sp = rxrpc_skb(txb);
+
+		if (*p_txb & 1) {
+			/* packet must have been discarded */
+			sp->need_resend = 1;
+			*p_txb &= ~1;
+			resend |= 1;
+		} else if (sp->need_resend) {
+			;
+		} else if (time_after_eq(jiffies + 1, sp->resend_at)) {
+			sp->need_resend = 1;
+			resend |= 1;
+		} else if (resend & 2) {
+			if (time_before(sp->resend_at, resend_at))
+				resend_at = sp->resend_at;
+		} else {
+			resend_at = sp->resend_at;
+			resend |= 2;
+		}
+	}
+
+	rxrpc_set_resend(call, resend, resend_at);
+	_leave(" = 0");
+	return 0;
+
+protocol_error:
+	_leave(" = -EPROTO");
+	return -EPROTO;
+}
+
+/*
+ * discard hard-ACK'd packets from the Tx window
+ */
+static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard)
+{
+	struct rxrpc_skb_priv *sp;
+	unsigned long _skb;
+	int tail = call->acks_tail, old_tail;
+	int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz);
+
+	_enter("{%u,%u},%u", call->acks_hard, win, hard);
+
+	ASSERTCMP(hard - call->acks_hard, <=, win);
+
+	while (call->acks_hard < hard) {
+		smp_read_barrier_depends();
+		_skb = call->acks_window[tail] & ~1;
+		sp = rxrpc_skb((struct sk_buff *) _skb);
+		rxrpc_free_skb((struct sk_buff *) _skb);
+		old_tail = tail;
+		tail = (tail + 1) & (call->acks_winsz - 1);
+		call->acks_tail = tail;
+		if (call->acks_unacked == old_tail)
+			call->acks_unacked = tail;
+		call->acks_hard++;
+	}
+
+	wake_up(&call->tx_waitq);
+}
+
+/*
+ * clear the Tx window in the event of a failure
+ */
+static void rxrpc_clear_tx_window(struct rxrpc_call *call)
+{
+	rxrpc_rotate_tx_window(call, atomic_read(&call->sequence));
+}
+
+/*
+ * drain the out of sequence received packet queue into the packet Rx queue
+ */
+static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call)
+{
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *skb;
+	bool terminal;
+	int ret;
+
+	_enter("{%d,%d}", call->rx_data_post, call->rx_first_oos);
+
+	spin_lock_bh(&call->lock);
+
+	ret = -ECONNRESET;
+	if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
+		goto socket_unavailable;
+
+	skb = skb_dequeue(&call->rx_oos_queue);
+	if (skb) {
+		sp = rxrpc_skb(skb);
+
+		_debug("drain OOS packet %d [%d]",
+		       ntohl(sp->hdr.seq), call->rx_first_oos);
+
+		if (ntohl(sp->hdr.seq) != call->rx_first_oos) {
+			skb_queue_head(&call->rx_oos_queue, skb);
+			call->rx_first_oos = ntohl(rxrpc_skb(skb)->hdr.seq);
+			_debug("requeue %p {%u}", skb, call->rx_first_oos);
+		} else {
+			skb->mark = RXRPC_SKB_MARK_DATA;
+			terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
+				!(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
+			ret = rxrpc_queue_rcv_skb(call, skb, true, terminal);
+			BUG_ON(ret < 0);
+			_debug("drain #%u", call->rx_data_post);
+			call->rx_data_post++;
+
+			/* find out what the next packet is */
+			skb = skb_peek(&call->rx_oos_queue);
+			if (skb)
+				call->rx_first_oos =
+					ntohl(rxrpc_skb(skb)->hdr.seq);
+			else
+				call->rx_first_oos = 0;
+			_debug("peek %p {%u}", skb, call->rx_first_oos);
+		}
+	}
+
+	ret = 0;
+socket_unavailable:
+	spin_unlock_bh(&call->lock);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * insert an out of sequence packet into the buffer
+ */
+static void rxrpc_insert_oos_packet(struct rxrpc_call *call,
+				    struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp, *psp;
+	struct sk_buff *p;
+	u32 seq;
+
+	sp = rxrpc_skb(skb);
+	seq = ntohl(sp->hdr.seq);
+	_enter(",,{%u}", seq);
+
+	skb->destructor = rxrpc_packet_destructor;
+	ASSERTCMP(sp->call, ==, NULL);
+	sp->call = call;
+	rxrpc_get_call(call);
+
+	/* insert into the buffer in sequence order */
+	spin_lock_bh(&call->lock);
+
+	skb_queue_walk(&call->rx_oos_queue, p) {
+		psp = rxrpc_skb(p);
+		if (ntohl(psp->hdr.seq) > seq) {
+			_debug("insert oos #%u before #%u",
+			       seq, ntohl(psp->hdr.seq));
+			skb_insert(p, skb, &call->rx_oos_queue);
+			goto inserted;
+		}
+	}
+
+	_debug("append oos #%u", seq);
+	skb_queue_tail(&call->rx_oos_queue, skb);
+inserted:
+
+	/* we might now have a new front to the queue */
+	if (call->rx_first_oos == 0 || seq < call->rx_first_oos)
+		call->rx_first_oos = seq;
+
+	read_lock(&call->state_lock);
+	if (call->state < RXRPC_CALL_COMPLETE &&
+	    call->rx_data_post == call->rx_first_oos) {
+		_debug("drain rx oos now");
+		set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events);
+	}
+	read_unlock(&call->state_lock);
+
+	spin_unlock_bh(&call->lock);
+	_leave(" [stored #%u]", call->rx_first_oos);
+}
+
+/*
+ * clear the Tx window on final ACK reception
+ */
+static void rxrpc_zap_tx_window(struct rxrpc_call *call)
+{
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *skb;
+	unsigned long _skb, *acks_window;
+	uint8_t winsz = call->acks_winsz;
+	int tail;
+
+	acks_window = call->acks_window;
+	call->acks_window = NULL;
+
+	while (CIRC_CNT(call->acks_head, call->acks_tail, winsz) > 0) {
+		tail = call->acks_tail;
+		smp_read_barrier_depends();
+		_skb = acks_window[tail] & ~1;
+		smp_mb();
+		call->acks_tail = (call->acks_tail + 1) & (winsz - 1);
+
+		skb = (struct sk_buff *) _skb;
+		sp = rxrpc_skb(skb);
+		_debug("+++ clear Tx %u", ntohl(sp->hdr.seq));
+		rxrpc_free_skb(skb);
+	}
+
+	kfree(acks_window);
+}
+
+/*
+ * process packets in the reception queue
+ */
+static int rxrpc_process_rx_queue(struct rxrpc_call *call,
+				  u32 *_abort_code)
+{
+	struct rxrpc_ackpacket ack;
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *skb;
+	bool post_ACK;
+	int latest;
+	u32 hard, tx;
+
+	_enter("");
+
+process_further:
+	skb = skb_dequeue(&call->rx_queue);
+	if (!skb)
+		return -EAGAIN;
+
+	_net("deferred skb %p", skb);
+
+	sp = rxrpc_skb(skb);
+
+	_debug("process %s [st %d]", rxrpc_pkts[sp->hdr.type], call->state);
+
+	post_ACK = false;
+
+	switch (sp->hdr.type) {
+		/* data packets that wind up here have been received out of
+		 * order, need security processing or are jumbo packets */
+	case RXRPC_PACKET_TYPE_DATA:
+		_proto("OOSQ DATA %%%u { #%u }",
+		       ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
+
+		/* secured packets must be verified and possibly decrypted */
+		if (rxrpc_verify_packet(call, skb, _abort_code) < 0)
+			goto protocol_error;
+
+		rxrpc_insert_oos_packet(call, skb);
+		goto process_further;
+
+		/* partial ACK to process */
+	case RXRPC_PACKET_TYPE_ACK:
+		if (skb_copy_bits(skb, 0, &ack, sizeof(ack)) < 0) {
+			_debug("extraction failure");
+			goto protocol_error;
+		}
+		if (!skb_pull(skb, sizeof(ack)))
+			BUG();
+
+		latest = ntohl(sp->hdr.serial);
+		hard = ntohl(ack.firstPacket);
+		tx = atomic_read(&call->sequence);
+
+		_proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
+		       latest,
+		       ntohs(ack.maxSkew),
+		       hard,
+		       ntohl(ack.previousPacket),
+		       ntohl(ack.serial),
+		       rxrpc_acks[ack.reason],
+		       ack.nAcks);
+
+		if (ack.reason == RXRPC_ACK_PING) {
+			_proto("Rx ACK %%%u PING Request", latest);
+			rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
+					  sp->hdr.serial, true);
+		}
+
+		/* discard any out-of-order or duplicate ACKs */
+		if (latest - call->acks_latest <= 0) {
+			_debug("discard ACK %d <= %d",
+			       latest, call->acks_latest);
+			goto discard;
+		}
+		call->acks_latest = latest;
+
+		if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+		    call->state != RXRPC_CALL_CLIENT_AWAIT_REPLY &&
+		    call->state != RXRPC_CALL_SERVER_SEND_REPLY &&
+		    call->state != RXRPC_CALL_SERVER_AWAIT_ACK)
+			goto discard;
+
+		_debug("Tx=%d H=%u S=%d", tx, call->acks_hard, call->state);
+
+		if (hard > 0) {
+			if (hard - 1 > tx) {
+				_debug("hard-ACK'd packet %d not transmitted"
+				       " (%d top)",
+				       hard - 1, tx);
+				goto protocol_error;
+			}
+
+			if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY ||
+			     call->state == RXRPC_CALL_SERVER_AWAIT_ACK) &&
+			    hard > tx)
+				goto all_acked;
+
+			smp_rmb();
+			rxrpc_rotate_tx_window(call, hard - 1);
+		}
+
+		if (ack.nAcks > 0) {
+			if (hard - 1 + ack.nAcks > tx) {
+				_debug("soft-ACK'd packet %d+%d not"
+				       " transmitted (%d top)",
+				       hard - 1, ack.nAcks, tx);
+				goto protocol_error;
+			}
+
+			if (rxrpc_process_soft_ACKs(call, &ack, skb) < 0)
+				goto protocol_error;
+		}
+		goto discard;
+
+		/* complete ACK to process */
+	case RXRPC_PACKET_TYPE_ACKALL:
+		goto all_acked;
+
+		/* abort and busy are handled elsewhere */
+	case RXRPC_PACKET_TYPE_BUSY:
+	case RXRPC_PACKET_TYPE_ABORT:
+		BUG();
+
+		/* connection level events - also handled elsewhere */
+	case RXRPC_PACKET_TYPE_CHALLENGE:
+	case RXRPC_PACKET_TYPE_RESPONSE:
+	case RXRPC_PACKET_TYPE_DEBUG:
+		BUG();
+	}
+
+	/* if we've had a hard ACK that covers all the packets we've sent, then
+	 * that ends that phase of the operation */
+all_acked:
+	write_lock_bh(&call->state_lock);
+	_debug("ack all %d", call->state);
+
+	switch (call->state) {
+	case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+		call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
+		break;
+	case RXRPC_CALL_SERVER_AWAIT_ACK:
+		_debug("srv complete");
+		call->state = RXRPC_CALL_COMPLETE;
+		post_ACK = true;
+		break;
+	case RXRPC_CALL_CLIENT_SEND_REQUEST:
+	case RXRPC_CALL_SERVER_RECV_REQUEST:
+		goto protocol_error_unlock; /* can't occur yet */
+	default:
+		write_unlock_bh(&call->state_lock);
+		goto discard; /* assume packet left over from earlier phase */
+	}
+
+	write_unlock_bh(&call->state_lock);
+
+	/* if all the packets we sent are hard-ACK'd, then we can discard
+	 * whatever we've got left */
+	_debug("clear Tx %d",
+	       CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
+
+	del_timer_sync(&call->resend_timer);
+	clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+	clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
+
+	if (call->acks_window)
+		rxrpc_zap_tx_window(call);
+
+	if (post_ACK) {
+		/* post the final ACK message for userspace to pick up */
+		_debug("post ACK");
+		skb->mark = RXRPC_SKB_MARK_FINAL_ACK;
+		sp->call = call;
+		rxrpc_get_call(call);
+		spin_lock_bh(&call->lock);
+		if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0)
+			BUG();
+		spin_unlock_bh(&call->lock);
+		goto process_further;
+	}
+
+discard:
+	rxrpc_free_skb(skb);
+	goto process_further;
+
+protocol_error_unlock:
+	write_unlock_bh(&call->state_lock);
+protocol_error:
+	rxrpc_free_skb(skb);
+	_leave(" = -EPROTO");
+	return -EPROTO;
+}
+
+/*
+ * post a message to the socket Rx queue for recvmsg() to pick up
+ */
+static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error,
+			      bool fatal)
+{
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *skb;
+	int ret;
+
+	_enter("{%d,%lx},%u,%u,%d",
+	       call->debug_id, call->flags, mark, error, fatal);
+
+	/* remove timers and things for fatal messages */
+	if (fatal) {
+		del_timer_sync(&call->resend_timer);
+		del_timer_sync(&call->ack_timer);
+		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+	}
+
+	if (mark != RXRPC_SKB_MARK_NEW_CALL &&
+	    !test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+		_leave("[no userid]");
+		return 0;
+	}
+
+	if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
+		skb = alloc_skb(0, GFP_NOFS);
+		if (!skb)
+			return -ENOMEM;
+
+		rxrpc_new_skb(skb);
+
+		skb->mark = mark;
+
+		sp = rxrpc_skb(skb);
+		memset(sp, 0, sizeof(*sp));
+		sp->error = error;
+		sp->call = call;
+		rxrpc_get_call(call);
+
+		spin_lock_bh(&call->lock);
+		ret = rxrpc_queue_rcv_skb(call, skb, true, fatal);
+		spin_unlock_bh(&call->lock);
+		if (ret < 0)
+			BUG();
+	}
+
+	return 0;
+}
+
+/*
+ * handle background processing of incoming call packets and ACK / abort
+ * generation
+ */
+void rxrpc_process_call(struct work_struct *work)
+{
+	struct rxrpc_call *call =
+		container_of(work, struct rxrpc_call, processor);
+	struct rxrpc_ackpacket ack;
+	struct rxrpc_ackinfo ackinfo;
+	struct rxrpc_header hdr;
+	struct msghdr msg;
+	struct kvec iov[5];
+	unsigned long bits;
+	__be32 data;
+	size_t len;
+	int genbit, loop, nbit, ioc, ret;
+	u32 abort_code = RX_PROTOCOL_ERROR;
+	u8 *acks = NULL;
+
+	//printk("\n--------------------\n");
+	_enter("{%d,%s,%lx} [%lu]",
+	       call->debug_id, rxrpc_call_states[call->state], call->events,
+	       (jiffies - call->creation_jif) / (HZ / 10));
+
+	if (test_and_set_bit(RXRPC_CALL_PROC_BUSY, &call->flags)) {
+		_debug("XXXXXXXXXXXXX RUNNING ON MULTIPLE CPUS XXXXXXXXXXXXX");
+		return;
+	}
+
+	/* there's a good chance we're going to have to send a message, so set
+	 * one up in advance */
+	msg.msg_name	= &call->conn->trans->peer->srx.transport.sin;
+	msg.msg_namelen	= sizeof(call->conn->trans->peer->srx.transport.sin);
+	msg.msg_control	= NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags	= 0;
+
+	hdr.epoch	= call->conn->epoch;
+	hdr.cid		= call->cid;
+	hdr.callNumber	= call->call_id;
+	hdr.seq		= 0;
+	hdr.type	= RXRPC_PACKET_TYPE_ACK;
+	hdr.flags	= call->conn->out_clientflag;
+	hdr.userStatus	= 0;
+	hdr.securityIndex = call->conn->security_ix;
+	hdr._rsvd	= 0;
+	hdr.serviceId	= call->conn->service_id;
+
+	memset(iov, 0, sizeof(iov));
+	iov[0].iov_base	= &hdr;
+	iov[0].iov_len	= sizeof(hdr);
+
+	/* deal with events of a final nature */
+	if (test_bit(RXRPC_CALL_RELEASE, &call->events)) {
+		rxrpc_release_call(call);
+		clear_bit(RXRPC_CALL_RELEASE, &call->events);
+	}
+
+	if (test_bit(RXRPC_CALL_RCVD_ERROR, &call->events)) {
+		int error;
+
+		clear_bit(RXRPC_CALL_CONN_ABORT, &call->events);
+		clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events);
+		clear_bit(RXRPC_CALL_ABORT, &call->events);
+
+		error = call->conn->trans->peer->net_error;
+		_debug("post net error %d", error);
+
+		if (rxrpc_post_message(call, RXRPC_SKB_MARK_NET_ERROR,
+				       error, true) < 0)
+			goto no_mem;
+		clear_bit(RXRPC_CALL_RCVD_ERROR, &call->events);
+		goto kill_ACKs;
+	}
+
+	if (test_bit(RXRPC_CALL_CONN_ABORT, &call->events)) {
+		ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
+
+		clear_bit(RXRPC_CALL_REJECT_BUSY, &call->events);
+		clear_bit(RXRPC_CALL_ABORT, &call->events);
+
+		_debug("post conn abort");
+
+		if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
+				       call->conn->error, true) < 0)
+			goto no_mem;
+		clear_bit(RXRPC_CALL_CONN_ABORT, &call->events);
+		goto kill_ACKs;
+	}
+
+	if (test_bit(RXRPC_CALL_REJECT_BUSY, &call->events)) {
+		hdr.type = RXRPC_PACKET_TYPE_BUSY;
+		genbit = RXRPC_CALL_REJECT_BUSY;
+		goto send_message;
+	}
+
+	if (test_bit(RXRPC_CALL_ABORT, &call->events)) {
+		ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
+
+		if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
+				       ECONNABORTED, true) < 0)
+			goto no_mem;
+		hdr.type = RXRPC_PACKET_TYPE_ABORT;
+		data = htonl(call->abort_code);
+		iov[1].iov_base = &data;
+		iov[1].iov_len = sizeof(data);
+		genbit = RXRPC_CALL_ABORT;
+		goto send_message;
+	}
+
+	if (test_bit(RXRPC_CALL_ACK_FINAL, &call->events)) {
+		hdr.type = RXRPC_PACKET_TYPE_ACKALL;
+		genbit = RXRPC_CALL_ACK_FINAL;
+		goto send_message;
+	}
+
+	if (call->events & ((1 << RXRPC_CALL_RCVD_BUSY) |
+			    (1 << RXRPC_CALL_RCVD_ABORT))
+	    ) {
+		u32 mark;
+
+		if (test_bit(RXRPC_CALL_RCVD_ABORT, &call->events))
+			mark = RXRPC_SKB_MARK_REMOTE_ABORT;
+		else
+			mark = RXRPC_SKB_MARK_BUSY;
+
+		_debug("post abort/busy");
+		rxrpc_clear_tx_window(call);
+		if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0)
+			goto no_mem;
+
+		clear_bit(RXRPC_CALL_RCVD_BUSY, &call->events);
+		clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
+		goto kill_ACKs;
+	}
+
+	if (test_and_clear_bit(RXRPC_CALL_RCVD_ACKALL, &call->events)) {
+		_debug("do implicit ackall");
+		rxrpc_clear_tx_window(call);
+	}
+
+	if (test_bit(RXRPC_CALL_LIFE_TIMER, &call->events)) {
+		write_lock_bh(&call->state_lock);
+		if (call->state <= RXRPC_CALL_COMPLETE) {
+			call->state = RXRPC_CALL_LOCALLY_ABORTED;
+			call->abort_code = RX_CALL_TIMEOUT;
+			set_bit(RXRPC_CALL_ABORT, &call->events);
+		}
+		write_unlock_bh(&call->state_lock);
+
+		_debug("post timeout");
+		if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
+				       ETIME, true) < 0)
+			goto no_mem;
+
+		clear_bit(RXRPC_CALL_LIFE_TIMER, &call->events);
+		goto kill_ACKs;
+	}
+
+	/* deal with assorted inbound messages */
+	if (!skb_queue_empty(&call->rx_queue)) {
+		switch (rxrpc_process_rx_queue(call, &abort_code)) {
+		case 0:
+		case -EAGAIN:
+			break;
+		case -ENOMEM:
+			goto no_mem;
+		case -EKEYEXPIRED:
+		case -EKEYREJECTED:
+		case -EPROTO:
+			rxrpc_abort_call(call, abort_code);
+			goto kill_ACKs;
+		}
+	}
+
+	/* handle resending */
+	if (test_and_clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
+		rxrpc_resend_timer(call);
+	if (test_and_clear_bit(RXRPC_CALL_RESEND, &call->events))
+		rxrpc_resend(call);
+
+	/* consider sending an ordinary ACK */
+	if (test_bit(RXRPC_CALL_ACK, &call->events)) {
+		__be32 pad;
+
+		_debug("send ACK: window: %d - %d { %lx }",
+		       call->rx_data_eaten, call->ackr_win_top,
+		       call->ackr_window[0]);
+
+		if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST &&
+		    call->ackr_reason != RXRPC_ACK_PING_RESPONSE) {
+			/* ACK by sending reply DATA packet in this state */
+			clear_bit(RXRPC_CALL_ACK, &call->events);
+			goto maybe_reschedule;
+		}
+
+		genbit = RXRPC_CALL_ACK;
+
+		acks = kzalloc(call->ackr_win_top - call->rx_data_eaten,
+			       GFP_NOFS);
+		if (!acks)
+			goto no_mem;
+
+		//hdr.flags	= RXRPC_SLOW_START_OK;
+		ack.bufferSpace	= htons(8);
+		ack.maxSkew	= 0;
+		ack.serial	= 0;
+		ack.reason	= 0;
+
+		ackinfo.rxMTU	= htonl(5692);
+//		ackinfo.rxMTU	= htonl(call->conn->trans->peer->maxdata);
+		ackinfo.maxMTU	= htonl(call->conn->trans->peer->maxdata);
+		ackinfo.rwind	= htonl(32);
+		ackinfo.jumbo_max = htonl(4);
+
+		spin_lock_bh(&call->lock);
+		ack.reason = call->ackr_reason;
+		ack.serial = call->ackr_serial;
+		ack.previousPacket = call->ackr_prev_seq;
+		ack.firstPacket = htonl(call->rx_data_eaten + 1);
+
+		ack.nAcks = 0;
+		for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
+			nbit = loop * BITS_PER_LONG;
+			for (bits = call->ackr_window[loop]; bits; bits >>= 1
+			     ) {
+				_debug("- l=%d n=%d b=%lx", loop, nbit, bits);
+				if (bits & 1) {
+					acks[nbit] = RXRPC_ACK_TYPE_ACK;
+					ack.nAcks = nbit + 1;
+				}
+				nbit++;
+			}
+		}
+		call->ackr_reason = 0;
+		spin_unlock_bh(&call->lock);
+
+		pad = 0;
+
+		iov[1].iov_base = &ack;
+		iov[1].iov_len	= sizeof(ack);
+		iov[2].iov_base = acks;
+		iov[2].iov_len	= ack.nAcks;
+		iov[3].iov_base = &pad;
+		iov[3].iov_len	= 3;
+		iov[4].iov_base = &ackinfo;
+		iov[4].iov_len	= sizeof(ackinfo);
+
+		switch (ack.reason) {
+		case RXRPC_ACK_REQUESTED:
+		case RXRPC_ACK_DUPLICATE:
+		case RXRPC_ACK_OUT_OF_SEQUENCE:
+		case RXRPC_ACK_EXCEEDS_WINDOW:
+		case RXRPC_ACK_NOSPACE:
+		case RXRPC_ACK_PING:
+		case RXRPC_ACK_PING_RESPONSE:
+			goto send_ACK_with_skew;
+		case RXRPC_ACK_DELAY:
+		case RXRPC_ACK_IDLE:
+			goto send_ACK;
+		}
+	}
+
+	/* handle completion of security negotiations on an incoming
+	 * connection */
+	if (test_and_clear_bit(RXRPC_CALL_SECURED, &call->events)) {
+		_debug("secured");
+		spin_lock_bh(&call->lock);
+
+		if (call->state == RXRPC_CALL_SERVER_SECURING) {
+			_debug("securing");
+			write_lock(&call->conn->lock);
+			if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+			    !test_bit(RXRPC_CALL_RELEASE, &call->events)) {
+				_debug("not released");
+				call->state = RXRPC_CALL_SERVER_ACCEPTING;
+				list_move_tail(&call->accept_link,
+					       &call->socket->acceptq);
+			}
+			write_unlock(&call->conn->lock);
+			read_lock(&call->state_lock);
+			if (call->state < RXRPC_CALL_COMPLETE)
+				set_bit(RXRPC_CALL_POST_ACCEPT, &call->events);
+			read_unlock(&call->state_lock);
+		}
+
+		spin_unlock_bh(&call->lock);
+		if (!test_bit(RXRPC_CALL_POST_ACCEPT, &call->events))
+			goto maybe_reschedule;
+	}
+
+	/* post a notification of an acceptable connection to the app */
+	if (test_bit(RXRPC_CALL_POST_ACCEPT, &call->events)) {
+		_debug("post accept");
+		if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL,
+				       0, false) < 0)
+			goto no_mem;
+		clear_bit(RXRPC_CALL_POST_ACCEPT, &call->events);
+		goto maybe_reschedule;
+	}
+
+	/* handle incoming call acceptance */
+	if (test_and_clear_bit(RXRPC_CALL_ACCEPTED, &call->events)) {
+		_debug("accepted");
+		ASSERTCMP(call->rx_data_post, ==, 0);
+		call->rx_data_post = 1;
+		read_lock_bh(&call->state_lock);
+		if (call->state < RXRPC_CALL_COMPLETE)
+			set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events);
+		read_unlock_bh(&call->state_lock);
+	}
+
+	/* drain the out of sequence received packet queue into the packet Rx
+	 * queue */
+	if (test_and_clear_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events)) {
+		while (call->rx_data_post == call->rx_first_oos)
+			if (rxrpc_drain_rx_oos_queue(call) < 0)
+				break;
+		goto maybe_reschedule;
+	}
+
+	/* other events may have been raised since we started checking */
+	goto maybe_reschedule;
+
+send_ACK_with_skew:
+	ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) -
+			    ntohl(ack.serial));
+send_ACK:
+	hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
+	_proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
+	       ntohl(hdr.serial),
+	       ntohs(ack.maxSkew),
+	       ntohl(ack.firstPacket),
+	       ntohl(ack.previousPacket),
+	       ntohl(ack.serial),
+	       rxrpc_acks[ack.reason],
+	       ack.nAcks);
+
+	del_timer_sync(&call->ack_timer);
+	if (ack.nAcks > 0)
+		set_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags);
+	goto send_message_2;
+
+send_message:
+	_debug("send message");
+
+	hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
+	_proto("Tx %s %%%u", rxrpc_pkts[hdr.type], ntohl(hdr.serial));
+send_message_2:
+
+	len = iov[0].iov_len;
+	ioc = 1;
+	if (iov[4].iov_len) {
+		ioc = 5;
+		len += iov[4].iov_len;
+		len += iov[3].iov_len;
+		len += iov[2].iov_len;
+		len += iov[1].iov_len;
+	} else if (iov[3].iov_len) {
+		ioc = 4;
+		len += iov[3].iov_len;
+		len += iov[2].iov_len;
+		len += iov[1].iov_len;
+	} else if (iov[2].iov_len) {
+		ioc = 3;
+		len += iov[2].iov_len;
+		len += iov[1].iov_len;
+	} else if (iov[1].iov_len) {
+		ioc = 2;
+		len += iov[1].iov_len;
+	}
+
+	ret = kernel_sendmsg(call->conn->trans->local->socket,
+			     &msg, iov, ioc, len);
+	if (ret < 0) {
+		_debug("sendmsg failed: %d", ret);
+		read_lock_bh(&call->state_lock);
+		if (call->state < RXRPC_CALL_DEAD)
+			schedule_work(&call->processor);
+		read_unlock_bh(&call->state_lock);
+		goto error;
+	}
+
+	switch (genbit) {
+	case RXRPC_CALL_ABORT:
+		clear_bit(genbit, &call->events);
+		clear_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
+		goto kill_ACKs;
+
+	case RXRPC_CALL_ACK_FINAL:
+		write_lock_bh(&call->state_lock);
+		if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK)
+			call->state = RXRPC_CALL_COMPLETE;
+		write_unlock_bh(&call->state_lock);
+		goto kill_ACKs;
+
+	default:
+		clear_bit(genbit, &call->events);
+		switch (call->state) {
+		case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+		case RXRPC_CALL_CLIENT_RECV_REPLY:
+		case RXRPC_CALL_SERVER_RECV_REQUEST:
+		case RXRPC_CALL_SERVER_ACK_REQUEST:
+			_debug("start ACK timer");
+			rxrpc_propose_ACK(call, RXRPC_ACK_DELAY,
+					  call->ackr_serial, false);
+		default:
+			break;
+		}
+		goto maybe_reschedule;
+	}
+
+kill_ACKs:
+	del_timer_sync(&call->ack_timer);
+	if (test_and_clear_bit(RXRPC_CALL_ACK_FINAL, &call->events))
+		rxrpc_put_call(call);
+	clear_bit(RXRPC_CALL_ACK, &call->events);
+
+maybe_reschedule:
+	if (call->events || !skb_queue_empty(&call->rx_queue)) {
+		read_lock_bh(&call->state_lock);
+		if (call->state < RXRPC_CALL_DEAD)
+			schedule_work(&call->processor);
+		read_unlock_bh(&call->state_lock);
+	}
+
+	/* don't leave aborted connections on the accept queue */
+	if (call->state >= RXRPC_CALL_COMPLETE &&
+	    !list_empty(&call->accept_link)) {
+		_debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }",
+		       call, call->events, call->flags,
+		       ntohl(call->conn->cid));
+
+		read_lock_bh(&call->state_lock);
+		if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+		    !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+			schedule_work(&call->processor);
+		read_unlock_bh(&call->state_lock);
+	}
+
+error:
+	clear_bit(RXRPC_CALL_PROC_BUSY, &call->flags);
+	kfree(acks);
+
+	/* because we don't want two CPUs both processing the work item for one
+	 * call at the same time, we use a flag to note when it's busy; however
+	 * this means there's a race between clearing the flag and setting the
+	 * work pending bit and the work item being processed again */
+	if (call->events && !work_pending(&call->processor)) {
+		_debug("jumpstart %x", ntohl(call->conn->cid));
+		schedule_work(&call->processor);
+	}
+
+	_leave("");
+	return;
+
+no_mem:
+	_debug("out of memory");
+	goto maybe_reschedule;
+}
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
new file mode 100644
index 00000000000..ac31cceda2f
--- /dev/null
+++ b/net/rxrpc/ar-call.c
@@ -0,0 +1,787 @@
+/* RxRPC individual remote procedure call handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/circ_buf.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+struct kmem_cache *rxrpc_call_jar;
+LIST_HEAD(rxrpc_calls);
+DEFINE_RWLOCK(rxrpc_call_lock);
+static unsigned rxrpc_call_max_lifetime = 60;
+static unsigned rxrpc_dead_call_timeout = 10;
+
+static void rxrpc_destroy_call(struct work_struct *work);
+static void rxrpc_call_life_expired(unsigned long _call);
+static void rxrpc_dead_call_expired(unsigned long _call);
+static void rxrpc_ack_time_expired(unsigned long _call);
+static void rxrpc_resend_time_expired(unsigned long _call);
+
+/*
+ * allocate a new call
+ */
+static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
+{
+	struct rxrpc_call *call;
+
+	call = kmem_cache_zalloc(rxrpc_call_jar, gfp);
+	if (!call)
+		return NULL;
+
+	call->acks_winsz = 16;
+	call->acks_window = kmalloc(call->acks_winsz * sizeof(unsigned long),
+				    gfp);
+	if (!call->acks_window) {
+		kmem_cache_free(rxrpc_call_jar, call);
+		return NULL;
+	}
+
+	setup_timer(&call->lifetimer, &rxrpc_call_life_expired,
+		    (unsigned long) call);
+	setup_timer(&call->deadspan, &rxrpc_dead_call_expired,
+		    (unsigned long) call);
+	setup_timer(&call->ack_timer, &rxrpc_ack_time_expired,
+		    (unsigned long) call);
+	setup_timer(&call->resend_timer, &rxrpc_resend_time_expired,
+		    (unsigned long) call);
+	INIT_WORK(&call->destroyer, &rxrpc_destroy_call);
+	INIT_WORK(&call->processor, &rxrpc_process_call);
+	INIT_LIST_HEAD(&call->accept_link);
+	skb_queue_head_init(&call->rx_queue);
+	skb_queue_head_init(&call->rx_oos_queue);
+	init_waitqueue_head(&call->tx_waitq);
+	spin_lock_init(&call->lock);
+	rwlock_init(&call->state_lock);
+	atomic_set(&call->usage, 1);
+	call->debug_id = atomic_inc_return(&rxrpc_debug_id);
+	call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
+
+	memset(&call->sock_node, 0xed, sizeof(call->sock_node));
+
+	call->rx_data_expect = 1;
+	call->rx_data_eaten = 0;
+	call->rx_first_oos = 0;
+	call->ackr_win_top = call->rx_data_eaten + 1 + RXRPC_MAXACKS;
+	call->creation_jif = jiffies;
+	return call;
+}
+
+/*
+ * allocate a new client call and attempt to to get a connection slot for it
+ */
+static struct rxrpc_call *rxrpc_alloc_client_call(
+	struct rxrpc_sock *rx,
+	struct rxrpc_transport *trans,
+	struct rxrpc_conn_bundle *bundle,
+	gfp_t gfp)
+{
+	struct rxrpc_call *call;
+	int ret;
+
+	_enter("");
+
+	ASSERT(rx != NULL);
+	ASSERT(trans != NULL);
+	ASSERT(bundle != NULL);
+
+	call = rxrpc_alloc_call(gfp);
+	if (!call)
+		return ERR_PTR(-ENOMEM);
+
+	sock_hold(&rx->sk);
+	call->socket = rx;
+	call->rx_data_post = 1;
+
+	ret = rxrpc_connect_call(rx, trans, bundle, call, gfp);
+	if (ret < 0) {
+		kmem_cache_free(rxrpc_call_jar, call);
+		return ERR_PTR(ret);
+	}
+
+	spin_lock(&call->conn->trans->peer->lock);
+	list_add(&call->error_link, &call->conn->trans->peer->error_targets);
+	spin_unlock(&call->conn->trans->peer->lock);
+
+	call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
+	add_timer(&call->lifetimer);
+
+	_leave(" = %p", call);
+	return call;
+}
+
+/*
+ * set up a call for the given data
+ * - called in process context with IRQs enabled
+ */
+struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *rx,
+					 struct rxrpc_transport *trans,
+					 struct rxrpc_conn_bundle *bundle,
+					 unsigned long user_call_ID,
+					 int create,
+					 gfp_t gfp)
+{
+	struct rxrpc_call *call, *candidate;
+	struct rb_node *p, *parent, **pp;
+
+	_enter("%p,%d,%d,%lx,%d",
+	       rx, trans ? trans->debug_id : -1, bundle ? bundle->debug_id : -1,
+	       user_call_ID, create);
+
+	/* search the extant calls first for one that matches the specified
+	 * user ID */
+	read_lock(&rx->call_lock);
+
+	p = rx->calls.rb_node;
+	while (p) {
+		call = rb_entry(p, struct rxrpc_call, sock_node);
+
+		if (user_call_ID < call->user_call_ID)
+			p = p->rb_left;
+		else if (user_call_ID > call->user_call_ID)
+			p = p->rb_right;
+		else
+			goto found_extant_call;
+	}
+
+	read_unlock(&rx->call_lock);
+
+	if (!create || !trans)
+		return ERR_PTR(-EBADSLT);
+
+	/* not yet present - create a candidate for a new record and then
+	 * redo the search */
+	candidate = rxrpc_alloc_client_call(rx, trans, bundle, gfp);
+	if (IS_ERR(candidate)) {
+		_leave(" = %ld", PTR_ERR(candidate));
+		return candidate;
+	}
+
+	candidate->user_call_ID = user_call_ID;
+	__set_bit(RXRPC_CALL_HAS_USERID, &candidate->flags);
+
+	write_lock(&rx->call_lock);
+
+	pp = &rx->calls.rb_node;
+	parent = NULL;
+	while (*pp) {
+		parent = *pp;
+		call = rb_entry(parent, struct rxrpc_call, sock_node);
+
+		if (user_call_ID < call->user_call_ID)
+			pp = &(*pp)->rb_left;
+		else if (user_call_ID > call->user_call_ID)
+			pp = &(*pp)->rb_right;
+		else
+			goto found_extant_second;
+	}
+
+	/* second search also failed; add the new call */
+	call = candidate;
+	candidate = NULL;
+	rxrpc_get_call(call);
+
+	rb_link_node(&call->sock_node, parent, pp);
+	rb_insert_color(&call->sock_node, &rx->calls);
+	write_unlock(&rx->call_lock);
+
+	write_lock_bh(&rxrpc_call_lock);
+	list_add_tail(&call->link, &rxrpc_calls);
+	write_unlock_bh(&rxrpc_call_lock);
+
+	_net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
+
+	_leave(" = %p [new]", call);
+	return call;
+
+	/* we found the call in the list immediately */
+found_extant_call:
+	rxrpc_get_call(call);
+	read_unlock(&rx->call_lock);
+	_leave(" = %p [extant %d]", call, atomic_read(&call->usage));
+	return call;
+
+	/* we found the call on the second time through the list */
+found_extant_second:
+	rxrpc_get_call(call);
+	write_unlock(&rx->call_lock);
+	rxrpc_put_call(candidate);
+	_leave(" = %p [second %d]", call, atomic_read(&call->usage));
+	return call;
+}
+
+/*
+ * set up an incoming call
+ * - called in process context with IRQs enabled
+ */
+struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
+				       struct rxrpc_connection *conn,
+				       struct rxrpc_header *hdr,
+				       gfp_t gfp)
+{
+	struct rxrpc_call *call, *candidate;
+	struct rb_node **p, *parent;
+	__be32 call_id;
+
+	_enter(",%d,,%x", conn->debug_id, gfp);
+
+	ASSERT(rx != NULL);
+
+	candidate = rxrpc_alloc_call(gfp);
+	if (!candidate)
+		return ERR_PTR(-EBUSY);
+
+	candidate->socket = rx;
+	candidate->conn = conn;
+	candidate->cid = hdr->cid;
+	candidate->call_id = hdr->callNumber;
+	candidate->channel = ntohl(hdr->cid) & RXRPC_CHANNELMASK;
+	candidate->rx_data_post = 0;
+	candidate->state = RXRPC_CALL_SERVER_ACCEPTING;
+	if (conn->security_ix > 0)
+		candidate->state = RXRPC_CALL_SERVER_SECURING;
+
+	write_lock_bh(&conn->lock);
+
+	/* set the channel for this call */
+	call = conn->channels[candidate->channel];
+	_debug("channel[%u] is %p", candidate->channel, call);
+	if (call && call->call_id == hdr->callNumber) {
+		/* already set; must've been a duplicate packet */
+		_debug("extant call [%d]", call->state);
+		ASSERTCMP(call->conn, ==, conn);
+
+		read_lock(&call->state_lock);
+		switch (call->state) {
+		case RXRPC_CALL_LOCALLY_ABORTED:
+			if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
+				schedule_work(&call->processor);
+		case RXRPC_CALL_REMOTELY_ABORTED:
+			read_unlock(&call->state_lock);
+			goto aborted_call;
+		default:
+			rxrpc_get_call(call);
+			read_unlock(&call->state_lock);
+			goto extant_call;
+		}
+	}
+
+	if (call) {
+		/* it seems the channel is still in use from the previous call
+		 * - ditch the old binding if its call is now complete */
+		_debug("CALL: %u { %s }",
+		       call->debug_id, rxrpc_call_states[call->state]);
+
+		if (call->state >= RXRPC_CALL_COMPLETE) {
+			conn->channels[call->channel] = NULL;
+		} else {
+			write_unlock_bh(&conn->lock);
+			kmem_cache_free(rxrpc_call_jar, candidate);
+			_leave(" = -EBUSY");
+			return ERR_PTR(-EBUSY);
+		}
+	}
+
+	/* check the call number isn't duplicate */
+	_debug("check dup");
+	call_id = hdr->callNumber;
+	p = &conn->calls.rb_node;
+	parent = NULL;
+	while (*p) {
+		parent = *p;
+		call = rb_entry(parent, struct rxrpc_call, conn_node);
+
+		if (call_id < call->call_id)
+			p = &(*p)->rb_left;
+		else if (call_id > call->call_id)
+			p = &(*p)->rb_right;
+		else
+			goto old_call;
+	}
+
+	/* make the call available */
+	_debug("new call");
+	call = candidate;
+	candidate = NULL;
+	rb_link_node(&call->conn_node, parent, p);
+	rb_insert_color(&call->conn_node, &conn->calls);
+	conn->channels[call->channel] = call;
+	sock_hold(&rx->sk);
+	atomic_inc(&conn->usage);
+	write_unlock_bh(&conn->lock);
+
+	spin_lock(&conn->trans->peer->lock);
+	list_add(&call->error_link, &conn->trans->peer->error_targets);
+	spin_unlock(&conn->trans->peer->lock);
+
+	write_lock_bh(&rxrpc_call_lock);
+	list_add_tail(&call->link, &rxrpc_calls);
+	write_unlock_bh(&rxrpc_call_lock);
+
+	_net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
+
+	call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
+	add_timer(&call->lifetimer);
+	_leave(" = %p {%d} [new]", call, call->debug_id);
+	return call;
+
+extant_call:
+	write_unlock_bh(&conn->lock);
+	kmem_cache_free(rxrpc_call_jar, candidate);
+	_leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1);
+	return call;
+
+aborted_call:
+	write_unlock_bh(&conn->lock);
+	kmem_cache_free(rxrpc_call_jar, candidate);
+	_leave(" = -ECONNABORTED");
+	return ERR_PTR(-ECONNABORTED);
+
+old_call:
+	write_unlock_bh(&conn->lock);
+	kmem_cache_free(rxrpc_call_jar, candidate);
+	_leave(" = -ECONNRESET [old]");
+	return ERR_PTR(-ECONNRESET);
+}
+
+/*
+ * find an extant server call
+ * - called in process context with IRQs enabled
+ */
+struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *rx,
+					  unsigned long user_call_ID)
+{
+	struct rxrpc_call *call;
+	struct rb_node *p;
+
+	_enter("%p,%lx", rx, user_call_ID);
+
+	/* search the extant calls for one that matches the specified user
+	 * ID */
+	read_lock(&rx->call_lock);
+
+	p = rx->calls.rb_node;
+	while (p) {
+		call = rb_entry(p, struct rxrpc_call, sock_node);
+
+		if (user_call_ID < call->user_call_ID)
+			p = p->rb_left;
+		else if (user_call_ID > call->user_call_ID)
+			p = p->rb_right;
+		else
+			goto found_extant_call;
+	}
+
+	read_unlock(&rx->call_lock);
+	_leave(" = NULL");
+	return NULL;
+
+	/* we found the call in the list immediately */
+found_extant_call:
+	rxrpc_get_call(call);
+	read_unlock(&rx->call_lock);
+	_leave(" = %p [%d]", call, atomic_read(&call->usage));
+	return call;
+}
+
+/*
+ * detach a call from a socket and set up for release
+ */
+void rxrpc_release_call(struct rxrpc_call *call)
+{
+	struct rxrpc_sock *rx = call->socket;
+
+	_enter("{%d,%d,%d,%d}",
+	       call->debug_id, atomic_read(&call->usage),
+	       atomic_read(&call->ackr_not_idle),
+	       call->rx_first_oos);
+
+	spin_lock_bh(&call->lock);
+	if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags))
+		BUG();
+	spin_unlock_bh(&call->lock);
+
+	/* dissociate from the socket
+	 * - the socket's ref on the call is passed to the death timer
+	 */
+	_debug("RELEASE CALL %p (%d CONN %p)",
+	       call, call->debug_id, call->conn);
+
+	write_lock_bh(&rx->call_lock);
+	if (!list_empty(&call->accept_link)) {
+		_debug("unlinking once-pending call %p { e=%lx f=%lx }",
+		       call, call->events, call->flags);
+		ASSERT(!test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
+		list_del_init(&call->accept_link);
+		sk_acceptq_removed(&rx->sk);
+	} else if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+		rb_erase(&call->sock_node, &rx->calls);
+		memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
+		clear_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+	}
+	write_unlock_bh(&rx->call_lock);
+
+	if (call->conn->out_clientflag)
+		spin_lock(&call->conn->trans->client_lock);
+	write_lock_bh(&call->conn->lock);
+
+	/* free up the channel for reuse */
+	if (call->conn->out_clientflag) {
+		call->conn->avail_calls++;
+		if (call->conn->avail_calls == RXRPC_MAXCALLS)
+			list_move_tail(&call->conn->bundle_link,
+				       &call->conn->bundle->unused_conns);
+		else if (call->conn->avail_calls == 1)
+			list_move_tail(&call->conn->bundle_link,
+				       &call->conn->bundle->avail_conns);
+	}
+
+	write_lock(&call->state_lock);
+	if (call->conn->channels[call->channel] == call)
+		call->conn->channels[call->channel] = NULL;
+
+	if (call->state < RXRPC_CALL_COMPLETE &&
+	    call->state != RXRPC_CALL_CLIENT_FINAL_ACK) {
+		_debug("+++ ABORTING STATE %d +++\n", call->state);
+		call->state = RXRPC_CALL_LOCALLY_ABORTED;
+		call->abort_code = RX_CALL_DEAD;
+		set_bit(RXRPC_CALL_ABORT, &call->events);
+		schedule_work(&call->processor);
+	}
+	write_unlock(&call->state_lock);
+	write_unlock_bh(&call->conn->lock);
+	if (call->conn->out_clientflag)
+		spin_unlock(&call->conn->trans->client_lock);
+
+	if (!skb_queue_empty(&call->rx_queue) ||
+	    !skb_queue_empty(&call->rx_oos_queue)) {
+		struct rxrpc_skb_priv *sp;
+		struct sk_buff *skb;
+
+		_debug("purge Rx queues");
+
+		spin_lock_bh(&call->lock);
+		while ((skb = skb_dequeue(&call->rx_queue)) ||
+		       (skb = skb_dequeue(&call->rx_oos_queue))) {
+			sp = rxrpc_skb(skb);
+			if (sp->call) {
+				ASSERTCMP(sp->call, ==, call);
+				rxrpc_put_call(call);
+				sp->call = NULL;
+			}
+			skb->destructor = NULL;
+			spin_unlock_bh(&call->lock);
+
+			_debug("- zap %s %%%u #%u",
+			       rxrpc_pkts[sp->hdr.type],
+			       ntohl(sp->hdr.serial),
+			       ntohl(sp->hdr.seq));
+			rxrpc_free_skb(skb);
+			spin_lock_bh(&call->lock);
+		}
+		spin_unlock_bh(&call->lock);
+
+		ASSERTCMP(call->state, !=, RXRPC_CALL_COMPLETE);
+	}
+
+	del_timer_sync(&call->resend_timer);
+	del_timer_sync(&call->ack_timer);
+	del_timer_sync(&call->lifetimer);
+	call->deadspan.expires = jiffies + rxrpc_dead_call_timeout * HZ;
+	add_timer(&call->deadspan);
+
+	_leave("");
+}
+
+/*
+ * handle a dead call being ready for reaping
+ */
+static void rxrpc_dead_call_expired(unsigned long _call)
+{
+	struct rxrpc_call *call = (struct rxrpc_call *) _call;
+
+	_enter("{%d}", call->debug_id);
+
+	write_lock_bh(&call->state_lock);
+	call->state = RXRPC_CALL_DEAD;
+	write_unlock_bh(&call->state_lock);
+	rxrpc_put_call(call);
+}
+
+/*
+ * mark a call as to be released, aborting it if it's still in progress
+ * - called with softirqs disabled
+ */
+static void rxrpc_mark_call_released(struct rxrpc_call *call)
+{
+	bool sched;
+
+	write_lock(&call->state_lock);
+	if (call->state < RXRPC_CALL_DEAD) {
+		sched = false;
+		if (call->state < RXRPC_CALL_COMPLETE) {
+			_debug("abort call %p", call);
+			call->state = RXRPC_CALL_LOCALLY_ABORTED;
+			call->abort_code = RX_CALL_DEAD;
+			if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
+				sched = true;
+		}
+		if (!test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+			sched = true;
+		if (sched)
+			schedule_work(&call->processor);
+	}
+	write_unlock(&call->state_lock);
+}
+
+/*
+ * release all the calls associated with a socket
+ */
+void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
+{
+	struct rxrpc_call *call;
+	struct rb_node *p;
+
+	_enter("%p", rx);
+
+	read_lock_bh(&rx->call_lock);
+
+	/* mark all the calls as no longer wanting incoming packets */
+	for (p = rb_first(&rx->calls); p; p = rb_next(p)) {
+		call = rb_entry(p, struct rxrpc_call, sock_node);
+		rxrpc_mark_call_released(call);
+	}
+
+	/* kill the not-yet-accepted incoming calls */
+	list_for_each_entry(call, &rx->secureq, accept_link) {
+		rxrpc_mark_call_released(call);
+	}
+
+	list_for_each_entry(call, &rx->acceptq, accept_link) {
+		rxrpc_mark_call_released(call);
+	}
+
+	read_unlock_bh(&rx->call_lock);
+	_leave("");
+}
+
+/*
+ * release a call
+ */
+void __rxrpc_put_call(struct rxrpc_call *call)
+{
+	ASSERT(call != NULL);
+
+	_enter("%p{u=%d}", call, atomic_read(&call->usage));
+
+	ASSERTCMP(atomic_read(&call->usage), >, 0);
+
+	if (atomic_dec_and_test(&call->usage)) {
+		_debug("call %d dead", call->debug_id);
+		ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
+		schedule_work(&call->destroyer);
+	}
+	_leave("");
+}
+
+/*
+ * clean up a call
+ */
+static void rxrpc_cleanup_call(struct rxrpc_call *call)
+{
+	_net("DESTROY CALL %d", call->debug_id);
+
+	ASSERT(call->socket);
+
+	memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
+
+	del_timer_sync(&call->lifetimer);
+	del_timer_sync(&call->deadspan);
+	del_timer_sync(&call->ack_timer);
+	del_timer_sync(&call->resend_timer);
+
+	ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
+	ASSERTCMP(call->events, ==, 0);
+	if (work_pending(&call->processor)) {
+		_debug("defer destroy");
+		schedule_work(&call->destroyer);
+		return;
+	}
+
+	if (call->conn) {
+		spin_lock(&call->conn->trans->peer->lock);
+		list_del(&call->error_link);
+		spin_unlock(&call->conn->trans->peer->lock);
+
+		write_lock_bh(&call->conn->lock);
+		rb_erase(&call->conn_node, &call->conn->calls);
+		write_unlock_bh(&call->conn->lock);
+		rxrpc_put_connection(call->conn);
+	}
+
+	if (call->acks_window) {
+		_debug("kill Tx window %d",
+		       CIRC_CNT(call->acks_head, call->acks_tail,
+				call->acks_winsz));
+		smp_mb();
+		while (CIRC_CNT(call->acks_head, call->acks_tail,
+				call->acks_winsz) > 0) {
+			struct rxrpc_skb_priv *sp;
+			unsigned long _skb;
+
+			_skb = call->acks_window[call->acks_tail] & ~1;
+			sp = rxrpc_skb((struct sk_buff *) _skb);
+			_debug("+++ clear Tx %u", ntohl(sp->hdr.seq));
+			rxrpc_free_skb((struct sk_buff *) _skb);
+			call->acks_tail =
+				(call->acks_tail + 1) & (call->acks_winsz - 1);
+		}
+
+		kfree(call->acks_window);
+	}
+
+	rxrpc_free_skb(call->tx_pending);
+
+	rxrpc_purge_queue(&call->rx_queue);
+	ASSERT(skb_queue_empty(&call->rx_oos_queue));
+	sock_put(&call->socket->sk);
+	kmem_cache_free(rxrpc_call_jar, call);
+}
+
+/*
+ * destroy a call
+ */
+static void rxrpc_destroy_call(struct work_struct *work)
+{
+	struct rxrpc_call *call =
+		container_of(work, struct rxrpc_call, destroyer);
+
+	_enter("%p{%d,%d,%p}",
+	       call, atomic_read(&call->usage), call->channel, call->conn);
+
+	ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
+
+	write_lock_bh(&rxrpc_call_lock);
+	list_del_init(&call->link);
+	write_unlock_bh(&rxrpc_call_lock);
+
+	rxrpc_cleanup_call(call);
+	_leave("");
+}
+
+/*
+ * preemptively destroy all the call records from a transport endpoint rather
+ * than waiting for them to time out
+ */
+void __exit rxrpc_destroy_all_calls(void)
+{
+	struct rxrpc_call *call;
+
+	_enter("");
+	write_lock_bh(&rxrpc_call_lock);
+
+	while (!list_empty(&rxrpc_calls)) {
+		call = list_entry(rxrpc_calls.next, struct rxrpc_call, link);
+		_debug("Zapping call %p", call);
+
+		list_del_init(&call->link);
+
+		switch (atomic_read(&call->usage)) {
+		case 0:
+			ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
+			break;
+		case 1:
+			if (del_timer_sync(&call->deadspan) != 0 &&
+			    call->state != RXRPC_CALL_DEAD)
+				rxrpc_dead_call_expired((unsigned long) call);
+			if (call->state != RXRPC_CALL_DEAD)
+				break;
+		default:
+			printk(KERN_ERR "RXRPC:"
+			       " Call %p still in use (%d,%d,%s,%lx,%lx)!\n",
+			       call, atomic_read(&call->usage),
+			       atomic_read(&call->ackr_not_idle),
+			       rxrpc_call_states[call->state],
+			       call->flags, call->events);
+			if (!skb_queue_empty(&call->rx_queue))
+				printk(KERN_ERR"RXRPC: Rx queue occupied\n");
+			if (!skb_queue_empty(&call->rx_oos_queue))
+				printk(KERN_ERR"RXRPC: OOS queue occupied\n");
+			break;
+		}
+
+		write_unlock_bh(&rxrpc_call_lock);
+		cond_resched();
+		write_lock_bh(&rxrpc_call_lock);
+	}
+
+	write_unlock_bh(&rxrpc_call_lock);
+	_leave("");
+}
+
+/*
+ * handle call lifetime being exceeded
+ */
+static void rxrpc_call_life_expired(unsigned long _call)
+{
+	struct rxrpc_call *call = (struct rxrpc_call *) _call;
+
+	if (call->state >= RXRPC_CALL_COMPLETE)
+		return;
+
+	_enter("{%d}", call->debug_id);
+	read_lock_bh(&call->state_lock);
+	if (call->state < RXRPC_CALL_COMPLETE) {
+		set_bit(RXRPC_CALL_LIFE_TIMER, &call->events);
+		schedule_work(&call->processor);
+	}
+	read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * handle resend timer expiry
+ */
+static void rxrpc_resend_time_expired(unsigned long _call)
+{
+	struct rxrpc_call *call = (struct rxrpc_call *) _call;
+
+	_enter("{%d}", call->debug_id);
+
+	if (call->state >= RXRPC_CALL_COMPLETE)
+		return;
+
+	read_lock_bh(&call->state_lock);
+	clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+	if (call->state < RXRPC_CALL_COMPLETE &&
+	    !test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
+		schedule_work(&call->processor);
+	read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * handle ACK timer expiry
+ */
+static void rxrpc_ack_time_expired(unsigned long _call)
+{
+	struct rxrpc_call *call = (struct rxrpc_call *) _call;
+
+	_enter("{%d}", call->debug_id);
+
+	if (call->state >= RXRPC_CALL_COMPLETE)
+		return;
+
+	read_lock_bh(&call->state_lock);
+	if (call->state < RXRPC_CALL_COMPLETE &&
+	    !test_and_set_bit(RXRPC_CALL_ACK, &call->events))
+		schedule_work(&call->processor);
+	read_unlock_bh(&call->state_lock);
+}
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
new file mode 100644
index 00000000000..01eb33c3057
--- /dev/null
+++ b/net/rxrpc/ar-connection.c
@@ -0,0 +1,895 @@
+/* RxRPC virtual connection handler
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/crypto.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static void rxrpc_connection_reaper(struct work_struct *work);
+
+LIST_HEAD(rxrpc_connections);
+DEFINE_RWLOCK(rxrpc_connection_lock);
+static unsigned long rxrpc_connection_timeout = 10 * 60;
+static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
+
+/*
+ * allocate a new client connection bundle
+ */
+static struct rxrpc_conn_bundle *rxrpc_alloc_bundle(gfp_t gfp)
+{
+	struct rxrpc_conn_bundle *bundle;
+
+	_enter("");
+
+	bundle = kzalloc(sizeof(struct rxrpc_conn_bundle), gfp);
+	if (bundle) {
+		INIT_LIST_HEAD(&bundle->unused_conns);
+		INIT_LIST_HEAD(&bundle->avail_conns);
+		INIT_LIST_HEAD(&bundle->busy_conns);
+		init_waitqueue_head(&bundle->chanwait);
+		atomic_set(&bundle->usage, 1);
+	}
+
+	_leave(" = %p", bundle);
+	return bundle;
+}
+
+/*
+ * compare bundle parameters with what we're looking for
+ * - return -ve, 0 or +ve
+ */
+static inline
+int rxrpc_cmp_bundle(const struct rxrpc_conn_bundle *bundle,
+		     struct key *key, __be16 service_id)
+{
+	return (bundle->service_id - service_id) ?:
+		((unsigned long) bundle->key - (unsigned long) key);
+}
+
+/*
+ * get bundle of client connections that a client socket can make use of
+ */
+struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *rx,
+					   struct rxrpc_transport *trans,
+					   struct key *key,
+					   __be16 service_id,
+					   gfp_t gfp)
+{
+	struct rxrpc_conn_bundle *bundle, *candidate;
+	struct rb_node *p, *parent, **pp;
+
+	_enter("%p{%x},%x,%hx,",
+	       rx, key_serial(key), trans->debug_id, ntohl(service_id));
+
+	if (rx->trans == trans && rx->bundle) {
+		atomic_inc(&rx->bundle->usage);
+		return rx->bundle;
+	}
+
+	/* search the extant bundles first for one that matches the specified
+	 * user ID */
+	spin_lock(&trans->client_lock);
+
+	p = trans->bundles.rb_node;
+	while (p) {
+		bundle = rb_entry(p, struct rxrpc_conn_bundle, node);
+
+		if (rxrpc_cmp_bundle(bundle, key, service_id) < 0)
+			p = p->rb_left;
+		else if (rxrpc_cmp_bundle(bundle, key, service_id) > 0)
+			p = p->rb_right;
+		else
+			goto found_extant_bundle;
+	}
+
+	spin_unlock(&trans->client_lock);
+
+	/* not yet present - create a candidate for a new record and then
+	 * redo the search */
+	candidate = rxrpc_alloc_bundle(gfp);
+	if (!candidate) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	candidate->key = key_get(key);
+	candidate->service_id = service_id;
+
+	spin_lock(&trans->client_lock);
+
+	pp = &trans->bundles.rb_node;
+	parent = NULL;
+	while (*pp) {
+		parent = *pp;
+		bundle = rb_entry(parent, struct rxrpc_conn_bundle, node);
+
+		if (rxrpc_cmp_bundle(bundle, key, service_id) < 0)
+			pp = &(*pp)->rb_left;
+		else if (rxrpc_cmp_bundle(bundle, key, service_id) > 0)
+			pp = &(*pp)->rb_right;
+		else
+			goto found_extant_second;
+	}
+
+	/* second search also failed; add the new bundle */
+	bundle = candidate;
+	candidate = NULL;
+
+	rb_link_node(&bundle->node, parent, pp);
+	rb_insert_color(&bundle->node, &trans->bundles);
+	spin_unlock(&trans->client_lock);
+	_net("BUNDLE new on trans %d", trans->debug_id);
+	if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
+		atomic_inc(&bundle->usage);
+		rx->bundle = bundle;
+	}
+	_leave(" = %p [new]", bundle);
+	return bundle;
+
+	/* we found the bundle in the list immediately */
+found_extant_bundle:
+	atomic_inc(&bundle->usage);
+	spin_unlock(&trans->client_lock);
+	_net("BUNDLE old on trans %d", trans->debug_id);
+	if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
+		atomic_inc(&bundle->usage);
+		rx->bundle = bundle;
+	}
+	_leave(" = %p [extant %d]", bundle, atomic_read(&bundle->usage));
+	return bundle;
+
+	/* we found the bundle on the second time through the list */
+found_extant_second:
+	atomic_inc(&bundle->usage);
+	spin_unlock(&trans->client_lock);
+	kfree(candidate);
+	_net("BUNDLE old2 on trans %d", trans->debug_id);
+	if (!rx->bundle && rx->sk.sk_state == RXRPC_CLIENT_CONNECTED) {
+		atomic_inc(&bundle->usage);
+		rx->bundle = bundle;
+	}
+	_leave(" = %p [second %d]", bundle, atomic_read(&bundle->usage));
+	return bundle;
+}
+
+/*
+ * release a bundle
+ */
+void rxrpc_put_bundle(struct rxrpc_transport *trans,
+		      struct rxrpc_conn_bundle *bundle)
+{
+	_enter("%p,%p{%d}",trans, bundle, atomic_read(&bundle->usage));
+
+	if (atomic_dec_and_lock(&bundle->usage, &trans->client_lock)) {
+		_debug("Destroy bundle");
+		rb_erase(&bundle->node, &trans->bundles);
+		spin_unlock(&trans->client_lock);
+		ASSERT(list_empty(&bundle->unused_conns));
+		ASSERT(list_empty(&bundle->avail_conns));
+		ASSERT(list_empty(&bundle->busy_conns));
+		ASSERTCMP(bundle->num_conns, ==, 0);
+		key_put(bundle->key);
+		kfree(bundle);
+	}
+
+	_leave("");
+}
+
+/*
+ * allocate a new connection
+ */
+static struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
+{
+	struct rxrpc_connection *conn;
+
+	_enter("");
+
+	conn = kzalloc(sizeof(struct rxrpc_connection), gfp);
+	if (conn) {
+		INIT_WORK(&conn->processor, &rxrpc_process_connection);
+		INIT_LIST_HEAD(&conn->bundle_link);
+		conn->calls = RB_ROOT;
+		skb_queue_head_init(&conn->rx_queue);
+		rwlock_init(&conn->lock);
+		spin_lock_init(&conn->state_lock);
+		atomic_set(&conn->usage, 1);
+		conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
+		conn->avail_calls = RXRPC_MAXCALLS;
+		conn->size_align = 4;
+		conn->header_size = sizeof(struct rxrpc_header);
+	}
+
+	_leave(" = %p{%d}", conn, conn->debug_id);
+	return conn;
+}
+
+/*
+ * assign a connection ID to a connection and add it to the transport's
+ * connection lookup tree
+ * - called with transport client lock held
+ */
+static void rxrpc_assign_connection_id(struct rxrpc_connection *conn)
+{
+	struct rxrpc_connection *xconn;
+	struct rb_node *parent, **p;
+	__be32 epoch;
+	u32 real_conn_id;
+
+	_enter("");
+
+	epoch = conn->epoch;
+
+	write_lock_bh(&conn->trans->conn_lock);
+
+	conn->trans->conn_idcounter += RXRPC_CID_INC;
+	if (conn->trans->conn_idcounter < RXRPC_CID_INC)
+		conn->trans->conn_idcounter = RXRPC_CID_INC;
+	real_conn_id = conn->trans->conn_idcounter;
+
+attempt_insertion:
+	parent = NULL;
+	p = &conn->trans->client_conns.rb_node;
+
+	while (*p) {
+		parent = *p;
+		xconn = rb_entry(parent, struct rxrpc_connection, node);
+
+		if (epoch < xconn->epoch)
+			p = &(*p)->rb_left;
+		else if (epoch > xconn->epoch)
+			p = &(*p)->rb_right;
+		else if (real_conn_id < xconn->real_conn_id)
+			p = &(*p)->rb_left;
+		else if (real_conn_id > xconn->real_conn_id)
+			p = &(*p)->rb_right;
+		else
+			goto id_exists;
+	}
+
+	/* we've found a suitable hole - arrange for this connection to occupy
+	 * it */
+	rb_link_node(&conn->node, parent, p);
+	rb_insert_color(&conn->node, &conn->trans->client_conns);
+
+	conn->real_conn_id = real_conn_id;
+	conn->cid = htonl(real_conn_id);
+	write_unlock_bh(&conn->trans->conn_lock);
+	_leave(" [CONNID %x CID %x]", real_conn_id, ntohl(conn->cid));
+	return;
+
+	/* we found a connection with the proposed ID - walk the tree from that
+	 * point looking for the next unused ID */
+id_exists:
+	for (;;) {
+		real_conn_id += RXRPC_CID_INC;
+		if (real_conn_id < RXRPC_CID_INC) {
+			real_conn_id = RXRPC_CID_INC;
+			conn->trans->conn_idcounter = real_conn_id;
+			goto attempt_insertion;
+		}
+
+		parent = rb_next(parent);
+		if (!parent)
+			goto attempt_insertion;
+
+		xconn = rb_entry(parent, struct rxrpc_connection, node);
+		if (epoch < xconn->epoch ||
+		    real_conn_id < xconn->real_conn_id)
+			goto attempt_insertion;
+	}
+}
+
+/*
+ * add a call to a connection's call-by-ID tree
+ */
+static void rxrpc_add_call_ID_to_conn(struct rxrpc_connection *conn,
+				      struct rxrpc_call *call)
+{
+	struct rxrpc_call *xcall;
+	struct rb_node *parent, **p;
+	__be32 call_id;
+
+	write_lock_bh(&conn->lock);
+
+	call_id = call->call_id;
+	p = &conn->calls.rb_node;
+	parent = NULL;
+	while (*p) {
+		parent = *p;
+		xcall = rb_entry(parent, struct rxrpc_call, conn_node);
+
+		if (call_id < xcall->call_id)
+			p = &(*p)->rb_left;
+		else if (call_id > xcall->call_id)
+			p = &(*p)->rb_right;
+		else
+			BUG();
+	}
+
+	rb_link_node(&call->conn_node, parent, p);
+	rb_insert_color(&call->conn_node, &conn->calls);
+
+	write_unlock_bh(&conn->lock);
+}
+
+/*
+ * connect a call on an exclusive connection
+ */
+static int rxrpc_connect_exclusive(struct rxrpc_sock *rx,
+				   struct rxrpc_transport *trans,
+				   __be16 service_id,
+				   struct rxrpc_call *call,
+				   gfp_t gfp)
+{
+	struct rxrpc_connection *conn;
+	int chan, ret;
+
+	_enter("");
+
+	conn = rx->conn;
+	if (!conn) {
+		/* not yet present - create a candidate for a new connection
+		 * and then redo the check */
+		conn = rxrpc_alloc_connection(gfp);
+		if (IS_ERR(conn)) {
+			_leave(" = %ld", PTR_ERR(conn));
+			return PTR_ERR(conn);
+		}
+
+		conn->trans = trans;
+		conn->bundle = NULL;
+		conn->service_id = service_id;
+		conn->epoch = rxrpc_epoch;
+		conn->in_clientflag = 0;
+		conn->out_clientflag = RXRPC_CLIENT_INITIATED;
+		conn->cid = 0;
+		conn->state = RXRPC_CONN_CLIENT;
+		conn->avail_calls = RXRPC_MAXCALLS;
+		conn->security_level = rx->min_sec_level;
+		conn->key = key_get(rx->key);
+
+		ret = rxrpc_init_client_conn_security(conn);
+		if (ret < 0) {
+			key_put(conn->key);
+			kfree(conn);
+			_leave(" = %d [key]", ret);
+			return ret;
+		}
+
+		write_lock_bh(&rxrpc_connection_lock);
+		list_add_tail(&conn->link, &rxrpc_connections);
+		write_unlock_bh(&rxrpc_connection_lock);
+
+		spin_lock(&trans->client_lock);
+		atomic_inc(&trans->usage);
+
+		_net("CONNECT EXCL new %d on TRANS %d",
+		     conn->debug_id, conn->trans->debug_id);
+
+		rxrpc_assign_connection_id(conn);
+		rx->conn = conn;
+	}
+
+	/* we've got a connection with a free channel and we can now attach the
+	 * call to it
+	 * - we're holding the transport's client lock
+	 * - we're holding a reference on the connection
+	 */
+	for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
+		if (!conn->channels[chan])
+			goto found_channel;
+	goto no_free_channels;
+
+found_channel:
+	atomic_inc(&conn->usage);
+	conn->channels[chan] = call;
+	call->conn = conn;
+	call->channel = chan;
+	call->cid = conn->cid | htonl(chan);
+	call->call_id = htonl(++conn->call_counter);
+
+	_net("CONNECT client on conn %d chan %d as call %x",
+	     conn->debug_id, chan, ntohl(call->call_id));
+
+	spin_unlock(&trans->client_lock);
+
+	rxrpc_add_call_ID_to_conn(conn, call);
+	_leave(" = 0");
+	return 0;
+
+no_free_channels:
+	spin_unlock(&trans->client_lock);
+	_leave(" = -ENOSR");
+	return -ENOSR;
+}
+
+/*
+ * find a connection for a call
+ * - called in process context with IRQs enabled
+ */
+int rxrpc_connect_call(struct rxrpc_sock *rx,
+		       struct rxrpc_transport *trans,
+		       struct rxrpc_conn_bundle *bundle,
+		       struct rxrpc_call *call,
+		       gfp_t gfp)
+{
+	struct rxrpc_connection *conn, *candidate;
+	int chan, ret;
+
+	DECLARE_WAITQUEUE(myself, current);
+
+	_enter("%p,%lx,", rx, call->user_call_ID);
+
+	if (test_bit(RXRPC_SOCK_EXCLUSIVE_CONN, &rx->flags))
+		return rxrpc_connect_exclusive(rx, trans, bundle->service_id,
+					       call, gfp);
+
+	spin_lock(&trans->client_lock);
+	for (;;) {
+		/* see if the bundle has a call slot available */
+		if (!list_empty(&bundle->avail_conns)) {
+			_debug("avail");
+			conn = list_entry(bundle->avail_conns.next,
+					  struct rxrpc_connection,
+					  bundle_link);
+			if (--conn->avail_calls == 0)
+				list_move(&conn->bundle_link,
+					  &bundle->busy_conns);
+			atomic_inc(&conn->usage);
+			break;
+		}
+
+		if (!list_empty(&bundle->unused_conns)) {
+			_debug("unused");
+			conn = list_entry(bundle->unused_conns.next,
+					  struct rxrpc_connection,
+					  bundle_link);
+			atomic_inc(&conn->usage);
+			list_move(&conn->bundle_link, &bundle->avail_conns);
+			break;
+		}
+
+		/* need to allocate a new connection */
+		_debug("get new conn [%d]", bundle->num_conns);
+
+		spin_unlock(&trans->client_lock);
+
+		if (signal_pending(current))
+			goto interrupted;
+
+		if (bundle->num_conns >= 20) {
+			_debug("too many conns");
+
+			if (!(gfp & __GFP_WAIT)) {
+				_leave(" = -EAGAIN");
+				return -EAGAIN;
+			}
+
+			add_wait_queue(&bundle->chanwait, &myself);
+			for (;;) {
+				set_current_state(TASK_INTERRUPTIBLE);
+				if (bundle->num_conns < 20 ||
+				    !list_empty(&bundle->unused_conns) ||
+				    !list_empty(&bundle->avail_conns))
+					break;
+				if (signal_pending(current))
+					goto interrupted_dequeue;
+				schedule();
+			}
+			remove_wait_queue(&bundle->chanwait, &myself);
+			__set_current_state(TASK_RUNNING);
+			spin_lock(&trans->client_lock);
+			continue;
+		}
+
+		/* not yet present - create a candidate for a new connection and then
+		 * redo the check */
+		candidate = rxrpc_alloc_connection(gfp);
+		if (IS_ERR(candidate)) {
+			_leave(" = %ld", PTR_ERR(candidate));
+			return PTR_ERR(candidate);
+		}
+
+		candidate->trans = trans;
+		candidate->bundle = bundle;
+		candidate->service_id = bundle->service_id;
+		candidate->epoch = rxrpc_epoch;
+		candidate->in_clientflag = 0;
+		candidate->out_clientflag = RXRPC_CLIENT_INITIATED;
+		candidate->cid = 0;
+		candidate->state = RXRPC_CONN_CLIENT;
+		candidate->avail_calls = RXRPC_MAXCALLS;
+		candidate->security_level = rx->min_sec_level;
+		candidate->key = key_get(rx->key);
+
+		ret = rxrpc_init_client_conn_security(candidate);
+		if (ret < 0) {
+			key_put(candidate->key);
+			kfree(candidate);
+			_leave(" = %d [key]", ret);
+			return ret;
+		}
+
+		write_lock_bh(&rxrpc_connection_lock);
+		list_add_tail(&candidate->link, &rxrpc_connections);
+		write_unlock_bh(&rxrpc_connection_lock);
+
+		spin_lock(&trans->client_lock);
+
+		list_add(&candidate->bundle_link, &bundle->unused_conns);
+		bundle->num_conns++;
+		atomic_inc(&bundle->usage);
+		atomic_inc(&trans->usage);
+
+		_net("CONNECT new %d on TRANS %d",
+		     candidate->debug_id, candidate->trans->debug_id);
+
+		rxrpc_assign_connection_id(candidate);
+		if (candidate->security)
+			candidate->security->prime_packet_security(candidate);
+
+		/* leave the candidate lurking in zombie mode attached to the
+		 * bundle until we're ready for it */
+		rxrpc_put_connection(candidate);
+		candidate = NULL;
+	}
+
+	/* we've got a connection with a free channel and we can now attach the
+	 * call to it
+	 * - we're holding the transport's client lock
+	 * - we're holding a reference on the connection
+	 * - we're holding a reference on the bundle
+	 */
+	for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
+		if (!conn->channels[chan])
+			goto found_channel;
+	BUG();
+
+found_channel:
+	conn->channels[chan] = call;
+	call->conn = conn;
+	call->channel = chan;
+	call->cid = conn->cid | htonl(chan);
+	call->call_id = htonl(++conn->call_counter);
+
+	_net("CONNECT client on conn %d chan %d as call %x",
+	     conn->debug_id, chan, ntohl(call->call_id));
+
+	spin_unlock(&trans->client_lock);
+
+	rxrpc_add_call_ID_to_conn(conn, call);
+
+	_leave(" = 0");
+	return 0;
+
+interrupted_dequeue:
+	remove_wait_queue(&bundle->chanwait, &myself);
+	__set_current_state(TASK_RUNNING);
+interrupted:
+	_leave(" = -ERESTARTSYS");
+	return -ERESTARTSYS;
+}
+
+/*
+ * get a record of an incoming connection
+ */
+struct rxrpc_connection *
+rxrpc_incoming_connection(struct rxrpc_transport *trans,
+			  struct rxrpc_header *hdr,
+			  gfp_t gfp)
+{
+	struct rxrpc_connection *conn, *candidate = NULL;
+	struct rb_node *p, **pp;
+	const char *new = "old";
+	__be32 epoch;
+	u32 conn_id;
+
+	_enter("");
+
+	ASSERT(hdr->flags & RXRPC_CLIENT_INITIATED);
+
+	epoch = hdr->epoch;
+	conn_id = ntohl(hdr->cid) & RXRPC_CIDMASK;
+
+	/* search the connection list first */
+	read_lock_bh(&trans->conn_lock);
+
+	p = trans->server_conns.rb_node;
+	while (p) {
+		conn = rb_entry(p, struct rxrpc_connection, node);
+
+		_debug("maybe %x", conn->real_conn_id);
+
+		if (epoch < conn->epoch)
+			p = p->rb_left;
+		else if (epoch > conn->epoch)
+			p = p->rb_right;
+		else if (conn_id < conn->real_conn_id)
+			p = p->rb_left;
+		else if (conn_id > conn->real_conn_id)
+			p = p->rb_right;
+		else
+			goto found_extant_connection;
+	}
+	read_unlock_bh(&trans->conn_lock);
+
+	/* not yet present - create a candidate for a new record and then
+	 * redo the search */
+	candidate = rxrpc_alloc_connection(gfp);
+	if (!candidate) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	candidate->trans = trans;
+	candidate->epoch = hdr->epoch;
+	candidate->cid = hdr->cid & __constant_cpu_to_be32(RXRPC_CIDMASK);
+	candidate->service_id = hdr->serviceId;
+	candidate->security_ix = hdr->securityIndex;
+	candidate->in_clientflag = RXRPC_CLIENT_INITIATED;
+	candidate->out_clientflag = 0;
+	candidate->real_conn_id = conn_id;
+	candidate->state = RXRPC_CONN_SERVER;
+	if (candidate->service_id)
+		candidate->state = RXRPC_CONN_SERVER_UNSECURED;
+
+	write_lock_bh(&trans->conn_lock);
+
+	pp = &trans->server_conns.rb_node;
+	p = NULL;
+	while (*pp) {
+		p = *pp;
+		conn = rb_entry(p, struct rxrpc_connection, node);
+
+		if (epoch < conn->epoch)
+			pp = &(*pp)->rb_left;
+		else if (epoch > conn->epoch)
+			pp = &(*pp)->rb_right;
+		else if (conn_id < conn->real_conn_id)
+			pp = &(*pp)->rb_left;
+		else if (conn_id > conn->real_conn_id)
+			pp = &(*pp)->rb_right;
+		else
+			goto found_extant_second;
+	}
+
+	/* we can now add the new candidate to the list */
+	conn = candidate;
+	candidate = NULL;
+	rb_link_node(&conn->node, p, pp);
+	rb_insert_color(&conn->node, &trans->server_conns);
+	atomic_inc(&conn->trans->usage);
+
+	write_unlock_bh(&trans->conn_lock);
+
+	write_lock_bh(&rxrpc_connection_lock);
+	list_add_tail(&conn->link, &rxrpc_connections);
+	write_unlock_bh(&rxrpc_connection_lock);
+
+	new = "new";
+
+success:
+	_net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->real_conn_id);
+
+	_leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
+	return conn;
+
+	/* we found the connection in the list immediately */
+found_extant_connection:
+	if (hdr->securityIndex != conn->security_ix) {
+		read_unlock_bh(&trans->conn_lock);
+		goto security_mismatch;
+	}
+	atomic_inc(&conn->usage);
+	read_unlock_bh(&trans->conn_lock);
+	goto success;
+
+	/* we found the connection on the second time through the list */
+found_extant_second:
+	if (hdr->securityIndex != conn->security_ix) {
+		write_unlock_bh(&trans->conn_lock);
+		goto security_mismatch;
+	}
+	atomic_inc(&conn->usage);
+	write_unlock_bh(&trans->conn_lock);
+	kfree(candidate);
+	goto success;
+
+security_mismatch:
+	kfree(candidate);
+	_leave(" = -EKEYREJECTED");
+	return ERR_PTR(-EKEYREJECTED);
+}
+
+/*
+ * find a connection based on transport and RxRPC connection ID for an incoming
+ * packet
+ */
+struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *trans,
+					       struct rxrpc_header *hdr)
+{
+	struct rxrpc_connection *conn;
+	struct rb_node *p;
+	__be32 epoch;
+	u32 conn_id;
+
+	_enter(",{%x,%x}", ntohl(hdr->cid), hdr->flags);
+
+	read_lock_bh(&trans->conn_lock);
+
+	conn_id = ntohl(hdr->cid) & RXRPC_CIDMASK;
+	epoch = hdr->epoch;
+
+	if (hdr->flags & RXRPC_CLIENT_INITIATED)
+		p = trans->server_conns.rb_node;
+	else
+		p = trans->client_conns.rb_node;
+
+	while (p) {
+		conn = rb_entry(p, struct rxrpc_connection, node);
+
+		_debug("maybe %x", conn->real_conn_id);
+
+		if (epoch < conn->epoch)
+			p = p->rb_left;
+		else if (epoch > conn->epoch)
+			p = p->rb_right;
+		else if (conn_id < conn->real_conn_id)
+			p = p->rb_left;
+		else if (conn_id > conn->real_conn_id)
+			p = p->rb_right;
+		else
+			goto found;
+	}
+
+	read_unlock_bh(&trans->conn_lock);
+	_leave(" = NULL");
+	return NULL;
+
+found:
+	atomic_inc(&conn->usage);
+	read_unlock_bh(&trans->conn_lock);
+	_leave(" = %p", conn);
+	return conn;
+}
+
+/*
+ * release a virtual connection
+ */
+void rxrpc_put_connection(struct rxrpc_connection *conn)
+{
+	_enter("%p{u=%d,d=%d}",
+	       conn, atomic_read(&conn->usage), conn->debug_id);
+
+	ASSERTCMP(atomic_read(&conn->usage), >, 0);
+
+	conn->put_time = xtime.tv_sec;
+	if (atomic_dec_and_test(&conn->usage)) {
+		_debug("zombie");
+		schedule_delayed_work(&rxrpc_connection_reap, 0);
+	}
+
+	_leave("");
+}
+
+/*
+ * destroy a virtual connection
+ */
+static void rxrpc_destroy_connection(struct rxrpc_connection *conn)
+{
+	_enter("%p{%d}", conn, atomic_read(&conn->usage));
+
+	ASSERTCMP(atomic_read(&conn->usage), ==, 0);
+
+	_net("DESTROY CONN %d", conn->debug_id);
+
+	if (conn->bundle)
+		rxrpc_put_bundle(conn->trans, conn->bundle);
+
+	ASSERT(RB_EMPTY_ROOT(&conn->calls));
+	rxrpc_purge_queue(&conn->rx_queue);
+
+	rxrpc_clear_conn_security(conn);
+	rxrpc_put_transport(conn->trans);
+	kfree(conn);
+	_leave("");
+}
+
+/*
+ * reap dead connections
+ */
+void rxrpc_connection_reaper(struct work_struct *work)
+{
+	struct rxrpc_connection *conn, *_p;
+	unsigned long now, earliest, reap_time;
+
+	LIST_HEAD(graveyard);
+
+	_enter("");
+
+	now = xtime.tv_sec;
+	earliest = ULONG_MAX;
+
+	write_lock_bh(&rxrpc_connection_lock);
+	list_for_each_entry_safe(conn, _p, &rxrpc_connections, link) {
+		_debug("reap CONN %d { u=%d,t=%ld }",
+		       conn->debug_id, atomic_read(&conn->usage),
+		       (long) now - (long) conn->put_time);
+
+		if (likely(atomic_read(&conn->usage) > 0))
+			continue;
+
+		spin_lock(&conn->trans->client_lock);
+		write_lock(&conn->trans->conn_lock);
+		reap_time = conn->put_time + rxrpc_connection_timeout;
+
+		if (atomic_read(&conn->usage) > 0) {
+			;
+		} else if (reap_time <= now) {
+			list_move_tail(&conn->link, &graveyard);
+			if (conn->out_clientflag)
+				rb_erase(&conn->node,
+					 &conn->trans->client_conns);
+			else
+				rb_erase(&conn->node,
+					 &conn->trans->server_conns);
+			if (conn->bundle) {
+				list_del_init(&conn->bundle_link);
+				conn->bundle->num_conns--;
+			}
+
+		} else if (reap_time < earliest) {
+			earliest = reap_time;
+		}
+
+		write_unlock(&conn->trans->conn_lock);
+		spin_unlock(&conn->trans->client_lock);
+	}
+	write_unlock_bh(&rxrpc_connection_lock);
+
+	if (earliest != ULONG_MAX) {
+		_debug("reschedule reaper %ld", (long) earliest - now);
+		ASSERTCMP(earliest, >, now);
+		schedule_delayed_work(&rxrpc_connection_reap,
+				      (earliest - now) * HZ);
+	}
+
+	/* then destroy all those pulled out */
+	while (!list_empty(&graveyard)) {
+		conn = list_entry(graveyard.next, struct rxrpc_connection,
+				  link);
+		list_del_init(&conn->link);
+
+		ASSERTCMP(atomic_read(&conn->usage), ==, 0);
+		rxrpc_destroy_connection(conn);
+	}
+
+	_leave("");
+}
+
+/*
+ * preemptively destroy all the connection records rather than waiting for them
+ * to time out
+ */
+void __exit rxrpc_destroy_all_connections(void)
+{
+	_enter("");
+
+	rxrpc_connection_timeout = 0;
+	cancel_delayed_work(&rxrpc_connection_reap);
+	schedule_delayed_work(&rxrpc_connection_reap, 0);
+
+	_leave("");
+}
diff --git a/net/rxrpc/ar-connevent.c b/net/rxrpc/ar-connevent.c
new file mode 100644
index 00000000000..4b02815c1de
--- /dev/null
+++ b/net/rxrpc/ar-connevent.c
@@ -0,0 +1,387 @@
+/* connection-level event handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+/*
+ * pass a connection-level abort onto all calls on that connection
+ */
+static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
+			      u32 abort_code)
+{
+	struct rxrpc_call *call;
+	struct rb_node *p;
+
+	_enter("{%d},%x", conn->debug_id, abort_code);
+
+	read_lock_bh(&conn->lock);
+
+	for (p = rb_first(&conn->calls); p; p = rb_next(p)) {
+		call = rb_entry(p, struct rxrpc_call, conn_node);
+		write_lock(&call->state_lock);
+		if (call->state <= RXRPC_CALL_COMPLETE) {
+			call->state = state;
+			call->abort_code = abort_code;
+			if (state == RXRPC_CALL_LOCALLY_ABORTED)
+				set_bit(RXRPC_CALL_CONN_ABORT, &call->events);
+			else
+				set_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
+			schedule_work(&call->processor);
+		}
+		write_unlock(&call->state_lock);
+	}
+
+	read_unlock_bh(&conn->lock);
+	_leave("");
+}
+
+/*
+ * generate a connection-level abort
+ */
+static int rxrpc_abort_connection(struct rxrpc_connection *conn,
+				  u32 error, u32 abort_code)
+{
+	struct rxrpc_header hdr;
+	struct msghdr msg;
+	struct kvec iov[2];
+	__be32 word;
+	size_t len;
+	int ret;
+
+	_enter("%d,,%u,%u", conn->debug_id, error, abort_code);
+
+	/* generate a connection-level abort */
+	spin_lock_bh(&conn->state_lock);
+	if (conn->state < RXRPC_CONN_REMOTELY_ABORTED) {
+		conn->state = RXRPC_CONN_LOCALLY_ABORTED;
+		conn->error = error;
+		spin_unlock_bh(&conn->state_lock);
+	} else {
+		spin_unlock_bh(&conn->state_lock);
+		_leave(" = 0 [already dead]");
+		return 0;
+	}
+
+	rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code);
+
+	msg.msg_name	= &conn->trans->peer->srx.transport.sin;
+	msg.msg_namelen	= sizeof(conn->trans->peer->srx.transport.sin);
+	msg.msg_control	= NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags	= 0;
+
+	hdr.epoch	= conn->epoch;
+	hdr.cid		= conn->cid;
+	hdr.callNumber	= 0;
+	hdr.seq		= 0;
+	hdr.type	= RXRPC_PACKET_TYPE_ABORT;
+	hdr.flags	= conn->out_clientflag;
+	hdr.userStatus	= 0;
+	hdr.securityIndex = conn->security_ix;
+	hdr._rsvd	= 0;
+	hdr.serviceId	= conn->service_id;
+
+	word = htonl(abort_code);
+
+	iov[0].iov_base	= &hdr;
+	iov[0].iov_len	= sizeof(hdr);
+	iov[1].iov_base	= &word;
+	iov[1].iov_len	= sizeof(word);
+
+	len = iov[0].iov_len + iov[1].iov_len;
+
+	hdr.serial = htonl(atomic_inc_return(&conn->serial));
+	_proto("Tx CONN ABORT %%%u { %d }", ntohl(hdr.serial), abort_code);
+
+	ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len);
+	if (ret < 0) {
+		_debug("sendmsg failed: %d", ret);
+		return -EAGAIN;
+	}
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * mark a call as being on a now-secured channel
+ * - must be called with softirqs disabled
+ */
+void rxrpc_call_is_secure(struct rxrpc_call *call)
+{
+	_enter("%p", call);
+	if (call) {
+		read_lock(&call->state_lock);
+		if (call->state < RXRPC_CALL_COMPLETE &&
+		    !test_and_set_bit(RXRPC_CALL_SECURED, &call->events))
+			schedule_work(&call->processor);
+		read_unlock(&call->state_lock);
+	}
+}
+
+/*
+ * connection-level Rx packet processor
+ */
+static int rxrpc_process_event(struct rxrpc_connection *conn,
+			       struct sk_buff *skb,
+			       u32 *_abort_code)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	__be32 tmp;
+	u32 serial;
+	int loop, ret;
+
+	if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED)
+		return -ECONNABORTED;
+
+	serial = ntohl(sp->hdr.serial);
+
+	switch (sp->hdr.type) {
+	case RXRPC_PACKET_TYPE_ABORT:
+		if (skb_copy_bits(skb, 0, &tmp, sizeof(tmp)) < 0)
+			return -EPROTO;
+		_proto("Rx ABORT %%%u { ac=%d }", serial, ntohl(tmp));
+
+		conn->state = RXRPC_CONN_REMOTELY_ABORTED;
+		rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED,
+				  ntohl(tmp));
+		return -ECONNABORTED;
+
+	case RXRPC_PACKET_TYPE_CHALLENGE:
+		if (conn->security)
+			return conn->security->respond_to_challenge(
+				conn, skb, _abort_code);
+		return -EPROTO;
+
+	case RXRPC_PACKET_TYPE_RESPONSE:
+		if (!conn->security)
+			return -EPROTO;
+
+		ret = conn->security->verify_response(conn, skb, _abort_code);
+		if (ret < 0)
+			return ret;
+
+		ret = conn->security->init_connection_security(conn);
+		if (ret < 0)
+			return ret;
+
+		conn->security->prime_packet_security(conn);
+		read_lock_bh(&conn->lock);
+		spin_lock(&conn->state_lock);
+
+		if (conn->state == RXRPC_CONN_SERVER_CHALLENGING) {
+			conn->state = RXRPC_CONN_SERVER;
+			for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
+				rxrpc_call_is_secure(conn->channels[loop]);
+		}
+
+		spin_unlock(&conn->state_lock);
+		read_unlock_bh(&conn->lock);
+		return 0;
+
+	default:
+		return -EPROTO;
+	}
+}
+
+/*
+ * set up security and issue a challenge
+ */
+static void rxrpc_secure_connection(struct rxrpc_connection *conn)
+{
+	u32 abort_code;
+	int ret;
+
+	_enter("{%d}", conn->debug_id);
+
+	ASSERT(conn->security_ix != 0);
+
+	if (!conn->key) {
+		_debug("set up security");
+		ret = rxrpc_init_server_conn_security(conn);
+		switch (ret) {
+		case 0:
+			break;
+		case -ENOENT:
+			abort_code = RX_CALL_DEAD;
+			goto abort;
+		default:
+			abort_code = RXKADNOAUTH;
+			goto abort;
+		}
+	}
+
+	ASSERT(conn->security != NULL);
+
+	if (conn->security->issue_challenge(conn) < 0) {
+		abort_code = RX_CALL_DEAD;
+		ret = -ENOMEM;
+		goto abort;
+	}
+
+	_leave("");
+	return;
+
+abort:
+	_debug("abort %d, %d", ret, abort_code);
+	rxrpc_abort_connection(conn, -ret, abort_code);
+	_leave(" [aborted]");
+}
+
+/*
+ * connection-level event processor
+ */
+void rxrpc_process_connection(struct work_struct *work)
+{
+	struct rxrpc_connection *conn =
+		container_of(work, struct rxrpc_connection, processor);
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *skb;
+	u32 abort_code = RX_PROTOCOL_ERROR;
+	int ret;
+
+	_enter("{%d}", conn->debug_id);
+
+	atomic_inc(&conn->usage);
+
+	if (test_and_clear_bit(RXRPC_CONN_CHALLENGE, &conn->events)) {
+		rxrpc_secure_connection(conn);
+		rxrpc_put_connection(conn);
+	}
+
+	/* go through the conn-level event packets, releasing the ref on this
+	 * connection that each one has when we've finished with it */
+	while ((skb = skb_dequeue(&conn->rx_queue))) {
+		sp = rxrpc_skb(skb);
+
+		ret = rxrpc_process_event(conn, skb, &abort_code);
+		switch (ret) {
+		case -EPROTO:
+		case -EKEYEXPIRED:
+		case -EKEYREJECTED:
+			goto protocol_error;
+		case -EAGAIN:
+			goto requeue_and_leave;
+		case -ECONNABORTED:
+		default:
+			rxrpc_put_connection(conn);
+			rxrpc_free_skb(skb);
+			break;
+		}
+	}
+
+out:
+	rxrpc_put_connection(conn);
+	_leave("");
+	return;
+
+requeue_and_leave:
+	skb_queue_head(&conn->rx_queue, skb);
+	goto out;
+
+protocol_error:
+	if (rxrpc_abort_connection(conn, -ret, abort_code) < 0)
+		goto requeue_and_leave;
+	rxrpc_put_connection(conn);
+	rxrpc_free_skb(skb);
+	_leave(" [EPROTO]");
+	goto out;
+}
+
+/*
+ * reject packets through the local endpoint
+ */
+void rxrpc_reject_packets(struct work_struct *work)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_in sin;
+	} sa;
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_header hdr;
+	struct rxrpc_local *local;
+	struct sk_buff *skb;
+	struct msghdr msg;
+	struct kvec iov[2];
+	size_t size;
+	__be32 code;
+
+	local = container_of(work, struct rxrpc_local, rejecter);
+	rxrpc_get_local(local);
+
+	_enter("%d", local->debug_id);
+
+	iov[0].iov_base = &hdr;
+	iov[0].iov_len = sizeof(hdr);
+	iov[1].iov_base = &code;
+	iov[1].iov_len = sizeof(code);
+	size = sizeof(hdr) + sizeof(code);
+
+	msg.msg_name = &sa;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags = 0;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa.sa_family = local->srx.transport.family;
+	switch (sa.sa.sa_family) {
+	case AF_INET:
+		msg.msg_namelen = sizeof(sa.sin);
+		break;
+	default:
+		msg.msg_namelen = 0;
+		break;
+	}
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.type = RXRPC_PACKET_TYPE_ABORT;
+
+	while ((skb = skb_dequeue(&local->reject_queue))) {
+		sp = rxrpc_skb(skb);
+		switch (sa.sa.sa_family) {
+		case AF_INET:
+			sa.sin.sin_port = udp_hdr(skb)->source;
+			sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+			code = htonl(skb->priority);
+
+			hdr.epoch = sp->hdr.epoch;
+			hdr.cid = sp->hdr.cid;
+			hdr.callNumber = sp->hdr.callNumber;
+			hdr.serviceId = sp->hdr.serviceId;
+			hdr.flags = sp->hdr.flags;
+			hdr.flags ^= RXRPC_CLIENT_INITIATED;
+			hdr.flags &= RXRPC_CLIENT_INITIATED;
+
+			kernel_sendmsg(local->socket, &msg, iov, 2, size);
+			break;
+
+		default:
+			break;
+		}
+
+		rxrpc_free_skb(skb);
+		rxrpc_put_local(local);
+	}
+
+	rxrpc_put_local(local);
+	_leave("");
+}
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
new file mode 100644
index 00000000000..f5539e2f7b5
--- /dev/null
+++ b/net/rxrpc/ar-error.c
@@ -0,0 +1,253 @@
+/* Error message handling (ICMP)
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+/*
+ * handle an error received on the local endpoint
+ */
+void rxrpc_UDP_error_report(struct sock *sk)
+{
+	struct sock_exterr_skb *serr;
+	struct rxrpc_transport *trans;
+	struct rxrpc_local *local = sk->sk_user_data;
+	struct rxrpc_peer *peer;
+	struct sk_buff *skb;
+	__be32 addr;
+	__be16 port;
+
+	_enter("%p{%d}", sk, local->debug_id);
+
+	skb = skb_dequeue(&sk->sk_error_queue);
+	if (!skb) {
+		_leave("UDP socket errqueue empty");
+		return;
+	}
+
+	rxrpc_new_skb(skb);
+
+	serr = SKB_EXT_ERR(skb);
+	addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset);
+	port = serr->port;
+
+	_net("Rx UDP Error from "NIPQUAD_FMT":%hu",
+	     NIPQUAD(addr), ntohs(port));
+	_debug("Msg l:%d d:%d", skb->len, skb->data_len);
+
+	peer = rxrpc_find_peer(local, addr, port);
+	if (IS_ERR(peer)) {
+		rxrpc_free_skb(skb);
+		_leave(" [no peer]");
+		return;
+	}
+
+	trans = rxrpc_find_transport(local, peer);
+	if (!trans) {
+		rxrpc_put_peer(peer);
+		rxrpc_free_skb(skb);
+		_leave(" [no trans]");
+		return;
+	}
+
+	if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP &&
+	    serr->ee.ee_type == ICMP_DEST_UNREACH &&
+	    serr->ee.ee_code == ICMP_FRAG_NEEDED
+	    ) {
+		u32 mtu = serr->ee.ee_info;
+
+		_net("Rx Received ICMP Fragmentation Needed (%d)", mtu);
+
+		/* wind down the local interface MTU */
+		if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) {
+			peer->if_mtu = mtu;
+			_net("I/F MTU %u", mtu);
+		}
+
+		/* ip_rt_frag_needed() may have eaten the info */
+		if (mtu == 0)
+			mtu = ntohs(icmp_hdr(skb)->un.frag.mtu);
+
+		if (mtu == 0) {
+			/* they didn't give us a size, estimate one */
+			if (mtu > 1500) {
+				mtu >>= 1;
+				if (mtu < 1500)
+					mtu = 1500;
+			} else {
+				mtu -= 100;
+				if (mtu < peer->hdrsize)
+					mtu = peer->hdrsize + 4;
+			}
+		}
+
+		if (mtu < peer->mtu) {
+			peer->mtu = mtu;
+			peer->maxdata = peer->mtu - peer->hdrsize;
+			_net("Net MTU %u (maxdata %u)",
+			     peer->mtu, peer->maxdata);
+		}
+	}
+
+	rxrpc_put_peer(peer);
+
+	/* pass the transport ref to error_handler to release */
+	skb_queue_tail(&trans->error_queue, skb);
+	schedule_work(&trans->error_handler);
+
+	/* reset and regenerate socket error */
+	spin_lock_bh(&sk->sk_error_queue.lock);
+	sk->sk_err = 0;
+	skb = skb_peek(&sk->sk_error_queue);
+	if (skb) {
+		sk->sk_err = SKB_EXT_ERR(skb)->ee.ee_errno;
+		spin_unlock_bh(&sk->sk_error_queue.lock);
+		sk->sk_error_report(sk);
+	} else {
+		spin_unlock_bh(&sk->sk_error_queue.lock);
+	}
+
+	_leave("");
+}
+
+/*
+ * deal with UDP error messages
+ */
+void rxrpc_UDP_error_handler(struct work_struct *work)
+{
+	struct sock_extended_err *ee;
+	struct sock_exterr_skb *serr;
+	struct rxrpc_transport *trans =
+		container_of(work, struct rxrpc_transport, error_handler);
+	struct sk_buff *skb;
+	int local, err;
+
+	_enter("");
+
+	skb = skb_dequeue(&trans->error_queue);
+	if (!skb)
+		return;
+
+	serr = SKB_EXT_ERR(skb);
+	ee = &serr->ee;
+
+	_net("Rx Error o=%d t=%d c=%d e=%d",
+	     ee->ee_origin, ee->ee_type, ee->ee_code, ee->ee_errno);
+
+	err = ee->ee_errno;
+
+	switch (ee->ee_origin) {
+	case SO_EE_ORIGIN_ICMP:
+		local = 0;
+		switch (ee->ee_type) {
+		case ICMP_DEST_UNREACH:
+			switch (ee->ee_code) {
+			case ICMP_NET_UNREACH:
+				_net("Rx Received ICMP Network Unreachable");
+				err = ENETUNREACH;
+				break;
+			case ICMP_HOST_UNREACH:
+				_net("Rx Received ICMP Host Unreachable");
+				err = EHOSTUNREACH;
+				break;
+			case ICMP_PORT_UNREACH:
+				_net("Rx Received ICMP Port Unreachable");
+				err = ECONNREFUSED;
+				break;
+			case ICMP_FRAG_NEEDED:
+				_net("Rx Received ICMP Fragmentation Needed (%d)",
+				     ee->ee_info);
+				err = 0; /* dealt with elsewhere */
+				break;
+			case ICMP_NET_UNKNOWN:
+				_net("Rx Received ICMP Unknown Network");
+				err = ENETUNREACH;
+				break;
+			case ICMP_HOST_UNKNOWN:
+				_net("Rx Received ICMP Unknown Host");
+				err = EHOSTUNREACH;
+				break;
+			default:
+				_net("Rx Received ICMP DestUnreach code=%u",
+				     ee->ee_code);
+				break;
+			}
+			break;
+
+		case ICMP_TIME_EXCEEDED:
+			_net("Rx Received ICMP TTL Exceeded");
+			break;
+
+		default:
+			_proto("Rx Received ICMP error { type=%u code=%u }",
+			       ee->ee_type, ee->ee_code);
+			break;
+		}
+		break;
+
+	case SO_EE_ORIGIN_LOCAL:
+		_proto("Rx Received local error { error=%d }",
+		       ee->ee_errno);
+		local = 1;
+		break;
+
+	case SO_EE_ORIGIN_NONE:
+	case SO_EE_ORIGIN_ICMP6:
+	default:
+		_proto("Rx Received error report { orig=%u }",
+		       ee->ee_origin);
+		local = 0;
+		break;
+	}
+
+	/* terminate all the affected calls if there's an unrecoverable
+	 * error */
+	if (err) {
+		struct rxrpc_call *call, *_n;
+
+		_debug("ISSUE ERROR %d", err);
+
+		spin_lock_bh(&trans->peer->lock);
+		trans->peer->net_error = err;
+
+		list_for_each_entry_safe(call, _n, &trans->peer->error_targets,
+					 error_link) {
+			write_lock(&call->state_lock);
+			if (call->state != RXRPC_CALL_COMPLETE &&
+			    call->state < RXRPC_CALL_NETWORK_ERROR) {
+				call->state = RXRPC_CALL_NETWORK_ERROR;
+				set_bit(RXRPC_CALL_RCVD_ERROR, &call->events);
+				schedule_work(&call->processor);
+			}
+			write_unlock(&call->state_lock);
+			list_del_init(&call->error_link);
+		}
+
+		spin_unlock_bh(&trans->peer->lock);
+	}
+
+	if (!skb_queue_empty(&trans->error_queue))
+		schedule_work(&trans->error_handler);
+
+	rxrpc_free_skb(skb);
+	rxrpc_put_transport(trans);
+	_leave("");
+}
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
new file mode 100644
index 00000000000..323c3454561
--- /dev/null
+++ b/net/rxrpc/ar-input.c
@@ -0,0 +1,791 @@
+/* RxRPC packet reception
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/errqueue.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+unsigned long rxrpc_ack_timeout = 1;
+
+const char *rxrpc_pkts[] = {
+	"?00",
+	"DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG",
+	"?09", "?10", "?11", "?12", "?13", "?14", "?15"
+};
+
+/*
+ * queue a packet for recvmsg to pass to userspace
+ * - the caller must hold a lock on call->lock
+ * - must not be called with interrupts disabled (sk_filter() disables BH's)
+ * - eats the packet whether successful or not
+ * - there must be just one reference to the packet, which the caller passes to
+ *   this function
+ */
+int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
+			bool force, bool terminal)
+{
+	struct rxrpc_skb_priv *sp;
+	struct sock *sk;
+	int skb_len, ret;
+
+	_enter(",,%d,%d", force, terminal);
+
+	ASSERT(!irqs_disabled());
+
+	sp = rxrpc_skb(skb);
+	ASSERTCMP(sp->call, ==, call);
+
+	/* if we've already posted the terminal message for a call, then we
+	 * don't post any more */
+	if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
+		_debug("already terminated");
+		ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE);
+		skb->destructor = NULL;
+		sp->call = NULL;
+		rxrpc_put_call(call);
+		rxrpc_free_skb(skb);
+		return 0;
+	}
+
+	sk = &call->socket->sk;
+
+	if (!force) {
+		/* cast skb->rcvbuf to unsigned...  It's pointless, but
+		 * reduces number of warnings when compiling with -W
+		 * --ANK */
+//		ret = -ENOBUFS;
+//		if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
+//		    (unsigned) sk->sk_rcvbuf)
+//			goto out;
+
+		ret = sk_filter(sk, skb);
+		if (ret < 0)
+			goto out;
+	}
+
+	spin_lock_bh(&sk->sk_receive_queue.lock);
+	if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags) &&
+	    !test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+	    call->socket->sk.sk_state != RXRPC_CLOSE) {
+		skb->destructor = rxrpc_packet_destructor;
+		skb->dev = NULL;
+		skb->sk = sk;
+		atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+
+		/* Cache the SKB length before we tack it onto the receive
+		 * queue.  Once it is added it no longer belongs to us and
+		 * may be freed by other threads of control pulling packets
+		 * from the queue.
+		 */
+		skb_len = skb->len;
+
+		_net("post skb %p", skb);
+		__skb_queue_tail(&sk->sk_receive_queue, skb);
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+		if (!sock_flag(sk, SOCK_DEAD))
+			sk->sk_data_ready(sk, skb_len);
+
+		if (terminal) {
+			_debug("<<<< TERMINAL MESSAGE >>>>");
+			set_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags);
+		}
+
+		skb = NULL;
+	} else {
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+	}
+	ret = 0;
+
+out:
+	/* release the socket buffer */
+	if (skb) {
+		skb->destructor = NULL;
+		sp->call = NULL;
+		rxrpc_put_call(call);
+		rxrpc_free_skb(skb);
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * process a DATA packet, posting the packet to the appropriate queue
+ * - eats the packet if successful
+ */
+static int rxrpc_fast_process_data(struct rxrpc_call *call,
+				   struct sk_buff *skb, u32 seq)
+{
+	struct rxrpc_skb_priv *sp;
+	bool terminal;
+	int ret, ackbit, ack;
+
+	_enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq);
+
+	sp = rxrpc_skb(skb);
+	ASSERTCMP(sp->call, ==, NULL);
+
+	spin_lock(&call->lock);
+
+	if (call->state > RXRPC_CALL_COMPLETE)
+		goto discard;
+
+	ASSERTCMP(call->rx_data_expect, >=, call->rx_data_post);
+	ASSERTCMP(call->rx_data_post, >=, call->rx_data_recv);
+	ASSERTCMP(call->rx_data_recv, >=, call->rx_data_eaten);
+
+	if (seq < call->rx_data_post) {
+		_debug("dup #%u [-%u]", seq, call->rx_data_post);
+		ack = RXRPC_ACK_DUPLICATE;
+		ret = -ENOBUFS;
+		goto discard_and_ack;
+	}
+
+	/* we may already have the packet in the out of sequence queue */
+	ackbit = seq - (call->rx_data_eaten + 1);
+	ASSERTCMP(ackbit, >=, 0);
+	if (__test_and_set_bit(ackbit, &call->ackr_window)) {
+		_debug("dup oos #%u [%u,%u]",
+		       seq, call->rx_data_eaten, call->rx_data_post);
+		ack = RXRPC_ACK_DUPLICATE;
+		goto discard_and_ack;
+	}
+
+	if (seq >= call->ackr_win_top) {
+		_debug("exceed #%u [%u]", seq, call->ackr_win_top);
+		__clear_bit(ackbit, &call->ackr_window);
+		ack = RXRPC_ACK_EXCEEDS_WINDOW;
+		goto discard_and_ack;
+	}
+
+	if (seq == call->rx_data_expect) {
+		clear_bit(RXRPC_CALL_EXPECT_OOS, &call->flags);
+		call->rx_data_expect++;
+	} else if (seq > call->rx_data_expect) {
+		_debug("oos #%u [%u]", seq, call->rx_data_expect);
+		call->rx_data_expect = seq + 1;
+		if (test_and_set_bit(RXRPC_CALL_EXPECT_OOS, &call->flags)) {
+			ack = RXRPC_ACK_OUT_OF_SEQUENCE;
+			goto enqueue_and_ack;
+		}
+		goto enqueue_packet;
+	}
+
+	if (seq != call->rx_data_post) {
+		_debug("ahead #%u [%u]", seq, call->rx_data_post);
+		goto enqueue_packet;
+	}
+
+	if (test_bit(RXRPC_CALL_RCVD_LAST, &call->flags))
+		goto protocol_error;
+
+	/* if the packet need security things doing to it, then it goes down
+	 * the slow path */
+	if (call->conn->security)
+		goto enqueue_packet;
+
+	sp->call = call;
+	rxrpc_get_call(call);
+	terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
+		    !(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
+	ret = rxrpc_queue_rcv_skb(call, skb, false, terminal);
+	if (ret < 0) {
+		if (ret == -ENOMEM || ret == -ENOBUFS) {
+			__clear_bit(ackbit, &call->ackr_window);
+			ack = RXRPC_ACK_NOSPACE;
+			goto discard_and_ack;
+		}
+		goto out;
+	}
+
+	skb = NULL;
+
+	_debug("post #%u", seq);
+	ASSERTCMP(call->rx_data_post, ==, seq);
+	call->rx_data_post++;
+
+	if (sp->hdr.flags & RXRPC_LAST_PACKET)
+		set_bit(RXRPC_CALL_RCVD_LAST, &call->flags);
+
+	/* if we've reached an out of sequence packet then we need to drain
+	 * that queue into the socket Rx queue now */
+	if (call->rx_data_post == call->rx_first_oos) {
+		_debug("drain rx oos now");
+		read_lock(&call->state_lock);
+		if (call->state < RXRPC_CALL_COMPLETE &&
+		    !test_and_set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events))
+			schedule_work(&call->processor);
+		read_unlock(&call->state_lock);
+	}
+
+	spin_unlock(&call->lock);
+	atomic_inc(&call->ackr_not_idle);
+	rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, sp->hdr.serial, false);
+	_leave(" = 0 [posted]");
+	return 0;
+
+protocol_error:
+	ret = -EBADMSG;
+out:
+	spin_unlock(&call->lock);
+	_leave(" = %d", ret);
+	return ret;
+
+discard_and_ack:
+	_debug("discard and ACK packet %p", skb);
+	__rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
+discard:
+	spin_unlock(&call->lock);
+	rxrpc_free_skb(skb);
+	_leave(" = 0 [discarded]");
+	return 0;
+
+enqueue_and_ack:
+	__rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
+enqueue_packet:
+	_net("defer skb %p", skb);
+	spin_unlock(&call->lock);
+	skb_queue_tail(&call->rx_queue, skb);
+	atomic_inc(&call->ackr_not_idle);
+	read_lock(&call->state_lock);
+	if (call->state < RXRPC_CALL_DEAD)
+		schedule_work(&call->processor);
+	read_unlock(&call->state_lock);
+	_leave(" = 0 [queued]");
+	return 0;
+}
+
+/*
+ * assume an implicit ACKALL of the transmission phase of a client socket upon
+ * reception of the first reply packet
+ */
+static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial)
+{
+	write_lock_bh(&call->state_lock);
+
+	switch (call->state) {
+	case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+		call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
+		call->acks_latest = serial;
+
+		_debug("implicit ACKALL %%%u", call->acks_latest);
+		set_bit(RXRPC_CALL_RCVD_ACKALL, &call->events);
+		write_unlock_bh(&call->state_lock);
+
+		if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
+			clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
+			clear_bit(RXRPC_CALL_RESEND, &call->events);
+			clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+		}
+		break;
+
+	default:
+		write_unlock_bh(&call->state_lock);
+		break;
+	}
+}
+
+/*
+ * post an incoming packet to the nominated call to deal with
+ * - must get rid of the sk_buff, either by freeing it or by queuing it
+ */
+void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	__be32 _abort_code;
+	u32 serial, hi_serial, seq, abort_code;
+
+	_enter("%p,%p", call, skb);
+
+	ASSERT(!irqs_disabled());
+
+#if 0 // INJECT RX ERROR
+	if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
+		static int skip = 0;
+		if (++skip == 3) {
+			printk("DROPPED 3RD PACKET!!!!!!!!!!!!!\n");
+			skip = 0;
+			goto free_packet;
+		}
+	}
+#endif
+
+	/* track the latest serial number on this connection for ACK packet
+	 * information */
+	serial = ntohl(sp->hdr.serial);
+	hi_serial = atomic_read(&call->conn->hi_serial);
+	while (serial > hi_serial)
+		hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial,
+					   serial);
+
+	/* request ACK generation for any ACK or DATA packet that requests
+	 * it */
+	if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
+		_proto("ACK Requested on %%%u", serial);
+		rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial,
+				  !(sp->hdr.flags & RXRPC_MORE_PACKETS));
+	}
+
+	switch (sp->hdr.type) {
+	case RXRPC_PACKET_TYPE_ABORT:
+		_debug("abort");
+
+		if (skb_copy_bits(skb, 0, &_abort_code,
+				  sizeof(_abort_code)) < 0)
+			goto protocol_error;
+
+		abort_code = ntohl(_abort_code);
+		_proto("Rx ABORT %%%u { %x }", serial, abort_code);
+
+		write_lock_bh(&call->state_lock);
+		if (call->state < RXRPC_CALL_COMPLETE) {
+			call->state = RXRPC_CALL_REMOTELY_ABORTED;
+			call->abort_code = abort_code;
+			set_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
+			schedule_work(&call->processor);
+		}
+		goto free_packet_unlock;
+
+	case RXRPC_PACKET_TYPE_BUSY:
+		_proto("Rx BUSY %%%u", serial);
+
+		if (call->conn->out_clientflag)
+			goto protocol_error;
+
+		write_lock_bh(&call->state_lock);
+		switch (call->state) {
+		case RXRPC_CALL_CLIENT_SEND_REQUEST:
+			call->state = RXRPC_CALL_SERVER_BUSY;
+			set_bit(RXRPC_CALL_RCVD_BUSY, &call->events);
+			schedule_work(&call->processor);
+		case RXRPC_CALL_SERVER_BUSY:
+			goto free_packet_unlock;
+		default:
+			goto protocol_error_locked;
+		}
+
+	default:
+		_proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], serial);
+		goto protocol_error;
+
+	case RXRPC_PACKET_TYPE_DATA:
+		seq = ntohl(sp->hdr.seq);
+
+		_proto("Rx DATA %%%u { #%u }", serial, seq);
+
+		if (seq == 0)
+			goto protocol_error;
+
+		call->ackr_prev_seq = sp->hdr.seq;
+
+		/* received data implicitly ACKs all of the request packets we
+		 * sent when we're acting as a client */
+		if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
+			rxrpc_assume_implicit_ackall(call, serial);
+
+		switch (rxrpc_fast_process_data(call, skb, seq)) {
+		case 0:
+			skb = NULL;
+			goto done;
+
+		default:
+			BUG();
+
+			/* data packet received beyond the last packet */
+		case -EBADMSG:
+			goto protocol_error;
+		}
+
+	case RXRPC_PACKET_TYPE_ACK:
+		/* ACK processing is done in process context */
+		read_lock_bh(&call->state_lock);
+		if (call->state < RXRPC_CALL_DEAD) {
+			skb_queue_tail(&call->rx_queue, skb);
+			schedule_work(&call->processor);
+			skb = NULL;
+		}
+		read_unlock_bh(&call->state_lock);
+		goto free_packet;
+	}
+
+protocol_error:
+	_debug("protocol error");
+	write_lock_bh(&call->state_lock);
+protocol_error_locked:
+	if (call->state <= RXRPC_CALL_COMPLETE) {
+		call->state = RXRPC_CALL_LOCALLY_ABORTED;
+		call->abort_code = RX_PROTOCOL_ERROR;
+		set_bit(RXRPC_CALL_ABORT, &call->events);
+		schedule_work(&call->processor);
+	}
+free_packet_unlock:
+	write_unlock_bh(&call->state_lock);
+free_packet:
+	rxrpc_free_skb(skb);
+done:
+	_leave("");
+}
+
+/*
+ * split up a jumbo data packet
+ */
+static void rxrpc_process_jumbo_packet(struct rxrpc_call *call,
+				       struct sk_buff *jumbo)
+{
+	struct rxrpc_jumbo_header jhdr;
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *part;
+
+	_enter(",{%u,%u}", jumbo->data_len, jumbo->len);
+
+	sp = rxrpc_skb(jumbo);
+
+	do {
+		sp->hdr.flags &= ~RXRPC_JUMBO_PACKET;
+
+		/* make a clone to represent the first subpacket in what's left
+		 * of the jumbo packet */
+		part = skb_clone(jumbo, GFP_ATOMIC);
+		if (!part) {
+			/* simply ditch the tail in the event of ENOMEM */
+			pskb_trim(jumbo, RXRPC_JUMBO_DATALEN);
+			break;
+		}
+		rxrpc_new_skb(part);
+
+		pskb_trim(part, RXRPC_JUMBO_DATALEN);
+
+		if (!pskb_pull(jumbo, RXRPC_JUMBO_DATALEN))
+			goto protocol_error;
+
+		if (skb_copy_bits(jumbo, 0, &jhdr, sizeof(jhdr)) < 0)
+			goto protocol_error;
+		if (!pskb_pull(jumbo, sizeof(jhdr)))
+			BUG();
+
+		sp->hdr.seq	= htonl(ntohl(sp->hdr.seq) + 1);
+		sp->hdr.serial	= htonl(ntohl(sp->hdr.serial) + 1);
+		sp->hdr.flags	= jhdr.flags;
+		sp->hdr._rsvd	= jhdr._rsvd;
+
+		_proto("Rx DATA Jumbo %%%u", ntohl(sp->hdr.serial) - 1);
+
+		rxrpc_fast_process_packet(call, part);
+		part = NULL;
+
+	} while (sp->hdr.flags & RXRPC_JUMBO_PACKET);
+
+	rxrpc_fast_process_packet(call, jumbo);
+	_leave("");
+	return;
+
+protocol_error:
+	_debug("protocol error");
+	rxrpc_free_skb(part);
+	rxrpc_free_skb(jumbo);
+	write_lock_bh(&call->state_lock);
+	if (call->state <= RXRPC_CALL_COMPLETE) {
+		call->state = RXRPC_CALL_LOCALLY_ABORTED;
+		call->abort_code = RX_PROTOCOL_ERROR;
+		set_bit(RXRPC_CALL_ABORT, &call->events);
+		schedule_work(&call->processor);
+	}
+	write_unlock_bh(&call->state_lock);
+	_leave("");
+}
+
+/*
+ * post an incoming packet to the appropriate call/socket to deal with
+ * - must get rid of the sk_buff, either by freeing it or by queuing it
+ */
+static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
+				      struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_call *call;
+	struct rb_node *p;
+	__be32 call_id;
+
+	_enter("%p,%p", conn, skb);
+
+	read_lock_bh(&conn->lock);
+
+	sp = rxrpc_skb(skb);
+
+	/* look at extant calls by channel number first */
+	call = conn->channels[ntohl(sp->hdr.cid) & RXRPC_CHANNELMASK];
+	if (!call || call->call_id != sp->hdr.callNumber)
+		goto call_not_extant;
+
+	_debug("extant call [%d]", call->state);
+	ASSERTCMP(call->conn, ==, conn);
+
+	read_lock(&call->state_lock);
+	switch (call->state) {
+	case RXRPC_CALL_LOCALLY_ABORTED:
+		if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
+			schedule_work(&call->processor);
+	case RXRPC_CALL_REMOTELY_ABORTED:
+	case RXRPC_CALL_NETWORK_ERROR:
+	case RXRPC_CALL_DEAD:
+		goto free_unlock;
+	default:
+		break;
+	}
+
+	read_unlock(&call->state_lock);
+	rxrpc_get_call(call);
+	read_unlock_bh(&conn->lock);
+
+	if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
+	    sp->hdr.flags & RXRPC_JUMBO_PACKET)
+		rxrpc_process_jumbo_packet(call, skb);
+	else
+		rxrpc_fast_process_packet(call, skb);
+
+	rxrpc_put_call(call);
+	goto done;
+
+call_not_extant:
+	/* search the completed calls in case what we're dealing with is
+	 * there */
+	_debug("call not extant");
+
+	call_id = sp->hdr.callNumber;
+	p = conn->calls.rb_node;
+	while (p) {
+		call = rb_entry(p, struct rxrpc_call, conn_node);
+
+		if (call_id < call->call_id)
+			p = p->rb_left;
+		else if (call_id > call->call_id)
+			p = p->rb_right;
+		else
+			goto found_completed_call;
+	}
+
+dead_call:
+	/* it's a either a really old call that we no longer remember or its a
+	 * new incoming call */
+	read_unlock_bh(&conn->lock);
+
+	if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
+	    sp->hdr.seq == __constant_cpu_to_be32(1)) {
+		_debug("incoming call");
+		skb_queue_tail(&conn->trans->local->accept_queue, skb);
+		schedule_work(&conn->trans->local->acceptor);
+		goto done;
+	}
+
+	_debug("dead call");
+	skb->priority = RX_CALL_DEAD;
+	rxrpc_reject_packet(conn->trans->local, skb);
+	goto done;
+
+	/* resend last packet of a completed call
+	 * - client calls may have been aborted or ACK'd
+	 * - server calls may have been aborted
+	 */
+found_completed_call:
+	_debug("completed call");
+
+	if (atomic_read(&call->usage) == 0)
+		goto dead_call;
+
+	/* synchronise any state changes */
+	read_lock(&call->state_lock);
+	ASSERTIFCMP(call->state != RXRPC_CALL_CLIENT_FINAL_ACK,
+		    call->state, >=, RXRPC_CALL_COMPLETE);
+
+	if (call->state == RXRPC_CALL_LOCALLY_ABORTED ||
+	    call->state == RXRPC_CALL_REMOTELY_ABORTED ||
+	    call->state == RXRPC_CALL_DEAD) {
+		read_unlock(&call->state_lock);
+		goto dead_call;
+	}
+
+	if (call->conn->in_clientflag) {
+		read_unlock(&call->state_lock);
+		goto dead_call; /* complete server call */
+	}
+
+	_debug("final ack again");
+	rxrpc_get_call(call);
+	set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
+	schedule_work(&call->processor);
+
+free_unlock:
+	read_unlock(&call->state_lock);
+	read_unlock_bh(&conn->lock);
+	rxrpc_free_skb(skb);
+done:
+	_leave("");
+}
+
+/*
+ * post connection-level events to the connection
+ * - this includes challenges, responses and some aborts
+ */
+static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
+				      struct sk_buff *skb)
+{
+	_enter("%p,%p", conn, skb);
+
+	atomic_inc(&conn->usage);
+	skb_queue_tail(&conn->rx_queue, skb);
+	schedule_work(&conn->processor);
+}
+
+/*
+ * handle data received on the local endpoint
+ * - may be called in interrupt context
+ */
+void rxrpc_data_ready(struct sock *sk, int count)
+{
+	struct rxrpc_connection *conn;
+	struct rxrpc_transport *trans;
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_local *local;
+	struct rxrpc_peer *peer;
+	struct sk_buff *skb;
+	int ret;
+
+	_enter("%p, %d", sk, count);
+
+	ASSERT(!irqs_disabled());
+
+	read_lock_bh(&rxrpc_local_lock);
+	local = sk->sk_user_data;
+	if (local && atomic_read(&local->usage) > 0)
+		rxrpc_get_local(local);
+	else
+		local = NULL;
+	read_unlock_bh(&rxrpc_local_lock);
+	if (!local) {
+		_leave(" [local dead]");
+		return;
+	}
+
+	skb = skb_recv_datagram(sk, 0, 1, &ret);
+	if (!skb) {
+		rxrpc_put_local(local);
+		if (ret == -EAGAIN)
+			return;
+		_debug("UDP socket error %d", ret);
+		return;
+	}
+
+	rxrpc_new_skb(skb);
+
+	_net("recv skb %p", skb);
+
+	/* we'll probably need to checksum it (didn't call sock_recvmsg) */
+	if (skb_checksum_complete(skb)) {
+		rxrpc_free_skb(skb);
+		rxrpc_put_local(local);
+		_leave(" [CSUM failed]");
+		return;
+	}
+
+	/* the socket buffer we have is owned by UDP, with UDP's data all over
+	 * it, but we really want our own */
+	skb_orphan(skb);
+	sp = rxrpc_skb(skb);
+	memset(sp, 0, sizeof(*sp));
+
+	_net("Rx UDP packet from %08x:%04hu",
+	     ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source));
+
+	/* dig out the RxRPC connection details */
+	if (skb_copy_bits(skb, sizeof(struct udphdr), &sp->hdr,
+			  sizeof(sp->hdr)) < 0)
+		goto bad_message;
+	if (!pskb_pull(skb, sizeof(struct udphdr) + sizeof(sp->hdr)))
+		BUG();
+
+	_net("Rx RxRPC %s ep=%x call=%x:%x",
+	     sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient",
+	     ntohl(sp->hdr.epoch),
+	     ntohl(sp->hdr.cid),
+	     ntohl(sp->hdr.callNumber));
+
+	if (sp->hdr.type == 0 || sp->hdr.type >= RXRPC_N_PACKET_TYPES) {
+		_proto("Rx Bad Packet Type %u", sp->hdr.type);
+		goto bad_message;
+	}
+
+	if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
+	    (sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
+		goto bad_message;
+
+	peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr, udp_hdr(skb)->source);
+	if (IS_ERR(peer))
+		goto cant_route_call;
+
+	trans = rxrpc_find_transport(local, peer);
+	rxrpc_put_peer(peer);
+	if (!trans)
+		goto cant_route_call;
+
+	conn = rxrpc_find_connection(trans, &sp->hdr);
+	rxrpc_put_transport(trans);
+	if (!conn)
+		goto cant_route_call;
+
+	_debug("CONN %p {%d}", conn, conn->debug_id);
+
+	if (sp->hdr.callNumber == 0)
+		rxrpc_post_packet_to_conn(conn, skb);
+	else
+		rxrpc_post_packet_to_call(conn, skb);
+	rxrpc_put_connection(conn);
+	rxrpc_put_local(local);
+	return;
+
+cant_route_call:
+	_debug("can't route call");
+	if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
+	    sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
+		if (sp->hdr.seq == __constant_cpu_to_be32(1)) {
+			_debug("first packet");
+			skb_queue_tail(&local->accept_queue, skb);
+			schedule_work(&local->acceptor);
+			rxrpc_put_local(local);
+			_leave(" [incoming]");
+			return;
+		}
+		skb->priority = RX_INVALID_OPERATION;
+	} else {
+		skb->priority = RX_CALL_DEAD;
+	}
+
+	_debug("reject");
+	rxrpc_reject_packet(local, skb);
+	rxrpc_put_local(local);
+	_leave(" [no call]");
+	return;
+
+bad_message:
+	skb->priority = RX_PROTOCOL_ERROR;
+	rxrpc_reject_packet(local, skb);
+	rxrpc_put_local(local);
+	_leave(" [badmsg]");
+}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
new file mode 100644
index 00000000000..7bfbf471c81
--- /dev/null
+++ b/net/rxrpc/ar-internal.h
@@ -0,0 +1,842 @@
+/* AF_RXRPC internal definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <rxrpc/packet.h>
+
+#if 0
+#define CHECK_SLAB_OKAY(X)				     \
+	BUG_ON(atomic_read((X)) >> (sizeof(atomic_t) - 2) == \
+	       (POISON_FREE << 8 | POISON_FREE))
+#else
+#define CHECK_SLAB_OKAY(X) do {} while(0)
+#endif
+
+extern atomic_t rxrpc_n_skbs;
+
+#define FCRYPT_BSIZE 8
+struct rxrpc_crypt {
+	union {
+		u8	x[FCRYPT_BSIZE];
+		u32	n[2];
+	};
+} __attribute__((aligned(8)));
+
+extern __be32 rxrpc_epoch;		/* local epoch for detecting local-end reset */
+extern atomic_t rxrpc_debug_id;		/* current debugging ID */
+
+/*
+ * sk_state for RxRPC sockets
+ */
+enum {
+	RXRPC_UNCONNECTED = 0,
+	RXRPC_CLIENT_BOUND,		/* client local address bound */
+	RXRPC_CLIENT_CONNECTED,		/* client is connected */
+	RXRPC_SERVER_BOUND,		/* server local address bound */
+	RXRPC_SERVER_LISTENING,		/* server listening for connections */
+	RXRPC_CLOSE,			/* socket is being closed */
+};
+
+/*
+ * RxRPC socket definition
+ */
+struct rxrpc_sock {
+	/* WARNING: sk has to be the first member */
+	struct sock		sk;
+	struct rxrpc_local	*local;		/* local endpoint */
+	struct rxrpc_transport	*trans;		/* transport handler */
+	struct rxrpc_conn_bundle *bundle;	/* virtual connection bundle */
+	struct rxrpc_connection	*conn;		/* exclusive virtual connection */
+	struct list_head	listen_link;	/* link in the local endpoint's listen list */
+	struct list_head	secureq;	/* calls awaiting connection security clearance */
+	struct list_head	acceptq;	/* calls awaiting acceptance */
+	struct key		*key;		/* security for this socket */
+	struct key		*securities;	/* list of server security descriptors */
+	struct rb_root		calls;		/* outstanding calls on this socket */
+	unsigned long		flags;
+#define RXRPC_SOCK_EXCLUSIVE_CONN	1	/* exclusive connection for a client socket */
+	rwlock_t		call_lock;	/* lock for calls */
+	u32			min_sec_level;	/* minimum security level */
+#define RXRPC_SECURITY_MAX	RXRPC_SECURITY_ENCRYPT
+	struct sockaddr_rxrpc	srx;		/* local address */
+	sa_family_t		proto;		/* protocol created with */
+	__be16			service_id;	/* service ID of local/remote service */
+};
+
+#define rxrpc_sk(__sk) container_of((__sk), struct rxrpc_sock, sk)
+
+/*
+ * RxRPC socket buffer private variables
+ * - max 48 bytes (struct sk_buff::cb)
+ */
+struct rxrpc_skb_priv {
+	struct rxrpc_call	*call;		/* call with which associated */
+	unsigned long		resend_at;	/* time in jiffies at which to resend */
+	union {
+		unsigned	offset;		/* offset into buffer of next read */
+		int		remain;		/* amount of space remaining for next write */
+		u32		error;		/* network error code */
+		bool		need_resend;	/* T if needs resending */
+	};
+
+	struct rxrpc_header	hdr;		/* RxRPC packet header from this packet */
+};
+
+#define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb)
+
+enum {
+	RXRPC_SKB_MARK_DATA,		/* data message */
+	RXRPC_SKB_MARK_FINAL_ACK,	/* final ACK received message */
+	RXRPC_SKB_MARK_BUSY,		/* server busy message */
+	RXRPC_SKB_MARK_REMOTE_ABORT,	/* remote abort message */
+	RXRPC_SKB_MARK_NET_ERROR,	/* network error message */
+	RXRPC_SKB_MARK_LOCAL_ERROR,	/* local error message */
+	RXRPC_SKB_MARK_NEW_CALL,	/* local error message */
+};
+
+enum rxrpc_command {
+	RXRPC_CMD_SEND_DATA,		/* send data message */
+	RXRPC_CMD_SEND_ABORT,		/* request abort generation */
+	RXRPC_CMD_ACCEPT,		/* [server] accept incoming call */
+	RXRPC_CMD_REJECT_BUSY,		/* [server] reject a call as busy */
+};
+
+/*
+ * RxRPC security module interface
+ */
+struct rxrpc_security {
+	struct module		*owner;		/* providing module */
+	struct list_head	link;		/* link in master list */
+	const char		*name;		/* name of this service */
+	u8			security_index;	/* security type provided */
+
+	/* initialise a connection's security */
+	int (*init_connection_security)(struct rxrpc_connection *);
+
+	/* prime a connection's packet security */
+	void (*prime_packet_security)(struct rxrpc_connection *);
+
+	/* impose security on a packet */
+	int (*secure_packet)(const struct rxrpc_call *,
+			     struct sk_buff *,
+			     size_t,
+			     void *);
+
+	/* verify the security on a received packet */
+	int (*verify_packet)(const struct rxrpc_call *, struct sk_buff *,
+			     u32 *);
+
+	/* issue a challenge */
+	int (*issue_challenge)(struct rxrpc_connection *);
+
+	/* respond to a challenge */
+	int (*respond_to_challenge)(struct rxrpc_connection *,
+				    struct sk_buff *,
+				    u32 *);
+
+	/* verify a response */
+	int (*verify_response)(struct rxrpc_connection *,
+			       struct sk_buff *,
+			       u32 *);
+
+	/* clear connection security */
+	void (*clear)(struct rxrpc_connection *);
+};
+
+/*
+ * RxRPC local transport endpoint definition
+ * - matched by local port, address and protocol type
+ */
+struct rxrpc_local {
+	struct socket		*socket;	/* my UDP socket */
+	struct work_struct	destroyer;	/* endpoint destroyer */
+	struct work_struct	acceptor;	/* incoming call processor */
+	struct work_struct	rejecter;	/* packet reject writer */
+	struct list_head	services;	/* services listening on this endpoint */
+	struct list_head	link;		/* link in endpoint list */
+	struct rw_semaphore	defrag_sem;	/* control re-enablement of IP DF bit */
+	struct sk_buff_head	accept_queue;	/* incoming calls awaiting acceptance */
+	struct sk_buff_head	reject_queue;	/* packets awaiting rejection */
+	spinlock_t		lock;		/* access lock */
+	rwlock_t		services_lock;	/* lock for services list */
+	atomic_t		usage;
+	int			debug_id;	/* debug ID for printks */
+	volatile char		error_rcvd;	/* T if received ICMP error outstanding */
+	struct sockaddr_rxrpc	srx;		/* local address */
+};
+
+/*
+ * RxRPC remote transport endpoint definition
+ * - matched by remote port, address and protocol type
+ * - holds the connection ID counter for connections between the two endpoints
+ */
+struct rxrpc_peer {
+	struct work_struct	destroyer;	/* peer destroyer */
+	struct list_head	link;		/* link in master peer list */
+	struct list_head	error_targets;	/* targets for net error distribution */
+	spinlock_t		lock;		/* access lock */
+	atomic_t		usage;
+	unsigned		if_mtu;		/* interface MTU for this peer */
+	unsigned		mtu;		/* network MTU for this peer */
+	unsigned		maxdata;	/* data size (MTU - hdrsize) */
+	unsigned short		hdrsize;	/* header size (IP + UDP + RxRPC) */
+	int			debug_id;	/* debug ID for printks */
+	int			net_error;	/* network error distributed */
+	struct sockaddr_rxrpc	srx;		/* remote address */
+
+	/* calculated RTT cache */
+#define RXRPC_RTT_CACHE_SIZE 32
+	suseconds_t		rtt;		/* current RTT estimate (in uS) */
+	unsigned		rtt_point;	/* next entry at which to insert */
+	unsigned		rtt_usage;	/* amount of cache actually used */
+	suseconds_t		rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* calculated RTT cache */
+};
+
+/*
+ * RxRPC point-to-point transport / connection manager definition
+ * - handles a bundle of connections between two endpoints
+ * - matched by { local, peer }
+ */
+struct rxrpc_transport {
+	struct rxrpc_local	*local;		/* local transport endpoint */
+	struct rxrpc_peer	*peer;		/* remote transport endpoint */
+	struct work_struct	error_handler;	/* network error distributor */
+	struct rb_root		bundles;	/* client connection bundles on this transport */
+	struct rb_root		client_conns;	/* client connections on this transport */
+	struct rb_root		server_conns;	/* server connections on this transport */
+	struct list_head	link;		/* link in master session list */
+	struct sk_buff_head	error_queue;	/* error packets awaiting processing */
+	time_t			put_time;	/* time at which to reap */
+	spinlock_t		client_lock;	/* client connection allocation lock */
+	rwlock_t		conn_lock;	/* lock for active/dead connections */
+	atomic_t		usage;
+	int			debug_id;	/* debug ID for printks */
+	unsigned int		conn_idcounter;	/* connection ID counter (client) */
+};
+
+/*
+ * RxRPC client connection bundle
+ * - matched by { transport, service_id, key }
+ */
+struct rxrpc_conn_bundle {
+	struct rb_node		node;		/* node in transport's lookup tree */
+	struct list_head	unused_conns;	/* unused connections in this bundle */
+	struct list_head	avail_conns;	/* available connections in this bundle */
+	struct list_head	busy_conns;	/* busy connections in this bundle */
+	struct key		*key;		/* security for this bundle */
+	wait_queue_head_t	chanwait;	/* wait for channel to become available */
+	atomic_t		usage;
+	int			debug_id;	/* debug ID for printks */
+	unsigned short		num_conns;	/* number of connections in this bundle */
+	__be16			service_id;	/* service ID */
+	uint8_t			security_ix;	/* security type */
+};
+
+/*
+ * RxRPC connection definition
+ * - matched by { transport, service_id, conn_id, direction, key }
+ * - each connection can only handle four simultaneous calls
+ */
+struct rxrpc_connection {
+	struct rxrpc_transport	*trans;		/* transport session */
+	struct rxrpc_conn_bundle *bundle;	/* connection bundle (client) */
+	struct work_struct	processor;	/* connection event processor */
+	struct rb_node		node;		/* node in transport's lookup tree */
+	struct list_head	link;		/* link in master connection list */
+	struct list_head	bundle_link;	/* link in bundle */
+	struct rb_root		calls;		/* calls on this connection */
+	struct sk_buff_head	rx_queue;	/* received conn-level packets */
+	struct rxrpc_call	*channels[RXRPC_MAXCALLS]; /* channels (active calls) */
+	struct rxrpc_security	*security;	/* applied security module */
+	struct key		*key;		/* security for this connection (client) */
+	struct key		*server_key;	/* security for this service */
+	struct crypto_blkcipher	*cipher;	/* encryption handle */
+	struct rxrpc_crypt	csum_iv;	/* packet checksum base */
+	unsigned long		events;
+#define RXRPC_CONN_CHALLENGE	0		/* send challenge packet */
+	time_t			put_time;	/* time at which to reap */
+	rwlock_t		lock;		/* access lock */
+	spinlock_t		state_lock;	/* state-change lock */
+	atomic_t		usage;
+	u32			real_conn_id;	/* connection ID (host-endian) */
+	enum {					/* current state of connection */
+		RXRPC_CONN_UNUSED,		/* - connection not yet attempted */
+		RXRPC_CONN_CLIENT,		/* - client connection */
+		RXRPC_CONN_SERVER_UNSECURED,	/* - server unsecured connection */
+		RXRPC_CONN_SERVER_CHALLENGING,	/* - server challenging for security */
+		RXRPC_CONN_SERVER,		/* - server secured connection */
+		RXRPC_CONN_REMOTELY_ABORTED,	/* - conn aborted by peer */
+		RXRPC_CONN_LOCALLY_ABORTED,	/* - conn aborted locally */
+		RXRPC_CONN_NETWORK_ERROR,	/* - conn terminated by network error */
+	} state;
+	int			error;		/* error code for local abort */
+	int			debug_id;	/* debug ID for printks */
+	unsigned		call_counter;	/* call ID counter */
+	atomic_t		serial;		/* packet serial number counter */
+	atomic_t		hi_serial;	/* highest serial number received */
+	u8			avail_calls;	/* number of calls available */
+	u8			size_align;	/* data size alignment (for security) */
+	u8			header_size;	/* rxrpc + security header size */
+	u8			security_size;	/* security header size */
+	u32			security_level;	/* security level negotiated */
+	u32			security_nonce;	/* response re-use preventer */
+
+	/* the following are all in net order */
+	__be32			epoch;		/* epoch of this connection */
+	__be32			cid;		/* connection ID */
+	__be16			service_id;	/* service ID */
+	u8			security_ix;	/* security type */
+	u8			in_clientflag;	/* RXRPC_CLIENT_INITIATED if we are server */
+	u8			out_clientflag;	/* RXRPC_CLIENT_INITIATED if we are client */
+};
+
+/*
+ * RxRPC call definition
+ * - matched by { connection, call_id }
+ */
+struct rxrpc_call {
+	struct rxrpc_connection	*conn;		/* connection carrying call */
+	struct rxrpc_sock	*socket;	/* socket responsible */
+	struct timer_list	lifetimer;	/* lifetime remaining on call */
+	struct timer_list	deadspan;	/* reap timer for re-ACK'ing, etc  */
+	struct timer_list	ack_timer;	/* ACK generation timer */
+	struct timer_list	resend_timer;	/* Tx resend timer */
+	struct work_struct	destroyer;	/* call destroyer */
+	struct work_struct	processor;	/* packet processor and ACK generator */
+	struct list_head	link;		/* link in master call list */
+	struct list_head	error_link;	/* link in error distribution list */
+	struct list_head	accept_link;	/* calls awaiting acceptance */
+	struct rb_node		sock_node;	/* node in socket call tree */
+	struct rb_node		conn_node;	/* node in connection call tree */
+	struct sk_buff_head	rx_queue;	/* received packets */
+	struct sk_buff_head	rx_oos_queue;	/* packets received out of sequence */
+	struct sk_buff		*tx_pending;	/* Tx socket buffer being filled */
+	wait_queue_head_t	tx_waitq;	/* wait for Tx window space to become available */
+	unsigned long		user_call_ID;	/* user-defined call ID */
+	unsigned long		creation_jif;	/* time of call creation */
+	unsigned long		flags;
+#define RXRPC_CALL_RELEASED	0	/* call has been released - no more message to userspace */
+#define RXRPC_CALL_TERMINAL_MSG	1	/* call has given the socket its final message */
+#define RXRPC_CALL_RCVD_LAST	2	/* all packets received */
+#define RXRPC_CALL_RUN_RTIMER	3	/* Tx resend timer started */
+#define RXRPC_CALL_TX_SOFT_ACK	4	/* sent some soft ACKs */
+#define RXRPC_CALL_PROC_BUSY	5	/* the processor is busy */
+#define RXRPC_CALL_INIT_ACCEPT	6	/* acceptance was initiated */
+#define RXRPC_CALL_HAS_USERID	7	/* has a user ID attached */
+#define RXRPC_CALL_EXPECT_OOS	8	/* expect out of sequence packets */
+	unsigned long		events;
+#define RXRPC_CALL_RCVD_ACKALL	0	/* ACKALL or reply received */
+#define RXRPC_CALL_RCVD_BUSY	1	/* busy packet received */
+#define RXRPC_CALL_RCVD_ABORT	2	/* abort packet received */
+#define RXRPC_CALL_RCVD_ERROR	3	/* network error received */
+#define RXRPC_CALL_ACK_FINAL	4	/* need to generate final ACK (and release call) */
+#define RXRPC_CALL_ACK		5	/* need to generate ACK */
+#define RXRPC_CALL_REJECT_BUSY	6	/* need to generate busy message */
+#define RXRPC_CALL_ABORT	7	/* need to generate abort */
+#define RXRPC_CALL_CONN_ABORT	8	/* local connection abort generated */
+#define RXRPC_CALL_RESEND_TIMER	9	/* Tx resend timer expired */
+#define RXRPC_CALL_RESEND	10	/* Tx resend required */
+#define RXRPC_CALL_DRAIN_RX_OOS	11	/* drain the Rx out of sequence queue */
+#define RXRPC_CALL_LIFE_TIMER	12	/* call's lifetimer ran out */
+#define RXRPC_CALL_ACCEPTED	13	/* incoming call accepted by userspace app */
+#define RXRPC_CALL_SECURED	14	/* incoming call's connection is now secure */
+#define RXRPC_CALL_POST_ACCEPT	15	/* need to post an "accept?" message to the app */
+#define RXRPC_CALL_RELEASE	16	/* need to release the call's resources */
+
+	spinlock_t		lock;
+	rwlock_t		state_lock;	/* lock for state transition */
+	atomic_t		usage;
+	atomic_t		sequence;	/* Tx data packet sequence counter */
+	u32			abort_code;	/* local/remote abort code */
+	enum {					/* current state of call */
+		RXRPC_CALL_CLIENT_SEND_REQUEST,	/* - client sending request phase */
+		RXRPC_CALL_CLIENT_AWAIT_REPLY,	/* - client awaiting reply */
+		RXRPC_CALL_CLIENT_RECV_REPLY,	/* - client receiving reply phase */
+		RXRPC_CALL_CLIENT_FINAL_ACK,	/* - client sending final ACK phase */
+		RXRPC_CALL_SERVER_SECURING,	/* - server securing request connection */
+		RXRPC_CALL_SERVER_ACCEPTING,	/* - server accepting request */
+		RXRPC_CALL_SERVER_RECV_REQUEST,	/* - server receiving request */
+		RXRPC_CALL_SERVER_ACK_REQUEST,	/* - server pending ACK of request */
+		RXRPC_CALL_SERVER_SEND_REPLY,	/* - server sending reply */
+		RXRPC_CALL_SERVER_AWAIT_ACK,	/* - server awaiting final ACK */
+		RXRPC_CALL_COMPLETE,		/* - call completed */
+		RXRPC_CALL_SERVER_BUSY,		/* - call rejected by busy server */
+		RXRPC_CALL_REMOTELY_ABORTED,	/* - call aborted by peer */
+		RXRPC_CALL_LOCALLY_ABORTED,	/* - call aborted locally on error or close */
+		RXRPC_CALL_NETWORK_ERROR,	/* - call terminated by network error */
+		RXRPC_CALL_DEAD,		/* - call is dead */
+	} state;
+	int			debug_id;	/* debug ID for printks */
+	u8			channel;	/* connection channel occupied by this call */
+
+	/* transmission-phase ACK management */
+	uint8_t			acks_head;	/* offset into window of first entry */
+	uint8_t			acks_tail;	/* offset into window of last entry */
+	uint8_t			acks_winsz;	/* size of un-ACK'd window */
+	uint8_t			acks_unacked;	/* lowest unacked packet in last ACK received */
+	int			acks_latest;	/* serial number of latest ACK received */
+	rxrpc_seq_t		acks_hard;	/* highest definitively ACK'd msg seq */
+	unsigned long		*acks_window;	/* sent packet window
+						 * - elements are pointers with LSB set if ACK'd
+						 */
+
+	/* receive-phase ACK management */
+	rxrpc_seq_t		rx_data_expect;	/* next data seq ID expected to be received */
+	rxrpc_seq_t		rx_data_post;	/* next data seq ID expected to be posted */
+	rxrpc_seq_t		rx_data_recv;	/* last data seq ID encountered by recvmsg */
+	rxrpc_seq_t		rx_data_eaten;	/* last data seq ID consumed by recvmsg */
+	rxrpc_seq_t		rx_first_oos;	/* first packet in rx_oos_queue (or 0) */
+	rxrpc_seq_t		ackr_win_top;	/* top of ACK window (rx_data_eaten is bottom) */
+	rxrpc_seq_net_t		ackr_prev_seq;	/* previous sequence number received */
+	uint8_t			ackr_reason;	/* reason to ACK */
+	__be32			ackr_serial;	/* serial of packet being ACK'd */
+	atomic_t		ackr_not_idle;	/* number of packets in Rx queue */
+
+	/* received packet records, 1 bit per record */
+#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
+	unsigned long		ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
+
+	/* the following should all be in net order */
+	__be32			cid;		/* connection ID + channel index  */
+	__be32			call_id;	/* call ID on connection  */
+};
+
+/*
+ * RxRPC key for Kerberos (type-2 security)
+ */
+struct rxkad_key {
+	u16	security_index;		/* RxRPC header security index */
+	u16	ticket_len;		/* length of ticket[] */
+	u32	expiry;			/* time at which expires */
+	u32	kvno;			/* key version number */
+	u8	session_key[8];		/* DES session key */
+	u8	ticket[0];		/* the encrypted ticket */
+};
+
+struct rxrpc_key_payload {
+	struct rxkad_key k;
+};
+
+/*
+ * locally abort an RxRPC call
+ */
+static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code)
+{
+	write_lock_bh(&call->state_lock);
+	if (call->state < RXRPC_CALL_COMPLETE) {
+		call->abort_code = abort_code;
+		call->state = RXRPC_CALL_LOCALLY_ABORTED;
+		set_bit(RXRPC_CALL_ABORT, &call->events);
+	}
+	write_unlock_bh(&call->state_lock);
+}
+
+/*
+ * put a packet up for transport-level abort
+ */
+static inline
+void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
+{
+	CHECK_SLAB_OKAY(&local->usage);
+	if (!atomic_inc_not_zero(&local->usage)) {
+		printk("resurrected on reject\n");
+		BUG();
+	}
+	skb_queue_tail(&local->reject_queue, skb);
+	schedule_work(&local->rejecter);
+}
+
+/*
+ * ar-accept.c
+ */
+extern void rxrpc_accept_incoming_calls(struct work_struct *);
+extern int rxrpc_accept_call(struct rxrpc_sock *, unsigned long);
+
+/*
+ * ar-ack.c
+ */
+extern void __rxrpc_propose_ACK(struct rxrpc_call *, uint8_t, __be32, bool);
+extern void rxrpc_propose_ACK(struct rxrpc_call *, uint8_t, __be32, bool);
+extern void rxrpc_process_call(struct work_struct *);
+
+/*
+ * ar-call.c
+ */
+extern struct kmem_cache *rxrpc_call_jar;
+extern struct list_head rxrpc_calls;
+extern rwlock_t rxrpc_call_lock;
+
+extern struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *,
+						struct rxrpc_transport *,
+						struct rxrpc_conn_bundle *,
+						unsigned long, int, gfp_t);
+extern struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *,
+					      struct rxrpc_connection *,
+					      struct rxrpc_header *, gfp_t);
+extern struct rxrpc_call *rxrpc_find_server_call(struct rxrpc_sock *,
+						 unsigned long);
+extern void rxrpc_release_call(struct rxrpc_call *);
+extern void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
+extern void __rxrpc_put_call(struct rxrpc_call *);
+extern void __exit rxrpc_destroy_all_calls(void);
+
+/*
+ * ar-connection.c
+ */
+extern struct list_head rxrpc_connections;
+extern rwlock_t rxrpc_connection_lock;
+
+extern struct rxrpc_conn_bundle *rxrpc_get_bundle(struct rxrpc_sock *,
+						  struct rxrpc_transport *,
+						  struct key *,
+						  __be16, gfp_t);
+extern void rxrpc_put_bundle(struct rxrpc_transport *,
+			     struct rxrpc_conn_bundle *);
+extern int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_transport *,
+			      struct rxrpc_conn_bundle *, struct rxrpc_call *,
+			      gfp_t);
+extern void rxrpc_put_connection(struct rxrpc_connection *);
+extern void __exit rxrpc_destroy_all_connections(void);
+extern struct rxrpc_connection *rxrpc_find_connection(struct rxrpc_transport *,
+						      struct rxrpc_header *);
+extern struct rxrpc_connection *
+rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_header *,
+			  gfp_t);
+
+/*
+ * ar-connevent.c
+ */
+extern void rxrpc_process_connection(struct work_struct *);
+extern void rxrpc_reject_packets(struct work_struct *);
+
+/*
+ * ar-error.c
+ */
+extern void rxrpc_UDP_error_report(struct sock *);
+extern void rxrpc_UDP_error_handler(struct work_struct *);
+
+/*
+ * ar-input.c
+ */
+extern unsigned long rxrpc_ack_timeout;
+extern const char *rxrpc_pkts[];
+
+extern void rxrpc_data_ready(struct sock *, int);
+extern int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool,
+			       bool);
+extern void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *);
+
+/*
+ * ar-local.c
+ */
+extern rwlock_t rxrpc_local_lock;
+extern struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *);
+extern void rxrpc_put_local(struct rxrpc_local *);
+extern void __exit rxrpc_destroy_all_locals(void);
+
+/*
+ * ar-key.c
+ */
+extern struct key_type key_type_rxrpc;
+extern struct key_type key_type_rxrpc_s;
+
+extern int rxrpc_request_key(struct rxrpc_sock *, char __user *, int);
+extern int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int);
+extern int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *,
+				     time_t, u32);
+
+/*
+ * ar-output.c
+ */
+extern int rxrpc_resend_timeout;
+
+extern int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
+extern int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *,
+				struct rxrpc_transport *, struct msghdr *,
+				size_t);
+extern int rxrpc_server_sendmsg(struct kiocb *, struct rxrpc_sock *,
+				struct msghdr *, size_t);
+
+/*
+ * ar-peer.c
+ */
+extern struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *, gfp_t);
+extern void rxrpc_put_peer(struct rxrpc_peer *);
+extern struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *,
+					  __be32, __be16);
+extern void __exit rxrpc_destroy_all_peers(void);
+
+/*
+ * ar-proc.c
+ */
+extern const char *rxrpc_call_states[];
+extern struct file_operations rxrpc_call_seq_fops;
+extern struct file_operations rxrpc_connection_seq_fops;
+
+/*
+ * ar-recvmsg.c
+ */
+extern int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *,
+			 size_t, int);
+
+/*
+ * ar-security.c
+ */
+extern int rxrpc_register_security(struct rxrpc_security *);
+extern void rxrpc_unregister_security(struct rxrpc_security *);
+extern int rxrpc_init_client_conn_security(struct rxrpc_connection *);
+extern int rxrpc_init_server_conn_security(struct rxrpc_connection *);
+extern int rxrpc_secure_packet(const struct rxrpc_call *, struct sk_buff *,
+			       size_t, void *);
+extern int rxrpc_verify_packet(const struct rxrpc_call *, struct sk_buff *,
+			       u32 *);
+extern void rxrpc_clear_conn_security(struct rxrpc_connection *);
+
+/*
+ * ar-skbuff.c
+ */
+extern void rxrpc_packet_destructor(struct sk_buff *);
+
+/*
+ * ar-transport.c
+ */
+extern struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *,
+						   struct rxrpc_peer *,
+						   gfp_t);
+extern void rxrpc_put_transport(struct rxrpc_transport *);
+extern void __exit rxrpc_destroy_all_transports(void);
+extern struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *,
+						    struct rxrpc_peer *);
+
+/*
+ * debug tracing
+ */
+extern unsigned rxrpc_debug;
+
+#define dbgprintk(FMT,...) \
+	printk("[%x%-6.6s] "FMT"\n", smp_processor_id(), current->comm ,##__VA_ARGS__)
+
+/* make sure we maintain the format strings, even when debugging is disabled */
+static inline __attribute__((format(printf,1,2)))
+void _dbprintk(const char *fmt, ...)
+{
+}
+
+#define kenter(FMT,...)	dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
+#define kleave(FMT,...)	dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
+#define kdebug(FMT,...)	dbgprintk("    "FMT ,##__VA_ARGS__)
+#define kproto(FMT,...)	dbgprintk("### "FMT ,##__VA_ARGS__)
+#define knet(FMT,...)	dbgprintk("@@@ "FMT ,##__VA_ARGS__)
+
+
+#if defined(__KDEBUG)
+#define _enter(FMT,...)	kenter(FMT,##__VA_ARGS__)
+#define _leave(FMT,...)	kleave(FMT,##__VA_ARGS__)
+#define _debug(FMT,...)	kdebug(FMT,##__VA_ARGS__)
+#define _proto(FMT,...)	kproto(FMT,##__VA_ARGS__)
+#define _net(FMT,...)	knet(FMT,##__VA_ARGS__)
+
+#elif defined(CONFIG_AF_RXRPC_DEBUG)
+#define RXRPC_DEBUG_KENTER	0x01
+#define RXRPC_DEBUG_KLEAVE	0x02
+#define RXRPC_DEBUG_KDEBUG	0x04
+#define RXRPC_DEBUG_KPROTO	0x08
+#define RXRPC_DEBUG_KNET	0x10
+
+#define _enter(FMT,...)					\
+do {							\
+	if (unlikely(rxrpc_debug & RXRPC_DEBUG_KENTER))	\
+		kenter(FMT,##__VA_ARGS__);		\
+} while (0)
+
+#define _leave(FMT,...)					\
+do {							\
+	if (unlikely(rxrpc_debug & RXRPC_DEBUG_KLEAVE))	\
+		kleave(FMT,##__VA_ARGS__);		\
+} while (0)
+
+#define _debug(FMT,...)					\
+do {							\
+	if (unlikely(rxrpc_debug & RXRPC_DEBUG_KDEBUG))	\
+		kdebug(FMT,##__VA_ARGS__);		\
+} while (0)
+
+#define _proto(FMT,...)					\
+do {							\
+	if (unlikely(rxrpc_debug & RXRPC_DEBUG_KPROTO))	\
+		kproto(FMT,##__VA_ARGS__);		\
+} while (0)
+
+#define _net(FMT,...)					\
+do {							\
+	if (unlikely(rxrpc_debug & RXRPC_DEBUG_KNET))	\
+		knet(FMT,##__VA_ARGS__);		\
+} while (0)
+
+#else
+#define _enter(FMT,...)	_dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
+#define _leave(FMT,...)	_dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
+#define _debug(FMT,...)	_dbprintk("    "FMT ,##__VA_ARGS__)
+#define _proto(FMT,...)	_dbprintk("### "FMT ,##__VA_ARGS__)
+#define _net(FMT,...)	_dbprintk("@@@ "FMT ,##__VA_ARGS__)
+#endif
+
+/*
+ * debug assertion checking
+ */
+#if 1 // defined(__KDEBUGALL)
+
+#define ASSERT(X)						\
+do {								\
+	if (unlikely(!(X))) {					\
+		printk(KERN_ERR "\n");				\
+		printk(KERN_ERR "RxRPC: Assertion failed\n");	\
+		BUG();						\
+	}							\
+} while(0)
+
+#define ASSERTCMP(X, OP, Y)						\
+do {									\
+	if (unlikely(!((X) OP (Y)))) {					\
+		printk(KERN_ERR "\n");					\
+		printk(KERN_ERR "RxRPC: Assertion failed\n");		\
+		printk(KERN_ERR "%lu " #OP " %lu is false\n",		\
+		       (unsigned long)(X), (unsigned long)(Y));		\
+		printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n",	\
+		       (unsigned long)(X), (unsigned long)(Y));		\
+		BUG();							\
+	}								\
+} while(0)
+
+#define ASSERTIF(C, X)						\
+do {								\
+	if (unlikely((C) && !(X))) {				\
+		printk(KERN_ERR "\n");				\
+		printk(KERN_ERR "RxRPC: Assertion failed\n");	\
+		BUG();						\
+	}							\
+} while(0)
+
+#define ASSERTIFCMP(C, X, OP, Y)					\
+do {									\
+	if (unlikely((C) && !((X) OP (Y)))) {				\
+		printk(KERN_ERR "\n");					\
+		printk(KERN_ERR "RxRPC: Assertion failed\n");		\
+		printk(KERN_ERR "%lu " #OP " %lu is false\n",		\
+		       (unsigned long)(X), (unsigned long)(Y));		\
+		printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n",	\
+		       (unsigned long)(X), (unsigned long)(Y));		\
+		BUG();							\
+	}								\
+} while(0)
+
+#else
+
+#define ASSERT(X)				\
+do {						\
+} while(0)
+
+#define ASSERTCMP(X, OP, Y)			\
+do {						\
+} while(0)
+
+#define ASSERTIF(C, X)				\
+do {						\
+} while(0)
+
+#define ASSERTIFCMP(C, X, OP, Y)		\
+do {						\
+} while(0)
+
+#endif /* __KDEBUGALL */
+
+/*
+ * socket buffer accounting / leak finding
+ */
+static inline void __rxrpc_new_skb(struct sk_buff *skb, const char *fn)
+{
+	//_net("new skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
+	//atomic_inc(&rxrpc_n_skbs);
+}
+
+#define rxrpc_new_skb(skb) __rxrpc_new_skb((skb), __func__)
+
+static inline void __rxrpc_kill_skb(struct sk_buff *skb, const char *fn)
+{
+	//_net("kill skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
+	//atomic_dec(&rxrpc_n_skbs);
+}
+
+#define rxrpc_kill_skb(skb) __rxrpc_kill_skb((skb), __func__)
+
+static inline void __rxrpc_free_skb(struct sk_buff *skb, const char *fn)
+{
+	if (skb) {
+		CHECK_SLAB_OKAY(&skb->users);
+		//_net("free skb %p %s [%d]",
+		//     skb, fn, atomic_read(&rxrpc_n_skbs));
+		//atomic_dec(&rxrpc_n_skbs);
+		kfree_skb(skb);
+	}
+}
+
+#define rxrpc_free_skb(skb) __rxrpc_free_skb((skb), __func__)
+
+static inline void rxrpc_purge_queue(struct sk_buff_head *list)
+{
+	struct sk_buff *skb;
+	while ((skb = skb_dequeue((list))) != NULL)
+		rxrpc_free_skb(skb);
+}
+
+static inline void __rxrpc__atomic_inc(atomic_t *v)
+{
+	CHECK_SLAB_OKAY(v);
+	atomic_inc(v);
+}
+
+#define atomic_inc(v) __rxrpc__atomic_inc((v))
+
+static inline void __rxrpc__atomic_dec(atomic_t *v)
+{
+	CHECK_SLAB_OKAY(v);
+	atomic_dec(v);
+}
+
+#define atomic_dec(v) __rxrpc__atomic_dec((v))
+
+static inline int __rxrpc__atomic_dec_and_test(atomic_t *v)
+{
+	CHECK_SLAB_OKAY(v);
+	return atomic_dec_and_test(v);
+}
+
+#define atomic_dec_and_test(v) __rxrpc__atomic_dec_and_test((v))
+
+static inline void __rxrpc_get_local(struct rxrpc_local *local, const char *f)
+{
+	CHECK_SLAB_OKAY(&local->usage);
+	if (atomic_inc_return(&local->usage) == 1)
+		printk("resurrected (%s)\n", f);
+}
+
+#define rxrpc_get_local(LOCAL) __rxrpc_get_local((LOCAL), __func__)
+
+#define rxrpc_get_call(CALL)				\
+do {							\
+	CHECK_SLAB_OKAY(&(CALL)->usage);		\
+	if (atomic_inc_return(&(CALL)->usage) == 1)	\
+		BUG();					\
+} while(0)
+
+#define rxrpc_put_call(CALL)				\
+do {							\
+	__rxrpc_put_call(CALL);				\
+} while(0)
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
new file mode 100644
index 00000000000..7e049ff6ae6
--- /dev/null
+++ b/net/rxrpc/ar-key.c
@@ -0,0 +1,334 @@
+/* RxRPC key management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * RxRPC keys should have a description of describing their purpose:
+ *	"afs@CAMBRIDGE.REDHAT.COM>
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/key.h>
+#include <linux/crypto.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <keys/rxrpc-type.h>
+#include <keys/user-type.h>
+#include "ar-internal.h"
+
+static int rxrpc_instantiate(struct key *, const void *, size_t);
+static int rxrpc_instantiate_s(struct key *, const void *, size_t);
+static void rxrpc_destroy(struct key *);
+static void rxrpc_destroy_s(struct key *);
+static void rxrpc_describe(const struct key *, struct seq_file *);
+
+/*
+ * rxrpc defined keys take an arbitrary string as the description and an
+ * arbitrary blob of data as the payload
+ */
+struct key_type key_type_rxrpc = {
+	.name		= "rxrpc",
+	.instantiate	= rxrpc_instantiate,
+	.match		= user_match,
+	.destroy	= rxrpc_destroy,
+	.describe	= rxrpc_describe,
+};
+
+EXPORT_SYMBOL(key_type_rxrpc);
+
+/*
+ * rxrpc server defined keys take "<serviceId>:<securityIndex>" as the
+ * description and an 8-byte decryption key as the payload
+ */
+struct key_type key_type_rxrpc_s = {
+	.name		= "rxrpc_s",
+	.instantiate	= rxrpc_instantiate_s,
+	.match		= user_match,
+	.destroy	= rxrpc_destroy_s,
+	.describe	= rxrpc_describe,
+};
+
+/*
+ * instantiate an rxrpc defined key
+ * data should be of the form:
+ *	OFFSET	LEN	CONTENT
+ *	0	4	key interface version number
+ *	4	2	security index (type)
+ *	6	2	ticket length
+ *	8	4	key expiry time (time_t)
+ *	12	4	kvno
+ *	16	8	session key
+ *	24	[len]	ticket
+ *
+ * if no data is provided, then a no-security key is made
+ */
+static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
+{
+	const struct rxkad_key *tsec;
+	struct rxrpc_key_payload *upayload;
+	size_t plen;
+	u32 kver;
+	int ret;
+
+	_enter("{%x},,%zu", key_serial(key), datalen);
+
+	/* handle a no-security key */
+	if (!data && datalen == 0)
+		return 0;
+
+	/* get the key interface version number */
+	ret = -EINVAL;
+	if (datalen <= 4 || !data)
+		goto error;
+	memcpy(&kver, data, sizeof(kver));
+	data += sizeof(kver);
+	datalen -= sizeof(kver);
+
+	_debug("KEY I/F VERSION: %u", kver);
+
+	ret = -EKEYREJECTED;
+	if (kver != 1)
+		goto error;
+
+	/* deal with a version 1 key */
+	ret = -EINVAL;
+	if (datalen < sizeof(*tsec))
+		goto error;
+
+	tsec = data;
+	if (datalen != sizeof(*tsec) + tsec->ticket_len)
+		goto error;
+
+	_debug("SCIX: %u", tsec->security_index);
+	_debug("TLEN: %u", tsec->ticket_len);
+	_debug("EXPY: %x", tsec->expiry);
+	_debug("KVNO: %u", tsec->kvno);
+	_debug("SKEY: %02x%02x%02x%02x%02x%02x%02x%02x",
+	       tsec->session_key[0], tsec->session_key[1],
+	       tsec->session_key[2], tsec->session_key[3],
+	       tsec->session_key[4], tsec->session_key[5],
+	       tsec->session_key[6], tsec->session_key[7]);
+	if (tsec->ticket_len >= 8)
+		_debug("TCKT: %02x%02x%02x%02x%02x%02x%02x%02x",
+		       tsec->ticket[0], tsec->ticket[1],
+		       tsec->ticket[2], tsec->ticket[3],
+		       tsec->ticket[4], tsec->ticket[5],
+		       tsec->ticket[6], tsec->ticket[7]);
+
+	ret = -EPROTONOSUPPORT;
+	if (tsec->security_index != 2)
+		goto error;
+
+	key->type_data.x[0] = tsec->security_index;
+
+	plen = sizeof(*upayload) + tsec->ticket_len;
+	ret = key_payload_reserve(key, plen);
+	if (ret < 0)
+		goto error;
+
+	ret = -ENOMEM;
+	upayload = kmalloc(plen, GFP_KERNEL);
+	if (!upayload)
+		goto error;
+
+	/* attach the data */
+	memcpy(&upayload->k, tsec, sizeof(*tsec));
+	memcpy(&upayload->k.ticket, (void *)tsec + sizeof(*tsec),
+	       tsec->ticket_len);
+	key->payload.data = upayload;
+	key->expiry = tsec->expiry;
+	ret = 0;
+
+error:
+	return ret;
+}
+
+/*
+ * instantiate a server secret key
+ * data should be a pointer to the 8-byte secret key
+ */
+static int rxrpc_instantiate_s(struct key *key, const void *data,
+			       size_t datalen)
+{
+	struct crypto_blkcipher *ci;
+
+	_enter("{%x},,%zu", key_serial(key), datalen);
+
+	if (datalen != 8)
+		return -EINVAL;
+
+	memcpy(&key->type_data, data, 8);
+
+	ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(ci)) {
+		_leave(" = %ld", PTR_ERR(ci));
+		return PTR_ERR(ci);
+	}
+
+	if (crypto_blkcipher_setkey(ci, data, 8) < 0)
+		BUG();
+
+	key->payload.data = ci;
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * dispose of the data dangling from the corpse of a rxrpc key
+ */
+static void rxrpc_destroy(struct key *key)
+{
+	kfree(key->payload.data);
+}
+
+/*
+ * dispose of the data dangling from the corpse of a rxrpc key
+ */
+static void rxrpc_destroy_s(struct key *key)
+{
+	if (key->payload.data) {
+		crypto_free_blkcipher(key->payload.data);
+		key->payload.data = NULL;
+	}
+}
+
+/*
+ * describe the rxrpc key
+ */
+static void rxrpc_describe(const struct key *key, struct seq_file *m)
+{
+	seq_puts(m, key->description);
+}
+
+/*
+ * grab the security key for a socket
+ */
+int rxrpc_request_key(struct rxrpc_sock *rx, char __user *optval, int optlen)
+{
+	struct key *key;
+	char *description;
+
+	_enter("");
+
+	if (optlen <= 0 || optlen > PAGE_SIZE - 1)
+		return -EINVAL;
+
+	description = kmalloc(optlen + 1, GFP_KERNEL);
+	if (!description)
+		return -ENOMEM;
+
+	if (copy_from_user(description, optval, optlen)) {
+		kfree(description);
+		return -EFAULT;
+	}
+	description[optlen] = 0;
+
+	key = request_key(&key_type_rxrpc, description, NULL);
+	if (IS_ERR(key)) {
+		kfree(description);
+		_leave(" = %ld", PTR_ERR(key));
+		return PTR_ERR(key);
+	}
+
+	rx->key = key;
+	kfree(description);
+	_leave(" = 0 [key %x]", key->serial);
+	return 0;
+}
+
+/*
+ * grab the security keyring for a server socket
+ */
+int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval,
+			 int optlen)
+{
+	struct key *key;
+	char *description;
+
+	_enter("");
+
+	if (optlen <= 0 || optlen > PAGE_SIZE - 1)
+		return -EINVAL;
+
+	description = kmalloc(optlen + 1, GFP_KERNEL);
+	if (!description)
+		return -ENOMEM;
+
+	if (copy_from_user(description, optval, optlen)) {
+		kfree(description);
+		return -EFAULT;
+	}
+	description[optlen] = 0;
+
+	key = request_key(&key_type_keyring, description, NULL);
+	if (IS_ERR(key)) {
+		kfree(description);
+		_leave(" = %ld", PTR_ERR(key));
+		return PTR_ERR(key);
+	}
+
+	rx->securities = key;
+	kfree(description);
+	_leave(" = 0 [key %x]", key->serial);
+	return 0;
+}
+
+/*
+ * generate a server data key
+ */
+int rxrpc_get_server_data_key(struct rxrpc_connection *conn,
+			      const void *session_key,
+			      time_t expiry,
+			      u32 kvno)
+{
+	struct key *key;
+	int ret;
+
+	struct {
+		u32 kver;
+		struct rxkad_key tsec;
+	} data;
+
+	_enter("");
+
+	key = key_alloc(&key_type_rxrpc, "x", 0, 0, current, 0,
+			KEY_ALLOC_NOT_IN_QUOTA);
+	if (IS_ERR(key)) {
+		_leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key));
+		return -ENOMEM;
+	}
+
+	_debug("key %d", key_serial(key));
+
+	data.kver = 1;
+	data.tsec.security_index = 2;
+	data.tsec.ticket_len = 0;
+	data.tsec.expiry = expiry;
+	data.tsec.kvno = 0;
+
+	memcpy(&data.tsec.session_key, session_key,
+	       sizeof(data.tsec.session_key));
+
+	ret = key_instantiate_and_link(key, &data, sizeof(data), NULL, NULL);
+	if (ret < 0)
+		goto error;
+
+	conn->key = key;
+	_leave(" = 0 [%d]", key_serial(key));
+	return 0;
+
+error:
+	key_revoke(key);
+	key_put(key);
+	_leave(" = -ENOMEM [ins %d]", ret);
+	return -ENOMEM;
+}
+
+EXPORT_SYMBOL(rxrpc_get_server_data_key);
diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c
new file mode 100644
index 00000000000..a20a2c0fe10
--- /dev/null
+++ b/net/rxrpc/ar-local.c
@@ -0,0 +1,309 @@
+/* AF_RXRPC local endpoint management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static LIST_HEAD(rxrpc_locals);
+DEFINE_RWLOCK(rxrpc_local_lock);
+static DECLARE_RWSEM(rxrpc_local_sem);
+static DECLARE_WAIT_QUEUE_HEAD(rxrpc_local_wq);
+
+static void rxrpc_destroy_local(struct work_struct *work);
+
+/*
+ * allocate a new local
+ */
+static
+struct rxrpc_local *rxrpc_alloc_local(struct sockaddr_rxrpc *srx)
+{
+	struct rxrpc_local *local;
+
+	local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
+	if (local) {
+		INIT_WORK(&local->destroyer, &rxrpc_destroy_local);
+		INIT_WORK(&local->acceptor, &rxrpc_accept_incoming_calls);
+		INIT_WORK(&local->rejecter, &rxrpc_reject_packets);
+		INIT_LIST_HEAD(&local->services);
+		INIT_LIST_HEAD(&local->link);
+		init_rwsem(&local->defrag_sem);
+		skb_queue_head_init(&local->accept_queue);
+		skb_queue_head_init(&local->reject_queue);
+		spin_lock_init(&local->lock);
+		rwlock_init(&local->services_lock);
+		atomic_set(&local->usage, 1);
+		local->debug_id = atomic_inc_return(&rxrpc_debug_id);
+		memcpy(&local->srx, srx, sizeof(*srx));
+	}
+
+	_leave(" = %p", local);
+	return local;
+}
+
+/*
+ * create the local socket
+ * - must be called with rxrpc_local_sem writelocked
+ */
+static int rxrpc_create_local(struct rxrpc_local *local)
+{
+	struct sock *sock;
+	int ret, opt;
+
+	_enter("%p{%d}", local, local->srx.transport_type);
+
+	/* create a socket to represent the local endpoint */
+	ret = sock_create_kern(PF_INET, local->srx.transport_type, IPPROTO_UDP,
+			       &local->socket);
+	if (ret < 0) {
+		_leave(" = %d [socket]", ret);
+		return ret;
+	}
+
+	/* if a local address was supplied then bind it */
+	if (local->srx.transport_len > sizeof(sa_family_t)) {
+		_debug("bind");
+		ret = kernel_bind(local->socket,
+				  (struct sockaddr *) &local->srx.transport,
+				  local->srx.transport_len);
+		if (ret < 0) {
+			_debug("bind failed");
+			goto error;
+		}
+	}
+
+	/* we want to receive ICMP errors */
+	opt = 1;
+	ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
+				(char *) &opt, sizeof(opt));
+	if (ret < 0) {
+		_debug("setsockopt failed");
+		goto error;
+	}
+
+	/* we want to set the don't fragment bit */
+	opt = IP_PMTUDISC_DO;
+	ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
+				(char *) &opt, sizeof(opt));
+	if (ret < 0) {
+		_debug("setsockopt failed");
+		goto error;
+	}
+
+	write_lock_bh(&rxrpc_local_lock);
+	list_add(&local->link, &rxrpc_locals);
+	write_unlock_bh(&rxrpc_local_lock);
+
+	/* set the socket up */
+	sock = local->socket->sk;
+	sock->sk_user_data	= local;
+	sock->sk_data_ready	= rxrpc_data_ready;
+	sock->sk_error_report	= rxrpc_UDP_error_report;
+	_leave(" = 0");
+	return 0;
+
+error:
+	local->socket->ops->shutdown(local->socket, 2);
+	local->socket->sk->sk_user_data = NULL;
+	sock_release(local->socket);
+	local->socket = NULL;
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * create a new local endpoint using the specified UDP address
+ */
+struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *srx)
+{
+	struct rxrpc_local *local;
+	int ret;
+
+	_enter("{%d,%u,%u.%u.%u.%u+%hu}",
+	       srx->transport_type,
+	       srx->transport.family,
+	       NIPQUAD(srx->transport.sin.sin_addr),
+	       ntohs(srx->transport.sin.sin_port));
+
+	down_write(&rxrpc_local_sem);
+
+	/* see if we have a suitable local local endpoint already */
+	read_lock_bh(&rxrpc_local_lock);
+
+	list_for_each_entry(local, &rxrpc_locals, link) {
+		_debug("CMP {%d,%u,%u.%u.%u.%u+%hu}",
+		       local->srx.transport_type,
+		       local->srx.transport.family,
+		       NIPQUAD(local->srx.transport.sin.sin_addr),
+		       ntohs(local->srx.transport.sin.sin_port));
+
+		if (local->srx.transport_type != srx->transport_type ||
+		    local->srx.transport.family != srx->transport.family)
+			continue;
+
+		switch (srx->transport.family) {
+		case AF_INET:
+			if (local->srx.transport.sin.sin_port !=
+			    srx->transport.sin.sin_port)
+				continue;
+			if (memcmp(&local->srx.transport.sin.sin_addr,
+				   &srx->transport.sin.sin_addr,
+				   sizeof(struct in_addr)) != 0)
+				continue;
+			goto found_local;
+
+		default:
+			BUG();
+		}
+	}
+
+	read_unlock_bh(&rxrpc_local_lock);
+
+	/* we didn't find one, so we need to create one */
+	local = rxrpc_alloc_local(srx);
+	if (!local) {
+		up_write(&rxrpc_local_sem);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ret = rxrpc_create_local(local);
+	if (ret < 0) {
+		up_write(&rxrpc_local_sem);
+		kfree(local);
+		_leave(" = %d", ret);
+		return ERR_PTR(ret);
+	}
+
+	up_write(&rxrpc_local_sem);
+
+	_net("LOCAL new %d {%d,%u,%u.%u.%u.%u+%hu}",
+	     local->debug_id,
+	     local->srx.transport_type,
+	     local->srx.transport.family,
+	     NIPQUAD(local->srx.transport.sin.sin_addr),
+	     ntohs(local->srx.transport.sin.sin_port));
+
+	_leave(" = %p [new]", local);
+	return local;
+
+found_local:
+	rxrpc_get_local(local);
+	read_unlock_bh(&rxrpc_local_lock);
+	up_write(&rxrpc_local_sem);
+
+	_net("LOCAL old %d {%d,%u,%u.%u.%u.%u+%hu}",
+	     local->debug_id,
+	     local->srx.transport_type,
+	     local->srx.transport.family,
+	     NIPQUAD(local->srx.transport.sin.sin_addr),
+	     ntohs(local->srx.transport.sin.sin_port));
+
+	_leave(" = %p [reuse]", local);
+	return local;
+}
+
+/*
+ * release a local endpoint
+ */
+void rxrpc_put_local(struct rxrpc_local *local)
+{
+	_enter("%p{u=%d}", local, atomic_read(&local->usage));
+
+	ASSERTCMP(atomic_read(&local->usage), >, 0);
+
+	/* to prevent a race, the decrement and the dequeue must be effectively
+	 * atomic */
+	write_lock_bh(&rxrpc_local_lock);
+	if (unlikely(atomic_dec_and_test(&local->usage))) {
+		_debug("destroy local");
+		schedule_work(&local->destroyer);
+	}
+	write_unlock_bh(&rxrpc_local_lock);
+	_leave("");
+}
+
+/*
+ * destroy a local endpoint
+ */
+static void rxrpc_destroy_local(struct work_struct *work)
+{
+	struct rxrpc_local *local =
+		container_of(work, struct rxrpc_local, destroyer);
+
+	_enter("%p{%d}", local, atomic_read(&local->usage));
+
+	down_write(&rxrpc_local_sem);
+
+	write_lock_bh(&rxrpc_local_lock);
+	if (atomic_read(&local->usage) > 0) {
+		write_unlock_bh(&rxrpc_local_lock);
+		up_read(&rxrpc_local_sem);
+		_leave(" [resurrected]");
+		return;
+	}
+
+	list_del(&local->link);
+	local->socket->sk->sk_user_data = NULL;
+	write_unlock_bh(&rxrpc_local_lock);
+
+	downgrade_write(&rxrpc_local_sem);
+
+	ASSERT(list_empty(&local->services));
+	ASSERT(!work_pending(&local->acceptor));
+	ASSERT(!work_pending(&local->rejecter));
+
+	/* finish cleaning up the local descriptor */
+	rxrpc_purge_queue(&local->accept_queue);
+	rxrpc_purge_queue(&local->reject_queue);
+	local->socket->ops->shutdown(local->socket, 2);
+	sock_release(local->socket);
+
+	up_read(&rxrpc_local_sem);
+
+	_net("DESTROY LOCAL %d", local->debug_id);
+	kfree(local);
+
+	if (list_empty(&rxrpc_locals))
+		wake_up_all(&rxrpc_local_wq);
+
+	_leave("");
+}
+
+/*
+ * preemptively destroy all local local endpoint rather than waiting for
+ * them to be destroyed
+ */
+void __exit rxrpc_destroy_all_locals(void)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	_enter("");
+
+	/* we simply have to wait for them to go away */
+	if (!list_empty(&rxrpc_locals)) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		add_wait_queue(&rxrpc_local_wq, &myself);
+
+		while (!list_empty(&rxrpc_locals)) {
+			schedule();
+			set_current_state(TASK_UNINTERRUPTIBLE);
+		}
+
+		remove_wait_queue(&rxrpc_local_wq, &myself);
+		set_current_state(TASK_RUNNING);
+	}
+
+	_leave("");
+}
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
new file mode 100644
index 00000000000..67aa9510f09
--- /dev/null
+++ b/net/rxrpc/ar-output.c
@@ -0,0 +1,658 @@
+/* RxRPC packet transmission
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/circ_buf.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+int rxrpc_resend_timeout = 4;
+
+static int rxrpc_send_data(struct kiocb *iocb,
+			   struct rxrpc_sock *rx,
+			   struct rxrpc_call *call,
+			   struct msghdr *msg, size_t len);
+
+/*
+ * extract control messages from the sendmsg() control buffer
+ */
+static int rxrpc_sendmsg_cmsg(struct rxrpc_sock *rx, struct msghdr *msg,
+			      unsigned long *user_call_ID,
+			      enum rxrpc_command *command,
+			      u32 *abort_code,
+			      bool server)
+{
+	struct cmsghdr *cmsg;
+	int len;
+
+	*command = RXRPC_CMD_SEND_DATA;
+
+	if (msg->msg_controllen == 0)
+		return -EINVAL;
+
+	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+		if (!CMSG_OK(msg, cmsg))
+			return -EINVAL;
+
+		len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+		_debug("CMSG %d, %d, %d",
+		       cmsg->cmsg_level, cmsg->cmsg_type, len);
+
+		if (cmsg->cmsg_level != SOL_RXRPC)
+			continue;
+
+		switch (cmsg->cmsg_type) {
+		case RXRPC_USER_CALL_ID:
+			if (msg->msg_flags & MSG_CMSG_COMPAT) {
+				if (len != sizeof(u32))
+					return -EINVAL;
+				*user_call_ID = *(u32 *) CMSG_DATA(cmsg);
+			} else {
+				if (len != sizeof(unsigned long))
+					return -EINVAL;
+				*user_call_ID = *(unsigned long *)
+					CMSG_DATA(cmsg);
+			}
+			_debug("User Call ID %lx", *user_call_ID);
+			break;
+
+		case RXRPC_ABORT:
+			if (*command != RXRPC_CMD_SEND_DATA)
+				return -EINVAL;
+			*command = RXRPC_CMD_SEND_ABORT;
+			if (len != sizeof(*abort_code))
+				return -EINVAL;
+			*abort_code = *(unsigned int *) CMSG_DATA(cmsg);
+			_debug("Abort %x", *abort_code);
+			if (*abort_code == 0)
+				return -EINVAL;
+			break;
+
+		case RXRPC_ACCEPT:
+			if (*command != RXRPC_CMD_SEND_DATA)
+				return -EINVAL;
+			*command = RXRPC_CMD_ACCEPT;
+			if (len != 0)
+				return -EINVAL;
+			if (!server)
+				return -EISCONN;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+	}
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * abort a call, sending an ABORT packet to the peer
+ */
+static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
+{
+	write_lock_bh(&call->state_lock);
+
+	if (call->state <= RXRPC_CALL_COMPLETE) {
+		call->state = RXRPC_CALL_LOCALLY_ABORTED;
+		call->abort_code = abort_code;
+		set_bit(RXRPC_CALL_ABORT, &call->events);
+		del_timer_sync(&call->resend_timer);
+		del_timer_sync(&call->ack_timer);
+		clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
+		clear_bit(RXRPC_CALL_ACK, &call->events);
+		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+		schedule_work(&call->processor);
+	}
+
+	write_unlock_bh(&call->state_lock);
+}
+
+/*
+ * send a message forming part of a client call through an RxRPC socket
+ * - caller holds the socket locked
+ * - the socket may be either a client socket or a server socket
+ */
+int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
+			 struct rxrpc_transport *trans, struct msghdr *msg,
+			 size_t len)
+{
+	struct rxrpc_conn_bundle *bundle;
+	enum rxrpc_command cmd;
+	struct rxrpc_call *call;
+	unsigned long user_call_ID = 0;
+	struct key *key;
+	__be16 service_id;
+	u32 abort_code = 0;
+	int ret;
+
+	_enter("");
+
+	ASSERT(trans != NULL);
+
+	ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
+				 false);
+	if (ret < 0)
+		return ret;
+
+	bundle = NULL;
+	if (trans) {
+		service_id = rx->service_id;
+		if (msg->msg_name) {
+			struct sockaddr_rxrpc *srx =
+				(struct sockaddr_rxrpc *) msg->msg_name;
+			service_id = htons(srx->srx_service);
+		}
+		key = rx->key;
+		if (key && !rx->key->payload.data)
+			key = NULL;
+		bundle = rxrpc_get_bundle(rx, trans, key, service_id,
+					  GFP_KERNEL);
+		if (IS_ERR(bundle))
+			return PTR_ERR(bundle);
+	}
+
+	call = rxrpc_get_client_call(rx, trans, bundle, user_call_ID,
+				     abort_code == 0, GFP_KERNEL);
+	if (trans)
+		rxrpc_put_bundle(trans, bundle);
+	if (IS_ERR(call)) {
+		_leave(" = %ld", PTR_ERR(call));
+		return PTR_ERR(call);
+	}
+
+	_debug("CALL %d USR %lx ST %d on CONN %p",
+	       call->debug_id, call->user_call_ID, call->state, call->conn);
+
+	if (call->state >= RXRPC_CALL_COMPLETE) {
+		/* it's too late for this call */
+		ret = -ESHUTDOWN;
+	} else if (cmd == RXRPC_CMD_SEND_ABORT) {
+		rxrpc_send_abort(call, abort_code);
+	} else if (cmd != RXRPC_CMD_SEND_DATA) {
+		ret = -EINVAL;
+	} else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+		/* request phase complete for this client call */
+		ret = -EPROTO;
+	} else {
+		ret = rxrpc_send_data(iocb, rx, call, msg, len);
+	}
+
+	rxrpc_put_call(call);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * send a message through a server socket
+ * - caller holds the socket locked
+ */
+int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
+			 struct msghdr *msg, size_t len)
+{
+	enum rxrpc_command cmd;
+	struct rxrpc_call *call;
+	unsigned long user_call_ID = 0;
+	u32 abort_code = 0;
+	int ret;
+
+	_enter("");
+
+	ret = rxrpc_sendmsg_cmsg(rx, msg, &user_call_ID, &cmd, &abort_code,
+				 true);
+	if (ret < 0)
+		return ret;
+
+	if (cmd == RXRPC_CMD_ACCEPT)
+		return rxrpc_accept_call(rx, user_call_ID);
+
+	call = rxrpc_find_server_call(rx, user_call_ID);
+	if (!call)
+		return -EBADSLT;
+	if (call->state >= RXRPC_CALL_COMPLETE) {
+		ret = -ESHUTDOWN;
+		goto out;
+	}
+
+	switch (cmd) {
+	case RXRPC_CMD_SEND_DATA:
+		if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+		    call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+		    call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+			/* Tx phase not yet begun for this call */
+			ret = -EPROTO;
+			break;
+		}
+
+		ret = rxrpc_send_data(iocb, rx, call, msg, len);
+		break;
+
+	case RXRPC_CMD_SEND_ABORT:
+		rxrpc_send_abort(call, abort_code);
+		break;
+	default:
+		BUG();
+	}
+
+	out:
+	rxrpc_put_call(call);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * send a packet through the transport endpoint
+ */
+int rxrpc_send_packet(struct rxrpc_transport *trans, struct sk_buff *skb)
+{
+	struct kvec iov[1];
+	struct msghdr msg;
+	int ret, opt;
+
+	_enter(",{%d}", skb->len);
+
+	iov[0].iov_base = skb->head;
+	iov[0].iov_len = skb->len;
+
+	msg.msg_name = &trans->peer->srx.transport.sin;
+	msg.msg_namelen = sizeof(trans->peer->srx.transport.sin);
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags = 0;
+
+	/* send the packet with the don't fragment bit set if we currently
+	 * think it's small enough */
+	if (skb->len - sizeof(struct rxrpc_header) < trans->peer->maxdata) {
+		down_read(&trans->local->defrag_sem);
+		/* send the packet by UDP
+		 * - returns -EMSGSIZE if UDP would have to fragment the packet
+		 *   to go out of the interface
+		 *   - in which case, we'll have processed the ICMP error
+		 *     message and update the peer record
+		 */
+		ret = kernel_sendmsg(trans->local->socket, &msg, iov, 1,
+				     iov[0].iov_len);
+
+		up_read(&trans->local->defrag_sem);
+		if (ret == -EMSGSIZE)
+			goto send_fragmentable;
+
+		_leave(" = %d [%u]", ret, trans->peer->maxdata);
+		return ret;
+	}
+
+send_fragmentable:
+	/* attempt to send this message with fragmentation enabled */
+	_debug("send fragment");
+
+	down_write(&trans->local->defrag_sem);
+	opt = IP_PMTUDISC_DONT;
+	ret = kernel_setsockopt(trans->local->socket, SOL_IP, IP_MTU_DISCOVER,
+				(char *) &opt, sizeof(opt));
+	if (ret == 0) {
+		ret = kernel_sendmsg(trans->local->socket, &msg, iov, 1,
+				     iov[0].iov_len);
+
+		opt = IP_PMTUDISC_DO;
+		kernel_setsockopt(trans->local->socket, SOL_IP,
+				  IP_MTU_DISCOVER, (char *) &opt, sizeof(opt));
+	}
+
+	up_write(&trans->local->defrag_sem);
+	_leave(" = %d [frag %u]", ret, trans->peer->maxdata);
+	return ret;
+}
+
+/*
+ * wait for space to appear in the transmit/ACK window
+ * - caller holds the socket locked
+ */
+static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
+				    struct rxrpc_call *call,
+				    long *timeo)
+{
+	DECLARE_WAITQUEUE(myself, current);
+	int ret;
+
+	_enter(",{%d},%ld",
+	       CIRC_SPACE(call->acks_head, call->acks_tail, call->acks_winsz),
+	       *timeo);
+
+	add_wait_queue(&call->tx_waitq, &myself);
+
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		ret = 0;
+		if (CIRC_SPACE(call->acks_head, call->acks_tail,
+			       call->acks_winsz) > 0)
+			break;
+		if (signal_pending(current)) {
+			ret = sock_intr_errno(*timeo);
+			break;
+		}
+
+		release_sock(&rx->sk);
+		*timeo = schedule_timeout(*timeo);
+		lock_sock(&rx->sk);
+	}
+
+	remove_wait_queue(&call->tx_waitq, &myself);
+	set_current_state(TASK_RUNNING);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * attempt to schedule an instant Tx resend
+ */
+static inline void rxrpc_instant_resend(struct rxrpc_call *call)
+{
+	read_lock_bh(&call->state_lock);
+	if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
+		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
+		if (call->state < RXRPC_CALL_COMPLETE &&
+		    !test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
+			schedule_work(&call->processor);
+	}
+	read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * queue a packet for transmission, set the resend timer and attempt
+ * to send the packet immediately
+ */
+static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
+			       bool last)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	int ret;
+
+	_net("queue skb %p [%d]", skb, call->acks_head);
+
+	ASSERT(call->acks_window != NULL);
+	call->acks_window[call->acks_head] = (unsigned long) skb;
+	smp_wmb();
+	call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1);
+
+	if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
+		_debug("________awaiting reply/ACK__________");
+		write_lock_bh(&call->state_lock);
+		switch (call->state) {
+		case RXRPC_CALL_CLIENT_SEND_REQUEST:
+			call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+			break;
+		case RXRPC_CALL_SERVER_ACK_REQUEST:
+			call->state = RXRPC_CALL_SERVER_SEND_REPLY;
+			if (!last)
+				break;
+		case RXRPC_CALL_SERVER_SEND_REPLY:
+			call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
+			break;
+		default:
+			break;
+		}
+		write_unlock_bh(&call->state_lock);
+	}
+
+	_proto("Tx DATA %%%u { #%u }",
+	       ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
+
+	sp->need_resend = 0;
+	sp->resend_at = jiffies + rxrpc_resend_timeout * HZ;
+	if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) {
+		_debug("run timer");
+		call->resend_timer.expires = sp->resend_at;
+		add_timer(&call->resend_timer);
+	}
+
+	/* attempt to cancel the rx-ACK timer, deferring reply transmission if
+	 * we're ACK'ing the request phase of an incoming call */
+	ret = -EAGAIN;
+	if (try_to_del_timer_sync(&call->ack_timer) >= 0) {
+		/* the packet may be freed by rxrpc_process_call() before this
+		 * returns */
+		ret = rxrpc_send_packet(call->conn->trans, skb);
+		_net("sent skb %p", skb);
+	} else {
+		_debug("failed to delete ACK timer");
+	}
+
+	if (ret < 0) {
+		_debug("need instant resend %d", ret);
+		sp->need_resend = 1;
+		rxrpc_instant_resend(call);
+	}
+
+	_leave("");
+}
+
+/*
+ * send data through a socket
+ * - must be called in process context
+ * - caller holds the socket locked
+ */
+static int rxrpc_send_data(struct kiocb *iocb,
+			   struct rxrpc_sock *rx,
+			   struct rxrpc_call *call,
+			   struct msghdr *msg, size_t len)
+{
+	struct rxrpc_skb_priv *sp;
+	unsigned char __user *from;
+	struct sk_buff *skb;
+	struct iovec *iov;
+	struct sock *sk = &rx->sk;
+	long timeo;
+	bool more;
+	int ret, ioc, segment, copied;
+
+	_enter(",,,{%zu},%zu", msg->msg_iovlen, len);
+
+	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+	/* this should be in poll */
+	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
+
+	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+		return -EPIPE;
+
+	iov = msg->msg_iov;
+	ioc = msg->msg_iovlen - 1;
+	from = iov->iov_base;
+	segment = iov->iov_len;
+	iov++;
+	more = msg->msg_flags & MSG_MORE;
+
+	skb = call->tx_pending;
+	call->tx_pending = NULL;
+
+	copied = 0;
+	do {
+		int copy;
+
+		if (segment > len)
+			segment = len;
+
+		_debug("SEGMENT %d @%p", segment, from);
+
+		if (!skb) {
+			size_t size, chunk, max, space;
+
+			_debug("alloc");
+
+			if (CIRC_SPACE(call->acks_head, call->acks_tail,
+				       call->acks_winsz) <= 0) {
+				ret = -EAGAIN;
+				if (msg->msg_flags & MSG_DONTWAIT)
+					goto maybe_error;
+				ret = rxrpc_wait_for_tx_window(rx, call,
+							       &timeo);
+				if (ret < 0)
+					goto maybe_error;
+			}
+
+			max = call->conn->trans->peer->maxdata;
+			max -= call->conn->security_size;
+			max &= ~(call->conn->size_align - 1UL);
+
+			chunk = max;
+			if (chunk > len)
+				chunk = len;
+
+			space = chunk + call->conn->size_align;
+			space &= ~(call->conn->size_align - 1UL);
+
+			size = space + call->conn->header_size;
+
+			_debug("SIZE: %zu/%zu/%zu", chunk, space, size);
+
+			/* create a buffer that we can retain until it's ACK'd */
+			skb = sock_alloc_send_skb(
+				sk, size, msg->msg_flags & MSG_DONTWAIT, &ret);
+			if (!skb)
+				goto maybe_error;
+
+			rxrpc_new_skb(skb);
+
+			_debug("ALLOC SEND %p", skb);
+
+			ASSERTCMP(skb->mark, ==, 0);
+
+			_debug("HS: %u", call->conn->header_size);
+			skb_reserve(skb, call->conn->header_size);
+			skb->len += call->conn->header_size;
+
+			sp = rxrpc_skb(skb);
+			sp->remain = chunk;
+			if (sp->remain > skb_tailroom(skb))
+				sp->remain = skb_tailroom(skb);
+
+			_net("skb: hr %d, tr %d, hl %d, rm %d",
+			       skb_headroom(skb),
+			       skb_tailroom(skb),
+			       skb_headlen(skb),
+			       sp->remain);
+
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		}
+
+		_debug("append");
+		sp = rxrpc_skb(skb);
+
+		/* append next segment of data to the current buffer */
+		copy = skb_tailroom(skb);
+		ASSERTCMP(copy, >, 0);
+		if (copy > segment)
+			copy = segment;
+		if (copy > sp->remain)
+			copy = sp->remain;
+
+		_debug("add");
+		ret = skb_add_data(skb, from, copy);
+		_debug("added");
+		if (ret < 0)
+			goto efault;
+		sp->remain -= copy;
+		skb->mark += copy;
+
+		len -= copy;
+		segment -= copy;
+		from += copy;
+		while (segment == 0 && ioc > 0) {
+			from = iov->iov_base;
+			segment = iov->iov_len;
+			iov++;
+			ioc--;
+		}
+		if (len == 0) {
+			segment = 0;
+			ioc = 0;
+		}
+
+		/* check for the far side aborting the call or a network error
+		 * occurring */
+		if (call->state > RXRPC_CALL_COMPLETE)
+			goto call_aborted;
+
+		/* add the packet to the send queue if it's now full */
+		if (sp->remain <= 0 || (segment == 0 && !more)) {
+			struct rxrpc_connection *conn = call->conn;
+			size_t pad;
+
+			/* pad out if we're using security */
+			if (conn->security) {
+				pad = conn->security_size + skb->mark;
+				pad = conn->size_align - pad;
+				pad &= conn->size_align - 1;
+				_debug("pad %zu", pad);
+				if (pad)
+					memset(skb_put(skb, pad), 0, pad);
+			}
+
+			sp->hdr.epoch = conn->epoch;
+			sp->hdr.cid = call->cid;
+			sp->hdr.callNumber = call->call_id;
+			sp->hdr.seq =
+				htonl(atomic_inc_return(&call->sequence));
+			sp->hdr.serial =
+				htonl(atomic_inc_return(&conn->serial));
+			sp->hdr.type = RXRPC_PACKET_TYPE_DATA;
+			sp->hdr.userStatus = 0;
+			sp->hdr.securityIndex = conn->security_ix;
+			sp->hdr._rsvd = 0;
+			sp->hdr.serviceId = conn->service_id;
+
+			sp->hdr.flags = conn->out_clientflag;
+			if (len == 0 && !more)
+				sp->hdr.flags |= RXRPC_LAST_PACKET;
+			else if (CIRC_SPACE(call->acks_head, call->acks_tail,
+					    call->acks_winsz) > 1)
+				sp->hdr.flags |= RXRPC_MORE_PACKETS;
+
+			ret = rxrpc_secure_packet(
+				call, skb, skb->mark,
+				skb->head + sizeof(struct rxrpc_header));
+			if (ret < 0)
+				goto out;
+
+			memcpy(skb->head, &sp->hdr,
+			       sizeof(struct rxrpc_header));
+			rxrpc_queue_packet(call, skb, segment == 0 && !more);
+			skb = NULL;
+		}
+
+	} while (segment > 0);
+
+out:
+	call->tx_pending = skb;
+	_leave(" = %d", ret);
+	return ret;
+
+call_aborted:
+	rxrpc_free_skb(skb);
+	if (call->state == RXRPC_CALL_NETWORK_ERROR)
+		ret = call->conn->trans->peer->net_error;
+	else
+		ret = -ECONNABORTED;
+	_leave(" = %d", ret);
+	return ret;
+
+maybe_error:
+	if (copied)
+		ret = copied;
+	goto out;
+
+efault:
+	ret = -EFAULT;
+	goto out;
+}
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
new file mode 100644
index 00000000000..69ac355546a
--- /dev/null
+++ b/net/rxrpc/ar-peer.c
@@ -0,0 +1,273 @@
+/* RxRPC remote transport endpoint management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/icmp.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <net/ip.h>
+#include "ar-internal.h"
+
+static LIST_HEAD(rxrpc_peers);
+static DEFINE_RWLOCK(rxrpc_peer_lock);
+static DECLARE_WAIT_QUEUE_HEAD(rxrpc_peer_wq);
+
+static void rxrpc_destroy_peer(struct work_struct *work);
+
+/*
+ * allocate a new peer
+ */
+static struct rxrpc_peer *rxrpc_alloc_peer(struct sockaddr_rxrpc *srx,
+					   gfp_t gfp)
+{
+	struct rxrpc_peer *peer;
+
+	_enter("");
+
+	peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
+	if (peer) {
+		INIT_WORK(&peer->destroyer, &rxrpc_destroy_peer);
+		INIT_LIST_HEAD(&peer->link);
+		INIT_LIST_HEAD(&peer->error_targets);
+		spin_lock_init(&peer->lock);
+		atomic_set(&peer->usage, 1);
+		peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
+		memcpy(&peer->srx, srx, sizeof(*srx));
+
+		peer->mtu = peer->if_mtu = 65535;
+
+		if (srx->transport.family == AF_INET) {
+			peer->hdrsize = sizeof(struct iphdr);
+			switch (srx->transport_type) {
+			case SOCK_DGRAM:
+				peer->hdrsize += sizeof(struct udphdr);
+				break;
+			default:
+				BUG();
+				break;
+			}
+		} else {
+			BUG();
+		}
+
+		peer->hdrsize += sizeof(struct rxrpc_header);
+		peer->maxdata = peer->mtu - peer->hdrsize;
+	}
+
+	_leave(" = %p", peer);
+	return peer;
+}
+
+/*
+ * obtain a remote transport endpoint for the specified address
+ */
+struct rxrpc_peer *rxrpc_get_peer(struct sockaddr_rxrpc *srx, gfp_t gfp)
+{
+	struct rxrpc_peer *peer, *candidate;
+	const char *new = "old";
+	int usage;
+
+	_enter("{%d,%d,%u.%u.%u.%u+%hu}",
+	       srx->transport_type,
+	       srx->transport_len,
+	       NIPQUAD(srx->transport.sin.sin_addr),
+	       ntohs(srx->transport.sin.sin_port));
+
+	/* search the peer list first */
+	read_lock_bh(&rxrpc_peer_lock);
+	list_for_each_entry(peer, &rxrpc_peers, link) {
+		_debug("check PEER %d { u=%d t=%d l=%d }",
+		       peer->debug_id,
+		       atomic_read(&peer->usage),
+		       peer->srx.transport_type,
+		       peer->srx.transport_len);
+
+		if (atomic_read(&peer->usage) > 0 &&
+		    peer->srx.transport_type == srx->transport_type &&
+		    peer->srx.transport_len == srx->transport_len &&
+		    memcmp(&peer->srx.transport,
+			   &srx->transport,
+			   srx->transport_len) == 0)
+			goto found_extant_peer;
+	}
+	read_unlock_bh(&rxrpc_peer_lock);
+
+	/* not yet present - create a candidate for a new record and then
+	 * redo the search */
+	candidate = rxrpc_alloc_peer(srx, gfp);
+	if (!candidate) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	write_lock_bh(&rxrpc_peer_lock);
+
+	list_for_each_entry(peer, &rxrpc_peers, link) {
+		if (atomic_read(&peer->usage) > 0 &&
+		    peer->srx.transport_type == srx->transport_type &&
+		    peer->srx.transport_len == srx->transport_len &&
+		    memcmp(&peer->srx.transport,
+			   &srx->transport,
+			   srx->transport_len) == 0)
+			goto found_extant_second;
+	}
+
+	/* we can now add the new candidate to the list */
+	peer = candidate;
+	candidate = NULL;
+
+	list_add_tail(&peer->link, &rxrpc_peers);
+	write_unlock_bh(&rxrpc_peer_lock);
+	new = "new";
+
+success:
+	_net("PEER %s %d {%d,%u,%u.%u.%u.%u+%hu}",
+	     new,
+	     peer->debug_id,
+	     peer->srx.transport_type,
+	     peer->srx.transport.family,
+	     NIPQUAD(peer->srx.transport.sin.sin_addr),
+	     ntohs(peer->srx.transport.sin.sin_port));
+
+	_leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
+	return peer;
+
+	/* we found the peer in the list immediately */
+found_extant_peer:
+	usage = atomic_inc_return(&peer->usage);
+	read_unlock_bh(&rxrpc_peer_lock);
+	goto success;
+
+	/* we found the peer on the second time through the list */
+found_extant_second:
+	usage = atomic_inc_return(&peer->usage);
+	write_unlock_bh(&rxrpc_peer_lock);
+	kfree(candidate);
+	goto success;
+}
+
+/*
+ * find the peer associated with a packet
+ */
+struct rxrpc_peer *rxrpc_find_peer(struct rxrpc_local *local,
+				   __be32 addr, __be16 port)
+{
+	struct rxrpc_peer *peer;
+
+	_enter("");
+
+	/* search the peer list */
+	read_lock_bh(&rxrpc_peer_lock);
+
+	if (local->srx.transport.family == AF_INET &&
+	    local->srx.transport_type == SOCK_DGRAM
+	    ) {
+		list_for_each_entry(peer, &rxrpc_peers, link) {
+			if (atomic_read(&peer->usage) > 0 &&
+			    peer->srx.transport_type == SOCK_DGRAM &&
+			    peer->srx.transport.family == AF_INET &&
+			    peer->srx.transport.sin.sin_port == port &&
+			    peer->srx.transport.sin.sin_addr.s_addr == addr)
+				goto found_UDP_peer;
+		}
+
+		goto new_UDP_peer;
+	}
+
+	read_unlock_bh(&rxrpc_peer_lock);
+	_leave(" = -EAFNOSUPPORT");
+	return ERR_PTR(-EAFNOSUPPORT);
+
+found_UDP_peer:
+	_net("Rx UDP DGRAM from peer %d", peer->debug_id);
+	atomic_inc(&peer->usage);
+	read_unlock_bh(&rxrpc_peer_lock);
+	_leave(" = %p", peer);
+	return peer;
+
+new_UDP_peer:
+	_net("Rx UDP DGRAM from NEW peer %d", peer->debug_id);
+	read_unlock_bh(&rxrpc_peer_lock);
+	_leave(" = -EBUSY [new]");
+	return ERR_PTR(-EBUSY);
+}
+
+/*
+ * release a remote transport endpoint
+ */
+void rxrpc_put_peer(struct rxrpc_peer *peer)
+{
+	_enter("%p{u=%d}", peer, atomic_read(&peer->usage));
+
+	ASSERTCMP(atomic_read(&peer->usage), >, 0);
+
+	if (likely(!atomic_dec_and_test(&peer->usage))) {
+		_leave(" [in use]");
+		return;
+	}
+
+	schedule_work(&peer->destroyer);
+	_leave("");
+}
+
+/*
+ * destroy a remote transport endpoint
+ */
+static void rxrpc_destroy_peer(struct work_struct *work)
+{
+	struct rxrpc_peer *peer =
+		container_of(work, struct rxrpc_peer, destroyer);
+
+	_enter("%p{%d}", peer, atomic_read(&peer->usage));
+
+	write_lock_bh(&rxrpc_peer_lock);
+	list_del(&peer->link);
+	write_unlock_bh(&rxrpc_peer_lock);
+
+	_net("DESTROY PEER %d", peer->debug_id);
+	kfree(peer);
+
+	if (list_empty(&rxrpc_peers))
+		wake_up_all(&rxrpc_peer_wq);
+	_leave("");
+}
+
+/*
+ * preemptively destroy all the peer records from a transport endpoint rather
+ * than waiting for them to time out
+ */
+void __exit rxrpc_destroy_all_peers(void)
+{
+	DECLARE_WAITQUEUE(myself,current);
+
+	_enter("");
+
+	/* we simply have to wait for them to go away */
+	if (!list_empty(&rxrpc_peers)) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		add_wait_queue(&rxrpc_peer_wq, &myself);
+
+		while (!list_empty(&rxrpc_peers)) {
+			schedule();
+			set_current_state(TASK_UNINTERRUPTIBLE);
+		}
+
+		remove_wait_queue(&rxrpc_peer_wq, &myself);
+		set_current_state(TASK_RUNNING);
+	}
+
+	_leave("");
+}
diff --git a/net/rxrpc/ar-proc.c b/net/rxrpc/ar-proc.c
new file mode 100644
index 00000000000..58f4b4e5cec
--- /dev/null
+++ b/net/rxrpc/ar-proc.c
@@ -0,0 +1,247 @@
+/* /proc/net/ support for AF_RXRPC
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static const char *rxrpc_conn_states[] = {
+	[RXRPC_CONN_UNUSED]		= "Unused  ",
+	[RXRPC_CONN_CLIENT]		= "Client  ",
+	[RXRPC_CONN_SERVER_UNSECURED]	= "SvUnsec ",
+	[RXRPC_CONN_SERVER_CHALLENGING]	= "SvChall ",
+	[RXRPC_CONN_SERVER]		= "SvSecure",
+	[RXRPC_CONN_REMOTELY_ABORTED]	= "RmtAbort",
+	[RXRPC_CONN_LOCALLY_ABORTED]	= "LocAbort",
+	[RXRPC_CONN_NETWORK_ERROR]	= "NetError",
+};
+
+const char *rxrpc_call_states[] = {
+	[RXRPC_CALL_CLIENT_SEND_REQUEST]	= "ClSndReq",
+	[RXRPC_CALL_CLIENT_AWAIT_REPLY]		= "ClAwtRpl",
+	[RXRPC_CALL_CLIENT_RECV_REPLY]		= "ClRcvRpl",
+	[RXRPC_CALL_CLIENT_FINAL_ACK]		= "ClFnlACK",
+	[RXRPC_CALL_SERVER_SECURING]		= "SvSecure",
+	[RXRPC_CALL_SERVER_ACCEPTING]		= "SvAccept",
+	[RXRPC_CALL_SERVER_RECV_REQUEST]	= "SvRcvReq",
+	[RXRPC_CALL_SERVER_ACK_REQUEST]		= "SvAckReq",
+	[RXRPC_CALL_SERVER_SEND_REPLY]		= "SvSndRpl",
+	[RXRPC_CALL_SERVER_AWAIT_ACK]		= "SvAwtACK",
+	[RXRPC_CALL_COMPLETE]			= "Complete",
+	[RXRPC_CALL_SERVER_BUSY]		= "SvBusy  ",
+	[RXRPC_CALL_REMOTELY_ABORTED]		= "RmtAbort",
+	[RXRPC_CALL_LOCALLY_ABORTED]		= "LocAbort",
+	[RXRPC_CALL_NETWORK_ERROR]		= "NetError",
+	[RXRPC_CALL_DEAD]			= "Dead    ",
+};
+
+/*
+ * generate a list of extant and dead calls in /proc/net/rxrpc_calls
+ */
+static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
+{
+	struct list_head *_p;
+	loff_t pos = *_pos;
+
+	read_lock(&rxrpc_call_lock);
+	if (!pos)
+		return SEQ_START_TOKEN;
+	pos--;
+
+	list_for_each(_p, &rxrpc_calls)
+		if (!pos--)
+			break;
+
+	return _p != &rxrpc_calls ? _p : NULL;
+}
+
+static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct list_head *_p;
+
+	(*pos)++;
+
+	_p = v;
+	_p = (v == SEQ_START_TOKEN) ? rxrpc_calls.next : _p->next;
+
+	return _p != &rxrpc_calls ? _p : NULL;
+}
+
+static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&rxrpc_call_lock);
+}
+
+static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
+{
+	struct rxrpc_transport *trans;
+	struct rxrpc_call *call;
+	char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq,
+			 "Proto Local                  Remote                "
+			 " SvID ConnID   CallID   End Use State    Abort   "
+			 " UserID\n");
+		return 0;
+	}
+
+	call = list_entry(v, struct rxrpc_call, link);
+	trans = call->conn->trans;
+
+	sprintf(lbuff, NIPQUAD_FMT":%u",
+		NIPQUAD(trans->local->srx.transport.sin.sin_addr),
+		ntohs(trans->local->srx.transport.sin.sin_port));
+
+	sprintf(rbuff, NIPQUAD_FMT":%u",
+		NIPQUAD(trans->peer->srx.transport.sin.sin_addr),
+		ntohs(trans->peer->srx.transport.sin.sin_port));
+
+	seq_printf(seq,
+		   "UDP   %-22.22s %-22.22s %4x %08x %08x %s %3u"
+		   " %-8.8s %08x %lx\n",
+		   lbuff,
+		   rbuff,
+		   ntohs(call->conn->service_id),
+		   ntohl(call->conn->cid),
+		   ntohl(call->call_id),
+		   call->conn->in_clientflag ? "Svc" : "Clt",
+		   atomic_read(&call->usage),
+		   rxrpc_call_states[call->state],
+		   call->abort_code,
+		   call->user_call_ID);
+
+	return 0;
+}
+
+static struct seq_operations rxrpc_call_seq_ops = {
+	.start  = rxrpc_call_seq_start,
+	.next   = rxrpc_call_seq_next,
+	.stop   = rxrpc_call_seq_stop,
+	.show   = rxrpc_call_seq_show,
+};
+
+static int rxrpc_call_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &rxrpc_call_seq_ops);
+}
+
+struct file_operations rxrpc_call_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rxrpc_call_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
+
+/*
+ * generate a list of extant virtual connections in /proc/net/rxrpc_conns
+ */
+static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
+{
+	struct list_head *_p;
+	loff_t pos = *_pos;
+
+	read_lock(&rxrpc_connection_lock);
+	if (!pos)
+		return SEQ_START_TOKEN;
+	pos--;
+
+	list_for_each(_p, &rxrpc_connections)
+		if (!pos--)
+			break;
+
+	return _p != &rxrpc_connections ? _p : NULL;
+}
+
+static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v,
+				       loff_t *pos)
+{
+	struct list_head *_p;
+
+	(*pos)++;
+
+	_p = v;
+	_p = (v == SEQ_START_TOKEN) ? rxrpc_connections.next : _p->next;
+
+	return _p != &rxrpc_connections ? _p : NULL;
+}
+
+static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
+{
+	read_unlock(&rxrpc_connection_lock);
+}
+
+static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
+{
+	struct rxrpc_connection *conn;
+	struct rxrpc_transport *trans;
+	char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
+
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(seq,
+			 "Proto Local                  Remote                "
+			 " SvID ConnID   Calls    End Use State    Key     "
+			 " Serial   ISerial\n"
+			 );
+		return 0;
+	}
+
+	conn = list_entry(v, struct rxrpc_connection, link);
+	trans = conn->trans;
+
+	sprintf(lbuff, NIPQUAD_FMT":%u",
+		NIPQUAD(trans->local->srx.transport.sin.sin_addr),
+		ntohs(trans->local->srx.transport.sin.sin_port));
+
+	sprintf(rbuff, NIPQUAD_FMT":%u",
+		NIPQUAD(trans->peer->srx.transport.sin.sin_addr),
+		ntohs(trans->peer->srx.transport.sin.sin_port));
+
+	seq_printf(seq,
+		   "UDP   %-22.22s %-22.22s %4x %08x %08x %s %3u"
+		   " %s %08x %08x %08x\n",
+		   lbuff,
+		   rbuff,
+		   ntohs(conn->service_id),
+		   ntohl(conn->cid),
+		   conn->call_counter,
+		   conn->in_clientflag ? "Svc" : "Clt",
+		   atomic_read(&conn->usage),
+		   rxrpc_conn_states[conn->state],
+		   key_serial(conn->key),
+		   atomic_read(&conn->serial),
+		   atomic_read(&conn->hi_serial));
+
+	return 0;
+}
+
+static struct seq_operations rxrpc_connection_seq_ops = {
+	.start  = rxrpc_connection_seq_start,
+	.next   = rxrpc_connection_seq_next,
+	.stop   = rxrpc_connection_seq_stop,
+	.show   = rxrpc_connection_seq_show,
+};
+
+
+static int rxrpc_connection_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &rxrpc_connection_seq_ops);
+}
+
+struct file_operations rxrpc_connection_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rxrpc_connection_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_private,
+};
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
new file mode 100644
index 00000000000..e947d5c1590
--- /dev/null
+++ b/net/rxrpc/ar-recvmsg.c
@@ -0,0 +1,366 @@
+/* RxRPC recvmsg() implementation
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+/*
+ * removal a call's user ID from the socket tree to make the user ID available
+ * again and so that it won't be seen again in association with that call
+ */
+static void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call)
+{
+	_debug("RELEASE CALL %d", call->debug_id);
+
+	if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+		write_lock_bh(&rx->call_lock);
+		rb_erase(&call->sock_node, &call->socket->calls);
+		clear_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+		write_unlock_bh(&rx->call_lock);
+	}
+
+	read_lock_bh(&call->state_lock);
+	if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+	    !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+		schedule_work(&call->processor);
+	read_unlock_bh(&call->state_lock);
+}
+
+/*
+ * receive a message from an RxRPC socket
+ * - we need to be careful about two or more threads calling recvmsg
+ *   simultaneously
+ */
+int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
+		  struct msghdr *msg, size_t len, int flags)
+{
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_call *call = NULL, *continue_call = NULL;
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	struct sk_buff *skb;
+	long timeo;
+	int copy, ret, ullen, offset, copied = 0;
+	u32 abort_code;
+
+	DEFINE_WAIT(wait);
+
+	_enter(",,,%zu,%d", len, flags);
+
+	if (flags & (MSG_OOB | MSG_TRUNC))
+		return -EOPNOTSUPP;
+
+	ullen = msg->msg_flags & MSG_CMSG_COMPAT ? 4 : sizeof(unsigned long);
+
+	timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT);
+	msg->msg_flags |= MSG_MORE;
+
+	lock_sock(&rx->sk);
+
+	for (;;) {
+		/* return immediately if a client socket has no outstanding
+		 * calls */
+		if (RB_EMPTY_ROOT(&rx->calls)) {
+			if (copied)
+				goto out;
+			if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) {
+				release_sock(&rx->sk);
+				if (continue_call)
+					rxrpc_put_call(continue_call);
+				return -ENODATA;
+			}
+		}
+
+		/* get the next message on the Rx queue */
+		skb = skb_peek(&rx->sk.sk_receive_queue);
+		if (!skb) {
+			/* nothing remains on the queue */
+			if (copied &&
+			    (msg->msg_flags & MSG_PEEK || timeo == 0))
+				goto out;
+
+			/* wait for a message to turn up */
+			release_sock(&rx->sk);
+			prepare_to_wait_exclusive(rx->sk.sk_sleep, &wait,
+						  TASK_INTERRUPTIBLE);
+			ret = sock_error(&rx->sk);
+			if (ret)
+				goto wait_error;
+
+			if (skb_queue_empty(&rx->sk.sk_receive_queue)) {
+				if (signal_pending(current))
+					goto wait_interrupted;
+				timeo = schedule_timeout(timeo);
+			}
+			finish_wait(rx->sk.sk_sleep, &wait);
+			lock_sock(&rx->sk);
+			continue;
+		}
+
+	peek_next_packet:
+		sp = rxrpc_skb(skb);
+		call = sp->call;
+		ASSERT(call != NULL);
+
+		_debug("next pkt %s", rxrpc_pkts[sp->hdr.type]);
+
+		/* make sure we wait for the state to be updated in this call */
+		spin_lock_bh(&call->lock);
+		spin_unlock_bh(&call->lock);
+
+		if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
+			_debug("packet from released call");
+			if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
+				BUG();
+			rxrpc_free_skb(skb);
+			continue;
+		}
+
+		/* determine whether to continue last data receive */
+		if (continue_call) {
+			_debug("maybe cont");
+			if (call != continue_call ||
+			    skb->mark != RXRPC_SKB_MARK_DATA) {
+				release_sock(&rx->sk);
+				rxrpc_put_call(continue_call);
+				_leave(" = %d [noncont]", copied);
+				return copied;
+			}
+		}
+
+		rxrpc_get_call(call);
+
+		/* copy the peer address and timestamp */
+		if (!continue_call) {
+			if (msg->msg_name && msg->msg_namelen > 0)
+				memcpy(&msg->msg_name, &call->conn->trans->peer->srx,
+				       sizeof(call->conn->trans->peer->srx));
+			sock_recv_timestamp(msg, &rx->sk, skb);
+		}
+
+		/* receive the message */
+		if (skb->mark != RXRPC_SKB_MARK_DATA)
+			goto receive_non_data_message;
+
+		_debug("recvmsg DATA #%u { %d, %d }",
+		       ntohl(sp->hdr.seq), skb->len, sp->offset);
+
+		if (!continue_call) {
+			/* only set the control data once per recvmsg() */
+			ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+				       ullen, &call->user_call_ID);
+			if (ret < 0)
+				goto copy_error;
+			ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
+		}
+
+		ASSERTCMP(ntohl(sp->hdr.seq), >=, call->rx_data_recv);
+		ASSERTCMP(ntohl(sp->hdr.seq), <=, call->rx_data_recv + 1);
+		call->rx_data_recv = ntohl(sp->hdr.seq);
+
+		ASSERTCMP(ntohl(sp->hdr.seq), >, call->rx_data_eaten);
+
+		offset = sp->offset;
+		copy = skb->len - offset;
+		if (copy > len - copied)
+			copy = len - copied;
+
+		if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+			ret = skb_copy_datagram_iovec(skb, offset,
+						      msg->msg_iov, copy);
+		} else {
+			ret = skb_copy_and_csum_datagram_iovec(skb, offset,
+							       msg->msg_iov);
+			if (ret == -EINVAL)
+				goto csum_copy_error;
+		}
+
+		if (ret < 0)
+			goto copy_error;
+
+		/* handle piecemeal consumption of data packets */
+		_debug("copied %d+%d", copy, copied);
+
+		offset += copy;
+		copied += copy;
+
+		if (!(flags & MSG_PEEK))
+			sp->offset = offset;
+
+		if (sp->offset < skb->len) {
+			_debug("buffer full");
+			ASSERTCMP(copied, ==, len);
+			break;
+		}
+
+		/* we transferred the whole data packet */
+		if (sp->hdr.flags & RXRPC_LAST_PACKET) {
+			_debug("last");
+			if (call->conn->out_clientflag) {
+				 /* last byte of reply received */
+				ret = copied;
+				goto terminal_message;
+			}
+
+			/* last bit of request received */
+			if (!(flags & MSG_PEEK)) {
+				_debug("eat packet");
+				if (skb_dequeue(&rx->sk.sk_receive_queue) !=
+				    skb)
+					BUG();
+				rxrpc_free_skb(skb);
+			}
+			msg->msg_flags &= ~MSG_MORE;
+			break;
+		}
+
+		/* move on to the next data message */
+		_debug("next");
+		if (!continue_call)
+			continue_call = sp->call;
+		else
+			rxrpc_put_call(call);
+		call = NULL;
+
+		if (flags & MSG_PEEK) {
+			_debug("peek next");
+			skb = skb->next;
+			if (skb == (struct sk_buff *) &rx->sk.sk_receive_queue)
+				break;
+			goto peek_next_packet;
+		}
+
+		_debug("eat packet");
+		if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
+			BUG();
+		rxrpc_free_skb(skb);
+	}
+
+	/* end of non-terminal data packet reception for the moment */
+	_debug("end rcv data");
+out:
+	release_sock(&rx->sk);
+	if (call)
+		rxrpc_put_call(call);
+	if (continue_call)
+		rxrpc_put_call(continue_call);
+	_leave(" = %d [data]", copied);
+	return copied;
+
+	/* handle non-DATA messages such as aborts, incoming connections and
+	 * final ACKs */
+receive_non_data_message:
+	_debug("non-data");
+
+	if (skb->mark == RXRPC_SKB_MARK_NEW_CALL) {
+		_debug("RECV NEW CALL");
+		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &abort_code);
+		if (ret < 0)
+			goto copy_error;
+		if (!(flags & MSG_PEEK)) {
+			if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
+				BUG();
+			rxrpc_free_skb(skb);
+		}
+		goto out;
+	}
+
+	ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+		       ullen, &call->user_call_ID);
+	if (ret < 0)
+		goto copy_error;
+	ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
+
+	switch (skb->mark) {
+	case RXRPC_SKB_MARK_DATA:
+		BUG();
+	case RXRPC_SKB_MARK_FINAL_ACK:
+		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &abort_code);
+		break;
+	case RXRPC_SKB_MARK_BUSY:
+		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_BUSY, 0, &abort_code);
+		break;
+	case RXRPC_SKB_MARK_REMOTE_ABORT:
+		abort_code = call->abort_code;
+		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code);
+		break;
+	case RXRPC_SKB_MARK_NET_ERROR:
+		_debug("RECV NET ERROR %d", sp->error);
+		abort_code = sp->error;
+		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &abort_code);
+		break;
+	case RXRPC_SKB_MARK_LOCAL_ERROR:
+		_debug("RECV LOCAL ERROR %d", sp->error);
+		abort_code = sp->error;
+		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4,
+			       &abort_code);
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	if (ret < 0)
+		goto copy_error;
+
+terminal_message:
+	_debug("terminal");
+	msg->msg_flags &= ~MSG_MORE;
+	msg->msg_flags |= MSG_EOR;
+
+	if (!(flags & MSG_PEEK)) {
+		_net("free terminal skb %p", skb);
+		if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
+			BUG();
+		rxrpc_free_skb(skb);
+		rxrpc_remove_user_ID(rx, call);
+	}
+
+	release_sock(&rx->sk);
+	rxrpc_put_call(call);
+	if (continue_call)
+		rxrpc_put_call(continue_call);
+	_leave(" = %d", ret);
+	return ret;
+
+copy_error:
+	_debug("copy error");
+	release_sock(&rx->sk);
+	rxrpc_put_call(call);
+	if (continue_call)
+		rxrpc_put_call(continue_call);
+	_leave(" = %d", ret);
+	return ret;
+
+csum_copy_error:
+	_debug("csum error");
+	release_sock(&rx->sk);
+	if (continue_call)
+		rxrpc_put_call(continue_call);
+	rxrpc_kill_skb(skb);
+	skb_kill_datagram(&rx->sk, skb, flags);
+	rxrpc_put_call(call);
+	return -EAGAIN;
+
+wait_interrupted:
+	ret = sock_intr_errno(timeo);
+wait_error:
+	finish_wait(rx->sk.sk_sleep, &wait);
+	if (continue_call)
+		rxrpc_put_call(continue_call);
+	if (copied)
+		copied = ret;
+	_leave(" = %d [waitfail %d]", copied, ret);
+	return copied;
+
+}
diff --git a/net/rxrpc/ar-security.c b/net/rxrpc/ar-security.c
new file mode 100644
index 00000000000..60d1d364430
--- /dev/null
+++ b/net/rxrpc/ar-security.c
@@ -0,0 +1,258 @@
+/* RxRPC security handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/crypto.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static LIST_HEAD(rxrpc_security_methods);
+static DECLARE_RWSEM(rxrpc_security_sem);
+
+/*
+ * get an RxRPC security module
+ */
+static struct rxrpc_security *rxrpc_security_get(struct rxrpc_security *sec)
+{
+	return try_module_get(sec->owner) ? sec : NULL;
+}
+
+/*
+ * release an RxRPC security module
+ */
+static void rxrpc_security_put(struct rxrpc_security *sec)
+{
+	module_put(sec->owner);
+}
+
+/*
+ * look up an rxrpc security module
+ */
+struct rxrpc_security *rxrpc_security_lookup(u8 security_index)
+{
+	struct rxrpc_security *sec = NULL;
+
+	_enter("");
+
+	down_read(&rxrpc_security_sem);
+
+	list_for_each_entry(sec, &rxrpc_security_methods, link) {
+		if (sec->security_index == security_index) {
+			if (unlikely(!rxrpc_security_get(sec)))
+				break;
+			goto out;
+		}
+	}
+
+	sec = NULL;
+out:
+	up_read(&rxrpc_security_sem);
+	_leave(" = %p [%s]", sec, sec ? sec->name : "");
+	return sec;
+}
+
+/**
+ * rxrpc_register_security - register an RxRPC security handler
+ * @sec: security module
+ *
+ * register an RxRPC security handler for use by RxRPC
+ */
+int rxrpc_register_security(struct rxrpc_security *sec)
+{
+	struct rxrpc_security *psec;
+	int ret;
+
+	_enter("");
+	down_write(&rxrpc_security_sem);
+
+	ret = -EEXIST;
+	list_for_each_entry(psec, &rxrpc_security_methods, link) {
+		if (psec->security_index == sec->security_index)
+			goto out;
+	}
+
+	list_add(&sec->link, &rxrpc_security_methods);
+
+	printk(KERN_NOTICE "RxRPC: Registered security type %d '%s'\n",
+	       sec->security_index, sec->name);
+	ret = 0;
+
+out:
+	up_write(&rxrpc_security_sem);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+EXPORT_SYMBOL_GPL(rxrpc_register_security);
+
+/**
+ * rxrpc_unregister_security - unregister an RxRPC security handler
+ * @sec: security module
+ *
+ * unregister an RxRPC security handler
+ */
+void rxrpc_unregister_security(struct rxrpc_security *sec)
+{
+
+	_enter("");
+	down_write(&rxrpc_security_sem);
+	list_del_init(&sec->link);
+	up_write(&rxrpc_security_sem);
+
+	printk(KERN_NOTICE "RxRPC: Unregistered security type %d '%s'\n",
+	       sec->security_index, sec->name);
+}
+
+EXPORT_SYMBOL_GPL(rxrpc_unregister_security);
+
+/*
+ * initialise the security on a client connection
+ */
+int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
+{
+	struct rxrpc_security *sec;
+	struct key *key = conn->key;
+	int ret;
+
+	_enter("{%d},{%x}", conn->debug_id, key_serial(key));
+
+	if (!key)
+		return 0;
+
+	ret = key_validate(key);
+	if (ret < 0)
+		return ret;
+
+	sec = rxrpc_security_lookup(key->type_data.x[0]);
+	if (!sec)
+		return -EKEYREJECTED;
+	conn->security = sec;
+
+	ret = conn->security->init_connection_security(conn);
+	if (ret < 0) {
+		rxrpc_security_put(conn->security);
+		conn->security = NULL;
+		return ret;
+	}
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * initialise the security on a server connection
+ */
+int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
+{
+	struct rxrpc_security *sec;
+	struct rxrpc_local *local = conn->trans->local;
+	struct rxrpc_sock *rx;
+	struct key *key;
+	key_ref_t kref;
+	char kdesc[5+1+3+1];
+
+	_enter("");
+
+	sprintf(kdesc, "%u:%u", ntohs(conn->service_id), conn->security_ix);
+
+	sec = rxrpc_security_lookup(conn->security_ix);
+	if (!sec) {
+		_leave(" = -ENOKEY [lookup]");
+		return -ENOKEY;
+	}
+
+	/* find the service */
+	read_lock_bh(&local->services_lock);
+	list_for_each_entry(rx, &local->services, listen_link) {
+		if (rx->service_id == conn->service_id)
+			goto found_service;
+	}
+
+	/* the service appears to have died */
+	read_unlock_bh(&local->services_lock);
+	rxrpc_security_put(sec);
+	_leave(" = -ENOENT");
+	return -ENOENT;
+
+found_service:
+	if (!rx->securities) {
+		read_unlock_bh(&local->services_lock);
+		rxrpc_security_put(sec);
+		_leave(" = -ENOKEY");
+		return -ENOKEY;
+	}
+
+	/* look through the service's keyring */
+	kref = keyring_search(make_key_ref(rx->securities, 1UL),
+			      &key_type_rxrpc_s, kdesc);
+	if (IS_ERR(kref)) {
+		read_unlock_bh(&local->services_lock);
+		rxrpc_security_put(sec);
+		_leave(" = %ld [search]", PTR_ERR(kref));
+		return PTR_ERR(kref);
+	}
+
+	key = key_ref_to_ptr(kref);
+	read_unlock_bh(&local->services_lock);
+
+	conn->server_key = key;
+	conn->security = sec;
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * secure a packet prior to transmission
+ */
+int rxrpc_secure_packet(const struct rxrpc_call *call,
+			struct sk_buff *skb,
+			size_t data_size,
+			void *sechdr)
+{
+	if (call->conn->security)
+		return call->conn->security->secure_packet(
+			call, skb, data_size, sechdr);
+	return 0;
+}
+
+/*
+ * secure a packet prior to transmission
+ */
+int rxrpc_verify_packet(const struct rxrpc_call *call, struct sk_buff *skb,
+			u32 *_abort_code)
+{
+	if (call->conn->security)
+		return call->conn->security->verify_packet(
+			call, skb, _abort_code);
+	return 0;
+}
+
+/*
+ * clear connection security
+ */
+void rxrpc_clear_conn_security(struct rxrpc_connection *conn)
+{
+	_enter("{%d}", conn->debug_id);
+
+	if (conn->security) {
+		conn->security->clear(conn);
+		rxrpc_security_put(conn->security);
+		conn->security = NULL;
+	}
+
+	key_put(conn->key);
+	key_put(conn->server_key);
+}
diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/ar-skbuff.c
new file mode 100644
index 00000000000..d73f6fc7601
--- /dev/null
+++ b/net/rxrpc/ar-skbuff.c
@@ -0,0 +1,118 @@
+/* ar-skbuff.c: socket buffer destruction handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+/*
+ * set up for the ACK at the end of the receive phase when we discard the final
+ * receive phase data packet
+ * - called with softirqs disabled
+ */
+static void rxrpc_request_final_ACK(struct rxrpc_call *call)
+{
+	/* the call may be aborted before we have a chance to ACK it */
+	write_lock(&call->state_lock);
+
+	switch (call->state) {
+	case RXRPC_CALL_CLIENT_RECV_REPLY:
+		call->state = RXRPC_CALL_CLIENT_FINAL_ACK;
+		_debug("request final ACK");
+
+		/* get an extra ref on the call for the final-ACK generator to
+		 * release */
+		rxrpc_get_call(call);
+		set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
+		if (try_to_del_timer_sync(&call->ack_timer) >= 0)
+			schedule_work(&call->processor);
+		break;
+
+	case RXRPC_CALL_SERVER_RECV_REQUEST:
+		call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
+	default:
+		break;
+	}
+
+	write_unlock(&call->state_lock);
+}
+
+/*
+ * drop the bottom ACK off of the call ACK window and advance the window
+ */
+static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
+				struct rxrpc_skb_priv *sp)
+{
+	int loop;
+	u32 seq;
+
+	spin_lock_bh(&call->lock);
+
+	_debug("hard ACK #%u", ntohl(sp->hdr.seq));
+
+	for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
+		call->ackr_window[loop] >>= 1;
+		call->ackr_window[loop] |=
+			call->ackr_window[loop + 1] << (BITS_PER_LONG - 1);
+	}
+
+	seq = ntohl(sp->hdr.seq);
+	ASSERTCMP(seq, ==, call->rx_data_eaten + 1);
+	call->rx_data_eaten = seq;
+
+	if (call->ackr_win_top < UINT_MAX)
+		call->ackr_win_top++;
+
+	ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE,
+		    call->rx_data_post, >=, call->rx_data_recv);
+	ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE,
+		    call->rx_data_recv, >=, call->rx_data_eaten);
+
+	if (sp->hdr.flags & RXRPC_LAST_PACKET) {
+		rxrpc_request_final_ACK(call);
+	} else if (atomic_dec_and_test(&call->ackr_not_idle) &&
+		   test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) {
+		_debug("send Rx idle ACK");
+		__rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial,
+				    true);
+	}
+
+	spin_unlock_bh(&call->lock);
+}
+
+/*
+ * destroy a packet that has an RxRPC control buffer
+ * - advance the hard-ACK state of the parent call (done here in case something
+ *   in the kernel bypasses recvmsg() and steals the packet directly off of the
+ *   socket receive queue)
+ */
+void rxrpc_packet_destructor(struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	struct rxrpc_call *call = sp->call;
+
+	_enter("%p{%p}", skb, call);
+
+	if (call) {
+		/* send the final ACK on a client call */
+		if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
+			rxrpc_hard_ACK_data(call, sp);
+		rxrpc_put_call(call);
+		sp->call = NULL;
+	}
+
+	if (skb->sk)
+		sock_rfree(skb);
+	_leave("");
+}
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
new file mode 100644
index 00000000000..9b4e5cb545d
--- /dev/null
+++ b/net/rxrpc/ar-transport.c
@@ -0,0 +1,276 @@
+/* RxRPC point-to-point transport session management
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static void rxrpc_transport_reaper(struct work_struct *work);
+
+static LIST_HEAD(rxrpc_transports);
+static DEFINE_RWLOCK(rxrpc_transport_lock);
+static unsigned long rxrpc_transport_timeout = 3600 * 24;
+static DECLARE_DELAYED_WORK(rxrpc_transport_reap, rxrpc_transport_reaper);
+
+/*
+ * allocate a new transport session manager
+ */
+static struct rxrpc_transport *rxrpc_alloc_transport(struct rxrpc_local *local,
+						     struct rxrpc_peer *peer,
+						     gfp_t gfp)
+{
+	struct rxrpc_transport *trans;
+
+	_enter("");
+
+	trans = kzalloc(sizeof(struct rxrpc_transport), gfp);
+	if (trans) {
+		trans->local = local;
+		trans->peer = peer;
+		INIT_LIST_HEAD(&trans->link);
+		trans->bundles = RB_ROOT;
+		trans->client_conns = RB_ROOT;
+		trans->server_conns = RB_ROOT;
+		skb_queue_head_init(&trans->error_queue);
+		spin_lock_init(&trans->client_lock);
+		rwlock_init(&trans->conn_lock);
+		atomic_set(&trans->usage, 1);
+		trans->debug_id = atomic_inc_return(&rxrpc_debug_id);
+
+		if (peer->srx.transport.family == AF_INET) {
+			switch (peer->srx.transport_type) {
+			case SOCK_DGRAM:
+				INIT_WORK(&trans->error_handler,
+					  rxrpc_UDP_error_handler);
+				break;
+			default:
+				BUG();
+				break;
+			}
+		} else {
+			BUG();
+		}
+	}
+
+	_leave(" = %p", trans);
+	return trans;
+}
+
+/*
+ * obtain a transport session for the nominated endpoints
+ */
+struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *local,
+					    struct rxrpc_peer *peer,
+					    gfp_t gfp)
+{
+	struct rxrpc_transport *trans, *candidate;
+	const char *new = "old";
+	int usage;
+
+	_enter("{%u.%u.%u.%u+%hu},{%u.%u.%u.%u+%hu},",
+	       NIPQUAD(local->srx.transport.sin.sin_addr),
+	       ntohs(local->srx.transport.sin.sin_port),
+	       NIPQUAD(peer->srx.transport.sin.sin_addr),
+	       ntohs(peer->srx.transport.sin.sin_port));
+
+	/* search the transport list first */
+	read_lock_bh(&rxrpc_transport_lock);
+	list_for_each_entry(trans, &rxrpc_transports, link) {
+		if (trans->local == local && trans->peer == peer)
+			goto found_extant_transport;
+	}
+	read_unlock_bh(&rxrpc_transport_lock);
+
+	/* not yet present - create a candidate for a new record and then
+	 * redo the search */
+	candidate = rxrpc_alloc_transport(local, peer, gfp);
+	if (!candidate) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	write_lock_bh(&rxrpc_transport_lock);
+
+	list_for_each_entry(trans, &rxrpc_transports, link) {
+		if (trans->local == local && trans->peer == peer)
+			goto found_extant_second;
+	}
+
+	/* we can now add the new candidate to the list */
+	trans = candidate;
+	candidate = NULL;
+
+	rxrpc_get_local(trans->local);
+	atomic_inc(&trans->peer->usage);
+	list_add_tail(&trans->link, &rxrpc_transports);
+	write_unlock_bh(&rxrpc_transport_lock);
+	new = "new";
+
+success:
+	_net("TRANSPORT %s %d local %d -> peer %d",
+	     new,
+	     trans->debug_id,
+	     trans->local->debug_id,
+	     trans->peer->debug_id);
+
+	_leave(" = %p {u=%d}", trans, atomic_read(&trans->usage));
+	return trans;
+
+	/* we found the transport in the list immediately */
+found_extant_transport:
+	usage = atomic_inc_return(&trans->usage);
+	read_unlock_bh(&rxrpc_transport_lock);
+	goto success;
+
+	/* we found the transport on the second time through the list */
+found_extant_second:
+	usage = atomic_inc_return(&trans->usage);
+	write_unlock_bh(&rxrpc_transport_lock);
+	kfree(candidate);
+	goto success;
+}
+
+/*
+ * find the transport connecting two endpoints
+ */
+struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *local,
+					     struct rxrpc_peer *peer)
+{
+	struct rxrpc_transport *trans;
+
+	_enter("{%u.%u.%u.%u+%hu},{%u.%u.%u.%u+%hu},",
+	       NIPQUAD(local->srx.transport.sin.sin_addr),
+	       ntohs(local->srx.transport.sin.sin_port),
+	       NIPQUAD(peer->srx.transport.sin.sin_addr),
+	       ntohs(peer->srx.transport.sin.sin_port));
+
+	/* search the transport list */
+	read_lock_bh(&rxrpc_transport_lock);
+
+	list_for_each_entry(trans, &rxrpc_transports, link) {
+		if (trans->local == local && trans->peer == peer)
+			goto found_extant_transport;
+	}
+
+	read_unlock_bh(&rxrpc_transport_lock);
+	_leave(" = NULL");
+	return NULL;
+
+found_extant_transport:
+	atomic_inc(&trans->usage);
+	read_unlock_bh(&rxrpc_transport_lock);
+	_leave(" = %p", trans);
+	return trans;
+}
+
+/*
+ * release a transport session
+ */
+void rxrpc_put_transport(struct rxrpc_transport *trans)
+{
+	_enter("%p{u=%d}", trans, atomic_read(&trans->usage));
+
+	ASSERTCMP(atomic_read(&trans->usage), >, 0);
+
+	trans->put_time = xtime.tv_sec;
+	if (unlikely(atomic_dec_and_test(&trans->usage)))
+		_debug("zombie");
+		/* let the reaper determine the timeout to avoid a race with
+		 * overextending the timeout if the reaper is running at the
+		 * same time */
+		schedule_delayed_work(&rxrpc_transport_reap, 0);
+	_leave("");
+}
+
+/*
+ * clean up a transport session
+ */
+static void rxrpc_cleanup_transport(struct rxrpc_transport *trans)
+{
+	_net("DESTROY TRANS %d", trans->debug_id);
+
+	rxrpc_purge_queue(&trans->error_queue);
+
+	rxrpc_put_local(trans->local);
+	rxrpc_put_peer(trans->peer);
+	kfree(trans);
+}
+
+/*
+ * reap dead transports that have passed their expiry date
+ */
+static void rxrpc_transport_reaper(struct work_struct *work)
+{
+	struct rxrpc_transport *trans, *_p;
+	unsigned long now, earliest, reap_time;
+
+	LIST_HEAD(graveyard);
+
+	_enter("");
+
+	now = xtime.tv_sec;
+	earliest = ULONG_MAX;
+
+	/* extract all the transports that have been dead too long */
+	write_lock_bh(&rxrpc_transport_lock);
+	list_for_each_entry_safe(trans, _p, &rxrpc_transports, link) {
+		_debug("reap TRANS %d { u=%d t=%ld }",
+		       trans->debug_id, atomic_read(&trans->usage),
+		       (long) now - (long) trans->put_time);
+
+		if (likely(atomic_read(&trans->usage) > 0))
+			continue;
+
+		reap_time = trans->put_time + rxrpc_transport_timeout;
+		if (reap_time <= now)
+			list_move_tail(&trans->link, &graveyard);
+		else if (reap_time < earliest)
+			earliest = reap_time;
+	}
+	write_unlock_bh(&rxrpc_transport_lock);
+
+	if (earliest != ULONG_MAX) {
+		_debug("reschedule reaper %ld", (long) earliest - now);
+		ASSERTCMP(earliest, >, now);
+		schedule_delayed_work(&rxrpc_transport_reap,
+				      (earliest - now) * HZ);
+	}
+
+	/* then destroy all those pulled out */
+	while (!list_empty(&graveyard)) {
+		trans = list_entry(graveyard.next, struct rxrpc_transport,
+				   link);
+		list_del_init(&trans->link);
+
+		ASSERTCMP(atomic_read(&trans->usage), ==, 0);
+		rxrpc_cleanup_transport(trans);
+	}
+
+	_leave("");
+}
+
+/*
+ * preemptively destroy all the transport session records rather than waiting
+ * for them to time out
+ */
+void __exit rxrpc_destroy_all_transports(void)
+{
+	_enter("");
+
+	rxrpc_transport_timeout = 0;
+	cancel_delayed_work(&rxrpc_transport_reap);
+	schedule_delayed_work(&rxrpc_transport_reap, 0);
+
+	_leave("");
+}
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
new file mode 100644
index 00000000000..1eaf529efac
--- /dev/null
+++ b/net/rxrpc/rxkad.c
@@ -0,0 +1,1153 @@
+/* Kerberos-based RxRPC security
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/udp.h>
+#include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include <linux/ctype.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+#define RXKAD_VERSION			2
+#define MAXKRB5TICKETLEN		1024
+#define RXKAD_TKT_TYPE_KERBEROS_V5	256
+#define ANAME_SZ			40	/* size of authentication name */
+#define INST_SZ				40	/* size of principal's instance */
+#define REALM_SZ			40	/* size of principal's auth domain */
+#define SNAME_SZ			40	/* size of service name */
+
+unsigned rxrpc_debug;
+module_param_named(debug, rxrpc_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(rxrpc_debug, "rxkad debugging mask");
+
+struct rxkad_level1_hdr {
+	__be32	data_size;	/* true data size (excluding padding) */
+};
+
+struct rxkad_level2_hdr {
+	__be32	data_size;	/* true data size (excluding padding) */
+	__be32	checksum;	/* decrypted data checksum */
+};
+
+MODULE_DESCRIPTION("RxRPC network protocol type-2 security (Kerberos)");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
+/*
+ * this holds a pinned cipher so that keventd doesn't get called by the cipher
+ * alloc routine, but since we have it to hand, we use it to decrypt RESPONSE
+ * packets
+ */
+static struct crypto_blkcipher *rxkad_ci;
+static DEFINE_MUTEX(rxkad_ci_mutex);
+
+/*
+ * initialise connection security
+ */
+static int rxkad_init_connection_security(struct rxrpc_connection *conn)
+{
+	struct rxrpc_key_payload *payload;
+	struct crypto_blkcipher *ci;
+	int ret;
+
+	_enter("{%d},{%x}", conn->debug_id, key_serial(conn->key));
+
+	payload = conn->key->payload.data;
+	conn->security_ix = payload->k.security_index;
+
+	ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(ci)) {
+		_debug("no cipher");
+		ret = PTR_ERR(ci);
+		goto error;
+	}
+
+	if (crypto_blkcipher_setkey(ci, payload->k.session_key,
+				    sizeof(payload->k.session_key)) < 0)
+		BUG();
+
+	switch (conn->security_level) {
+	case RXRPC_SECURITY_PLAIN:
+		break;
+	case RXRPC_SECURITY_AUTH:
+		conn->size_align = 8;
+		conn->security_size = sizeof(struct rxkad_level1_hdr);
+		conn->header_size += sizeof(struct rxkad_level1_hdr);
+		break;
+	case RXRPC_SECURITY_ENCRYPT:
+		conn->size_align = 8;
+		conn->security_size = sizeof(struct rxkad_level2_hdr);
+		conn->header_size += sizeof(struct rxkad_level2_hdr);
+		break;
+	default:
+		ret = -EKEYREJECTED;
+		goto error;
+	}
+
+	conn->cipher = ci;
+	ret = 0;
+error:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * prime the encryption state with the invariant parts of a connection's
+ * description
+ */
+static void rxkad_prime_packet_security(struct rxrpc_connection *conn)
+{
+	struct rxrpc_key_payload *payload;
+	struct blkcipher_desc desc;
+	struct scatterlist sg[2];
+	struct rxrpc_crypt iv;
+	struct {
+		__be32 x[4];
+	} tmpbuf __attribute__((aligned(16))); /* must all be in same page */
+
+	_enter("");
+
+	if (!conn->key)
+		return;
+
+	payload = conn->key->payload.data;
+	memcpy(&iv, payload->k.session_key, sizeof(iv));
+
+	desc.tfm = conn->cipher;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	tmpbuf.x[0] = conn->epoch;
+	tmpbuf.x[1] = conn->cid;
+	tmpbuf.x[2] = 0;
+	tmpbuf.x[3] = htonl(conn->security_ix);
+
+	memset(sg, 0, sizeof(sg));
+	sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
+	sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
+	crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+	memcpy(&conn->csum_iv, &tmpbuf.x[2], sizeof(conn->csum_iv));
+	ASSERTCMP(conn->csum_iv.n[0], ==, tmpbuf.x[2]);
+
+	_leave("");
+}
+
+/*
+ * partially encrypt a packet (level 1 security)
+ */
+static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
+				    struct sk_buff *skb,
+				    u32 data_size,
+				    void *sechdr)
+{
+	struct rxrpc_skb_priv *sp;
+	struct blkcipher_desc desc;
+	struct rxrpc_crypt iv;
+	struct scatterlist sg[2];
+	struct {
+		struct rxkad_level1_hdr hdr;
+		__be32	first;	/* first four bytes of data and padding */
+	} tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+	u16 check;
+
+	sp = rxrpc_skb(skb);
+
+	_enter("");
+
+	check = ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+	data_size |= (u32) check << 16;
+
+	tmpbuf.hdr.data_size = htonl(data_size);
+	memcpy(&tmpbuf.first, sechdr + 4, sizeof(tmpbuf.first));
+
+	/* start the encryption afresh */
+	memset(&iv, 0, sizeof(iv));
+	desc.tfm = call->conn->cipher;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	memset(sg, 0, sizeof(sg));
+	sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
+	sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
+	crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+	memcpy(sechdr, &tmpbuf, sizeof(tmpbuf));
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * wholly encrypt a packet (level 2 security)
+ */
+static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
+					struct sk_buff *skb,
+					u32 data_size,
+					void *sechdr)
+{
+	const struct rxrpc_key_payload *payload;
+	struct rxkad_level2_hdr rxkhdr
+		__attribute__((aligned(8))); /* must be all on one page */
+	struct rxrpc_skb_priv *sp;
+	struct blkcipher_desc desc;
+	struct rxrpc_crypt iv;
+	struct scatterlist sg[16];
+	struct sk_buff *trailer;
+	unsigned len;
+	u16 check;
+	int nsg;
+
+	sp = rxrpc_skb(skb);
+
+	_enter("");
+
+	check = ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+
+	rxkhdr.data_size = htonl(data_size | (u32) check << 16);
+	rxkhdr.checksum = 0;
+
+	/* encrypt from the session key */
+	payload = call->conn->key->payload.data;
+	memcpy(&iv, payload->k.session_key, sizeof(iv));
+	desc.tfm = call->conn->cipher;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	memset(sg, 0, sizeof(sg[0]) * 2);
+	sg_set_buf(&sg[0], sechdr, sizeof(rxkhdr));
+	sg_set_buf(&sg[1], &rxkhdr, sizeof(rxkhdr));
+	crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(rxkhdr));
+
+	/* we want to encrypt the skbuff in-place */
+	nsg = skb_cow_data(skb, 0, &trailer);
+	if (nsg < 0 || nsg > 16)
+		return -ENOMEM;
+
+	len = data_size + call->conn->size_align - 1;
+	len &= ~(call->conn->size_align - 1);
+
+	skb_to_sgvec(skb, sg, 0, len);
+	crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * checksum an RxRPC packet header
+ */
+static int rxkad_secure_packet(const struct rxrpc_call *call,
+				struct sk_buff *skb,
+				size_t data_size,
+				void *sechdr)
+{
+	struct rxrpc_skb_priv *sp;
+	struct blkcipher_desc desc;
+	struct rxrpc_crypt iv;
+	struct scatterlist sg[2];
+	struct {
+		__be32 x[2];
+	} tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+	__be32 x;
+	int ret;
+
+	sp = rxrpc_skb(skb);
+
+	_enter("{%d{%x}},{#%u},%zu,",
+	       call->debug_id, key_serial(call->conn->key), ntohl(sp->hdr.seq),
+	       data_size);
+
+	if (!call->conn->cipher)
+		return 0;
+
+	ret = key_validate(call->conn->key);
+	if (ret < 0)
+		return ret;
+
+	/* continue encrypting from where we left off */
+	memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
+	desc.tfm = call->conn->cipher;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	/* calculate the security checksum */
+	x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
+	x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff);
+	tmpbuf.x[0] = sp->hdr.callNumber;
+	tmpbuf.x[1] = x;
+
+	memset(&sg, 0, sizeof(sg));
+	sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
+	sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
+	crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+	x = ntohl(tmpbuf.x[1]);
+	x = (x >> 16) & 0xffff;
+	if (x == 0)
+		x = 1; /* zero checksums are not permitted */
+	sp->hdr.cksum = htons(x);
+
+	switch (call->conn->security_level) {
+	case RXRPC_SECURITY_PLAIN:
+		ret = 0;
+		break;
+	case RXRPC_SECURITY_AUTH:
+		ret = rxkad_secure_packet_auth(call, skb, data_size, sechdr);
+		break;
+	case RXRPC_SECURITY_ENCRYPT:
+		ret = rxkad_secure_packet_encrypt(call, skb, data_size,
+						  sechdr);
+		break;
+	default:
+		ret = -EPERM;
+		break;
+	}
+
+	_leave(" = %d [set %hx]", ret, x);
+	return ret;
+}
+
+/*
+ * decrypt partial encryption on a packet (level 1 security)
+ */
+static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
+				    struct sk_buff *skb,
+				    u32 *_abort_code)
+{
+	struct rxkad_level1_hdr sechdr;
+	struct rxrpc_skb_priv *sp;
+	struct blkcipher_desc desc;
+	struct rxrpc_crypt iv;
+	struct scatterlist sg[2];
+	struct sk_buff *trailer;
+	u32 data_size, buf;
+	u16 check;
+
+	_enter("");
+
+	sp = rxrpc_skb(skb);
+
+	/* we want to decrypt the skbuff in-place */
+	if (skb_cow_data(skb, 0, &trailer) < 0)
+		goto nomem;
+
+	skb_to_sgvec(skb, sg, 0, 8);
+
+	/* start the decryption afresh */
+	memset(&iv, 0, sizeof(iv));
+	desc.tfm = call->conn->cipher;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	crypto_blkcipher_decrypt_iv(&desc, sg, sg, 8);
+
+	/* remove the decrypted packet length */
+	if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
+		goto datalen_error;
+	if (!skb_pull(skb, sizeof(sechdr)))
+		BUG();
+
+	buf = ntohl(sechdr.data_size);
+	data_size = buf & 0xffff;
+
+	check = buf >> 16;
+	check ^= ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+	check &= 0xffff;
+	if (check != 0) {
+		*_abort_code = RXKADSEALEDINCON;
+		goto protocol_error;
+	}
+
+	/* shorten the packet to remove the padding */
+	if (data_size > skb->len)
+		goto datalen_error;
+	else if (data_size < skb->len)
+		skb->len = data_size;
+
+	_leave(" = 0 [dlen=%x]", data_size);
+	return 0;
+
+datalen_error:
+	*_abort_code = RXKADDATALEN;
+protocol_error:
+	_leave(" = -EPROTO");
+	return -EPROTO;
+
+nomem:
+	_leave(" = -ENOMEM");
+	return -ENOMEM;
+}
+
+/*
+ * wholly decrypt a packet (level 2 security)
+ */
+static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
+				       struct sk_buff *skb,
+				       u32 *_abort_code)
+{
+	const struct rxrpc_key_payload *payload;
+	struct rxkad_level2_hdr sechdr;
+	struct rxrpc_skb_priv *sp;
+	struct blkcipher_desc desc;
+	struct rxrpc_crypt iv;
+	struct scatterlist _sg[4], *sg;
+	struct sk_buff *trailer;
+	u32 data_size, buf;
+	u16 check;
+	int nsg;
+
+	_enter(",{%d}", skb->len);
+
+	sp = rxrpc_skb(skb);
+
+	/* we want to decrypt the skbuff in-place */
+	nsg = skb_cow_data(skb, 0, &trailer);
+	if (nsg < 0)
+		goto nomem;
+
+	sg = _sg;
+	if (unlikely(nsg > 4)) {
+		sg = kmalloc(sizeof(*sg) * nsg, GFP_NOIO);
+		if (!sg)
+			goto nomem;
+	}
+
+	skb_to_sgvec(skb, sg, 0, skb->len);
+
+	/* decrypt from the session key */
+	payload = call->conn->key->payload.data;
+	memcpy(&iv, payload->k.session_key, sizeof(iv));
+	desc.tfm = call->conn->cipher;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	crypto_blkcipher_decrypt_iv(&desc, sg, sg, skb->len);
+	if (sg != _sg)
+		kfree(sg);
+
+	/* remove the decrypted packet length */
+	if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
+		goto datalen_error;
+	if (!skb_pull(skb, sizeof(sechdr)))
+		BUG();
+
+	buf = ntohl(sechdr.data_size);
+	data_size = buf & 0xffff;
+
+	check = buf >> 16;
+	check ^= ntohl(sp->hdr.seq ^ sp->hdr.callNumber);
+	check &= 0xffff;
+	if (check != 0) {
+		*_abort_code = RXKADSEALEDINCON;
+		goto protocol_error;
+	}
+
+	/* shorten the packet to remove the padding */
+	if (data_size > skb->len)
+		goto datalen_error;
+	else if (data_size < skb->len)
+		skb->len = data_size;
+
+	_leave(" = 0 [dlen=%x]", data_size);
+	return 0;
+
+datalen_error:
+	*_abort_code = RXKADDATALEN;
+protocol_error:
+	_leave(" = -EPROTO");
+	return -EPROTO;
+
+nomem:
+	_leave(" = -ENOMEM");
+	return -ENOMEM;
+}
+
+/*
+ * verify the security on a received packet
+ */
+static int rxkad_verify_packet(const struct rxrpc_call *call,
+			       struct sk_buff *skb,
+			       u32 *_abort_code)
+{
+	struct blkcipher_desc desc;
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_crypt iv;
+	struct scatterlist sg[2];
+	struct {
+		__be32 x[2];
+	} tmpbuf __attribute__((aligned(8))); /* must all be in same page */
+	__be32 x;
+	__be16 cksum;
+	int ret;
+
+	sp = rxrpc_skb(skb);
+
+	_enter("{%d{%x}},{#%u}",
+	       call->debug_id, key_serial(call->conn->key),
+	       ntohl(sp->hdr.seq));
+
+	if (!call->conn->cipher)
+		return 0;
+
+	if (sp->hdr.securityIndex != 2) {
+		*_abort_code = RXKADINCONSISTENCY;
+		_leave(" = -EPROTO [not rxkad]");
+		return -EPROTO;
+	}
+
+	/* continue encrypting from where we left off */
+	memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
+	desc.tfm = call->conn->cipher;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	/* validate the security checksum */
+	x = htonl(call->channel << (32 - RXRPC_CIDSHIFT));
+	x |= sp->hdr.seq & __constant_cpu_to_be32(0x3fffffff);
+	tmpbuf.x[0] = call->call_id;
+	tmpbuf.x[1] = x;
+
+	memset(&sg, 0, sizeof(sg));
+	sg_set_buf(&sg[0], &tmpbuf, sizeof(tmpbuf));
+	sg_set_buf(&sg[1], &tmpbuf, sizeof(tmpbuf));
+	crypto_blkcipher_encrypt_iv(&desc, &sg[0], &sg[1], sizeof(tmpbuf));
+
+	x = ntohl(tmpbuf.x[1]);
+	x = (x >> 16) & 0xffff;
+	if (x == 0)
+		x = 1; /* zero checksums are not permitted */
+
+	cksum = htons(x);
+	if (sp->hdr.cksum != cksum) {
+		*_abort_code = RXKADSEALEDINCON;
+		_leave(" = -EPROTO [csum failed]");
+		return -EPROTO;
+	}
+
+	switch (call->conn->security_level) {
+	case RXRPC_SECURITY_PLAIN:
+		ret = 0;
+		break;
+	case RXRPC_SECURITY_AUTH:
+		ret = rxkad_verify_packet_auth(call, skb, _abort_code);
+		break;
+	case RXRPC_SECURITY_ENCRYPT:
+		ret = rxkad_verify_packet_encrypt(call, skb, _abort_code);
+		break;
+	default:
+		ret = -ENOANO;
+		break;
+	}
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * issue a challenge
+ */
+static int rxkad_issue_challenge(struct rxrpc_connection *conn)
+{
+	struct rxkad_challenge challenge;
+	struct rxrpc_header hdr;
+	struct msghdr msg;
+	struct kvec iov[2];
+	size_t len;
+	int ret;
+
+	_enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
+
+	ret = key_validate(conn->key);
+	if (ret < 0)
+		return ret;
+
+	get_random_bytes(&conn->security_nonce, sizeof(conn->security_nonce));
+
+	challenge.version	= htonl(2);
+	challenge.nonce		= htonl(conn->security_nonce);
+	challenge.min_level	= htonl(0);
+	challenge.__padding	= 0;
+
+	msg.msg_name	= &conn->trans->peer->srx.transport.sin;
+	msg.msg_namelen	= sizeof(conn->trans->peer->srx.transport.sin);
+	msg.msg_control	= NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags	= 0;
+
+	hdr.epoch	= conn->epoch;
+	hdr.cid		= conn->cid;
+	hdr.callNumber	= 0;
+	hdr.seq		= 0;
+	hdr.type	= RXRPC_PACKET_TYPE_CHALLENGE;
+	hdr.flags	= conn->out_clientflag;
+	hdr.userStatus	= 0;
+	hdr.securityIndex = conn->security_ix;
+	hdr._rsvd	= 0;
+	hdr.serviceId	= conn->service_id;
+
+	iov[0].iov_base	= &hdr;
+	iov[0].iov_len	= sizeof(hdr);
+	iov[1].iov_base	= &challenge;
+	iov[1].iov_len	= sizeof(challenge);
+
+	len = iov[0].iov_len + iov[1].iov_len;
+
+	hdr.serial = htonl(atomic_inc_return(&conn->serial));
+	_proto("Tx CHALLENGE %%%u", ntohl(hdr.serial));
+
+	ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 2, len);
+	if (ret < 0) {
+		_debug("sendmsg failed: %d", ret);
+		return -EAGAIN;
+	}
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * send a Kerberos security response
+ */
+static int rxkad_send_response(struct rxrpc_connection *conn,
+			       struct rxrpc_header *hdr,
+			       struct rxkad_response *resp,
+			       const struct rxkad_key *s2)
+{
+	struct msghdr msg;
+	struct kvec iov[3];
+	size_t len;
+	int ret;
+
+	_enter("");
+
+	msg.msg_name	= &conn->trans->peer->srx.transport.sin;
+	msg.msg_namelen	= sizeof(conn->trans->peer->srx.transport.sin);
+	msg.msg_control	= NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags	= 0;
+
+	hdr->epoch	= conn->epoch;
+	hdr->seq	= 0;
+	hdr->type	= RXRPC_PACKET_TYPE_RESPONSE;
+	hdr->flags	= conn->out_clientflag;
+	hdr->userStatus	= 0;
+	hdr->_rsvd	= 0;
+
+	iov[0].iov_base	= hdr;
+	iov[0].iov_len	= sizeof(*hdr);
+	iov[1].iov_base	= resp;
+	iov[1].iov_len	= sizeof(*resp);
+	iov[2].iov_base	= (void *) s2->ticket;
+	iov[2].iov_len	= s2->ticket_len;
+
+	len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
+
+	hdr->serial = htonl(atomic_inc_return(&conn->serial));
+	_proto("Tx RESPONSE %%%u", ntohl(hdr->serial));
+
+	ret = kernel_sendmsg(conn->trans->local->socket, &msg, iov, 3, len);
+	if (ret < 0) {
+		_debug("sendmsg failed: %d", ret);
+		return -EAGAIN;
+	}
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * calculate the response checksum
+ */
+static void rxkad_calc_response_checksum(struct rxkad_response *response)
+{
+	u32 csum = 1000003;
+	int loop;
+	u8 *p = (u8 *) response;
+
+	for (loop = sizeof(*response); loop > 0; loop--)
+		csum = csum * 0x10204081 + *p++;
+
+	response->encrypted.checksum = htonl(csum);
+}
+
+/*
+ * load a scatterlist with a potentially split-page buffer
+ */
+static void rxkad_sg_set_buf2(struct scatterlist sg[2],
+			      void *buf, size_t buflen)
+{
+
+	memset(sg, 0, sizeof(sg));
+
+	sg_set_buf(&sg[0], buf, buflen);
+	if (sg[0].offset + buflen > PAGE_SIZE) {
+		/* the buffer was split over two pages */
+		sg[0].length = PAGE_SIZE - sg[0].offset;
+		sg_set_buf(&sg[1], buf + sg[0].length, buflen - sg[0].length);
+	}
+
+	ASSERTCMP(sg[0].length + sg[1].length, ==, buflen);
+}
+
+/*
+ * encrypt the response packet
+ */
+static void rxkad_encrypt_response(struct rxrpc_connection *conn,
+				   struct rxkad_response *resp,
+				   const struct rxkad_key *s2)
+{
+	struct blkcipher_desc desc;
+	struct rxrpc_crypt iv;
+	struct scatterlist ssg[2], dsg[2];
+
+	/* continue encrypting from where we left off */
+	memcpy(&iv, s2->session_key, sizeof(iv));
+	desc.tfm = conn->cipher;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	rxkad_sg_set_buf2(ssg, &resp->encrypted, sizeof(resp->encrypted));
+	memcpy(dsg, ssg, sizeof(dsg));
+	crypto_blkcipher_encrypt_iv(&desc, dsg, ssg, sizeof(resp->encrypted));
+}
+
+/*
+ * respond to a challenge packet
+ */
+static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
+				      struct sk_buff *skb,
+				      u32 *_abort_code)
+{
+	const struct rxrpc_key_payload *payload;
+	struct rxkad_challenge challenge;
+	struct rxkad_response resp
+		__attribute__((aligned(8))); /* must be aligned for crypto */
+	struct rxrpc_skb_priv *sp;
+	u32 version, nonce, min_level, abort_code;
+	int ret;
+
+	_enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
+
+	if (!conn->key) {
+		_leave(" = -EPROTO [no key]");
+		return -EPROTO;
+	}
+
+	ret = key_validate(conn->key);
+	if (ret < 0) {
+		*_abort_code = RXKADEXPIRED;
+		return ret;
+	}
+
+	abort_code = RXKADPACKETSHORT;
+	sp = rxrpc_skb(skb);
+	if (skb_copy_bits(skb, 0, &challenge, sizeof(challenge)) < 0)
+		goto protocol_error;
+
+	version = ntohl(challenge.version);
+	nonce = ntohl(challenge.nonce);
+	min_level = ntohl(challenge.min_level);
+
+	_proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }",
+	       ntohl(sp->hdr.serial), version, nonce, min_level);
+
+	abort_code = RXKADINCONSISTENCY;
+	if (version != RXKAD_VERSION)
+		goto protocol_error;
+
+	abort_code = RXKADLEVELFAIL;
+	if (conn->security_level < min_level)
+		goto protocol_error;
+
+	payload = conn->key->payload.data;
+
+	/* build the response packet */
+	memset(&resp, 0, sizeof(resp));
+
+	resp.version = RXKAD_VERSION;
+	resp.encrypted.epoch = conn->epoch;
+	resp.encrypted.cid = conn->cid;
+	resp.encrypted.securityIndex = htonl(conn->security_ix);
+	resp.encrypted.call_id[0] =
+		(conn->channels[0] ? conn->channels[0]->call_id : 0);
+	resp.encrypted.call_id[1] =
+		(conn->channels[1] ? conn->channels[1]->call_id : 0);
+	resp.encrypted.call_id[2] =
+		(conn->channels[2] ? conn->channels[2]->call_id : 0);
+	resp.encrypted.call_id[3] =
+		(conn->channels[3] ? conn->channels[3]->call_id : 0);
+	resp.encrypted.inc_nonce = htonl(nonce + 1);
+	resp.encrypted.level = htonl(conn->security_level);
+	resp.kvno = htonl(payload->k.kvno);
+	resp.ticket_len = htonl(payload->k.ticket_len);
+
+	/* calculate the response checksum and then do the encryption */
+	rxkad_calc_response_checksum(&resp);
+	rxkad_encrypt_response(conn, &resp, &payload->k);
+	return rxkad_send_response(conn, &sp->hdr, &resp, &payload->k);
+
+protocol_error:
+	*_abort_code = abort_code;
+	_leave(" = -EPROTO [%d]", abort_code);
+	return -EPROTO;
+}
+
+/*
+ * decrypt the kerberos IV ticket in the response
+ */
+static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
+				void *ticket, size_t ticket_len,
+				struct rxrpc_crypt *_session_key,
+				time_t *_expiry,
+				u32 *_abort_code)
+{
+	struct blkcipher_desc desc;
+	struct rxrpc_crypt iv, key;
+	struct scatterlist ssg[1], dsg[1];
+	struct in_addr addr;
+	unsigned life;
+	time_t issue, now;
+	bool little_endian;
+	int ret;
+	u8 *p, *q, *name, *end;
+
+	_enter("{%d},{%x}", conn->debug_id, key_serial(conn->server_key));
+
+	*_expiry = 0;
+
+	ret = key_validate(conn->server_key);
+	if (ret < 0) {
+		switch (ret) {
+		case -EKEYEXPIRED:
+			*_abort_code = RXKADEXPIRED;
+			goto error;
+		default:
+			*_abort_code = RXKADNOAUTH;
+			goto error;
+		}
+	}
+
+	ASSERT(conn->server_key->payload.data != NULL);
+	ASSERTCMP((unsigned long) ticket & 7UL, ==, 0);
+
+	memcpy(&iv, &conn->server_key->type_data, sizeof(iv));
+
+	desc.tfm = conn->server_key->payload.data;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	sg_init_one(&ssg[0], ticket, ticket_len);
+	memcpy(dsg, ssg, sizeof(dsg));
+	crypto_blkcipher_decrypt_iv(&desc, dsg, ssg, ticket_len);
+
+	p = ticket;
+	end = p + ticket_len;
+
+#define Z(size)						\
+	({						\
+		u8 *__str = p;				\
+		q = memchr(p, 0, end - p);		\
+		if (!q || q - p > (size))		\
+			goto bad_ticket;		\
+		for (; p < q; p++)			\
+			if (!isprint(*p))		\
+				goto bad_ticket;	\
+		p++;					\
+		__str;					\
+	})
+
+	/* extract the ticket flags */
+	_debug("KIV FLAGS: %x", *p);
+	little_endian = *p & 1;
+	p++;
+
+	/* extract the authentication name */
+	name = Z(ANAME_SZ);
+	_debug("KIV ANAME: %s", name);
+
+	/* extract the principal's instance */
+	name = Z(INST_SZ);
+	_debug("KIV INST : %s", name);
+
+	/* extract the principal's authentication domain */
+	name = Z(REALM_SZ);
+	_debug("KIV REALM: %s", name);
+
+	if (end - p < 4 + 8 + 4 + 2)
+		goto bad_ticket;
+
+	/* get the IPv4 address of the entity that requested the ticket */
+	memcpy(&addr, p, sizeof(addr));
+	p += 4;
+	_debug("KIV ADDR : "NIPQUAD_FMT, NIPQUAD(addr));
+
+	/* get the session key from the ticket */
+	memcpy(&key, p, sizeof(key));
+	p += 8;
+	_debug("KIV KEY  : %08x %08x", ntohl(key.n[0]), ntohl(key.n[1]));
+	memcpy(_session_key, &key, sizeof(key));
+
+	/* get the ticket's lifetime */
+	life = *p++ * 5 * 60;
+	_debug("KIV LIFE : %u", life);
+
+	/* get the issue time of the ticket */
+	if (little_endian) {
+		__le32 stamp;
+		memcpy(&stamp, p, 4);
+		issue = le32_to_cpu(stamp);
+	} else {
+		__be32 stamp;
+		memcpy(&stamp, p, 4);
+		issue = be32_to_cpu(stamp);
+	}
+	p += 4;
+	now = xtime.tv_sec;
+	_debug("KIV ISSUE: %lx [%lx]", issue, now);
+
+	/* check the ticket is in date */
+	if (issue > now) {
+		*_abort_code = RXKADNOAUTH;
+		ret = -EKEYREJECTED;
+		goto error;
+	}
+
+	if (issue < now - life) {
+		*_abort_code = RXKADEXPIRED;
+		ret = -EKEYEXPIRED;
+		goto error;
+	}
+
+	*_expiry = issue + life;
+
+	/* get the service name */
+	name = Z(SNAME_SZ);
+	_debug("KIV SNAME: %s", name);
+
+	/* get the service instance name */
+	name = Z(INST_SZ);
+	_debug("KIV SINST: %s", name);
+
+	ret = 0;
+error:
+	_leave(" = %d", ret);
+	return ret;
+
+bad_ticket:
+	*_abort_code = RXKADBADTICKET;
+	ret = -EBADMSG;
+	goto error;
+}
+
+/*
+ * decrypt the response packet
+ */
+static void rxkad_decrypt_response(struct rxrpc_connection *conn,
+				   struct rxkad_response *resp,
+				   const struct rxrpc_crypt *session_key)
+{
+	struct blkcipher_desc desc;
+	struct scatterlist ssg[2], dsg[2];
+	struct rxrpc_crypt iv;
+
+	_enter(",,%08x%08x",
+	       ntohl(session_key->n[0]), ntohl(session_key->n[1]));
+
+	ASSERT(rxkad_ci != NULL);
+
+	mutex_lock(&rxkad_ci_mutex);
+	if (crypto_blkcipher_setkey(rxkad_ci, session_key->x,
+				    sizeof(*session_key)) < 0)
+		BUG();
+
+	memcpy(&iv, session_key, sizeof(iv));
+	desc.tfm = rxkad_ci;
+	desc.info = iv.x;
+	desc.flags = 0;
+
+	rxkad_sg_set_buf2(ssg, &resp->encrypted, sizeof(resp->encrypted));
+	memcpy(dsg, ssg, sizeof(dsg));
+	crypto_blkcipher_decrypt_iv(&desc, dsg, ssg, sizeof(resp->encrypted));
+	mutex_unlock(&rxkad_ci_mutex);
+
+	_leave("");
+}
+
+/*
+ * verify a response
+ */
+static int rxkad_verify_response(struct rxrpc_connection *conn,
+				 struct sk_buff *skb,
+				 u32 *_abort_code)
+{
+	struct rxkad_response response
+		__attribute__((aligned(8))); /* must be aligned for crypto */
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_crypt session_key;
+	time_t expiry;
+	void *ticket;
+	u32 abort_code, version, kvno, ticket_len, csum, level;
+	int ret;
+
+	_enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
+
+	abort_code = RXKADPACKETSHORT;
+	if (skb_copy_bits(skb, 0, &response, sizeof(response)) < 0)
+		goto protocol_error;
+	if (!pskb_pull(skb, sizeof(response)))
+		BUG();
+
+	version = ntohl(response.version);
+	ticket_len = ntohl(response.ticket_len);
+	kvno = ntohl(response.kvno);
+	sp = rxrpc_skb(skb);
+	_proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
+	       ntohl(sp->hdr.serial), version, kvno, ticket_len);
+
+	abort_code = RXKADINCONSISTENCY;
+	if (version != RXKAD_VERSION)
+
+	abort_code = RXKADTICKETLEN;
+	if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN)
+		goto protocol_error;
+
+	abort_code = RXKADUNKNOWNKEY;
+	if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5)
+		goto protocol_error;
+
+	/* extract the kerberos ticket and decrypt and decode it */
+	ticket = kmalloc(ticket_len, GFP_NOFS);
+	if (!ticket)
+		return -ENOMEM;
+
+	abort_code = RXKADPACKETSHORT;
+	if (skb_copy_bits(skb, 0, ticket, ticket_len) < 0)
+		goto protocol_error_free;
+
+	ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key,
+				   &expiry, &abort_code);
+	if (ret < 0) {
+		*_abort_code = abort_code;
+		kfree(ticket);
+		return ret;
+	}
+
+	/* use the session key from inside the ticket to decrypt the
+	 * response */
+	rxkad_decrypt_response(conn, &response, &session_key);
+
+	abort_code = RXKADSEALEDINCON;
+	if (response.encrypted.epoch != conn->epoch)
+		goto protocol_error_free;
+	if (response.encrypted.cid != conn->cid)
+		goto protocol_error_free;
+	if (ntohl(response.encrypted.securityIndex) != conn->security_ix)
+		goto protocol_error_free;
+	csum = response.encrypted.checksum;
+	response.encrypted.checksum = 0;
+	rxkad_calc_response_checksum(&response);
+	if (response.encrypted.checksum != csum)
+		goto protocol_error_free;
+
+	if (ntohl(response.encrypted.call_id[0]) > INT_MAX ||
+	    ntohl(response.encrypted.call_id[1]) > INT_MAX ||
+	    ntohl(response.encrypted.call_id[2]) > INT_MAX ||
+	    ntohl(response.encrypted.call_id[3]) > INT_MAX)
+		goto protocol_error_free;
+
+	abort_code = RXKADOUTOFSEQUENCE;
+	if (response.encrypted.inc_nonce != htonl(conn->security_nonce + 1))
+		goto protocol_error_free;
+
+	abort_code = RXKADLEVELFAIL;
+	level = ntohl(response.encrypted.level);
+	if (level > RXRPC_SECURITY_ENCRYPT)
+		goto protocol_error_free;
+	conn->security_level = level;
+
+	/* create a key to hold the security data and expiration time - after
+	 * this the connection security can be handled in exactly the same way
+	 * as for a client connection */
+	ret = rxrpc_get_server_data_key(conn, &session_key, expiry, kvno);
+	if (ret < 0) {
+		kfree(ticket);
+		return ret;
+	}
+
+	kfree(ticket);
+	_leave(" = 0");
+	return 0;
+
+protocol_error_free:
+	kfree(ticket);
+protocol_error:
+	*_abort_code = abort_code;
+	_leave(" = -EPROTO [%d]", abort_code);
+	return -EPROTO;
+}
+
+/*
+ * clear the connection security
+ */
+static void rxkad_clear(struct rxrpc_connection *conn)
+{
+	_enter("");
+
+	if (conn->cipher)
+		crypto_free_blkcipher(conn->cipher);
+}
+
+/*
+ * RxRPC Kerberos-based security
+ */
+static struct rxrpc_security rxkad = {
+	.owner				= THIS_MODULE,
+	.name				= "rxkad",
+	.security_index			= RXKAD_VERSION,
+	.init_connection_security	= rxkad_init_connection_security,
+	.prime_packet_security		= rxkad_prime_packet_security,
+	.secure_packet			= rxkad_secure_packet,
+	.verify_packet			= rxkad_verify_packet,
+	.issue_challenge		= rxkad_issue_challenge,
+	.respond_to_challenge		= rxkad_respond_to_challenge,
+	.verify_response		= rxkad_verify_response,
+	.clear				= rxkad_clear,
+};
+
+static __init int rxkad_init(void)
+{
+	_enter("");
+
+	/* pin the cipher we need so that the crypto layer doesn't invoke
+	 * keventd to go get it */
+	rxkad_ci = crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(rxkad_ci))
+		return PTR_ERR(rxkad_ci);
+
+	return rxrpc_register_security(&rxkad);
+}
+
+module_init(rxkad_init);
+
+static __exit void rxkad_exit(void)
+{
+	_enter("");
+
+	rxrpc_unregister_security(&rxkad);
+	crypto_free_blkcipher(rxkad_ci);
+}
+
+module_exit(rxkad_exit);
-- 
cgit v1.2.3


From ec26815ad847dbf74a1e27aa5515fb7d5dc6ee6f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 26 Apr 2007 15:49:28 -0700
Subject: [AFS]: Clean up the AFS sources

Clean up the AFS sources.

Also remove references to AFS keys.  RxRPC keys are used instead.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/afs/Makefile     |   2 -
 fs/afs/cache.h      |  12 ++--
 fs/afs/callback.c   |   9 +--
 fs/afs/cell.c       |  51 ++++++-----------
 fs/afs/cell.h       |  30 ++++------
 fs/afs/cmservice.c  |  46 +++++----------
 fs/afs/cmservice.h  |  17 +++---
 fs/afs/dir.c        |  50 ++++++----------
 fs/afs/errors.h     |  14 +++--
 fs/afs/file.c       |  20 ++-----
 fs/afs/fsclient.c   |  73 +++++++++++-------------
 fs/afs/fsclient.h   |  48 ++++++++--------
 fs/afs/inode.c      |  29 ++++------
 fs/afs/internal.h   |  36 +++++-------
 fs/afs/kafsasyncd.c |  22 +++----
 fs/afs/kafsasyncd.h |  22 ++++---
 fs/afs/kafstimod.c  |  31 ++++------
 fs/afs/kafstimod.h  |  20 +++----
 fs/afs/main.c       |  94 ++++++------------------------
 fs/afs/misc.c       |   6 +-
 fs/afs/mntpt.c      |  27 ++++-----
 fs/afs/mount.h      |   8 +--
 fs/afs/proc.c       | 161 +++++++++++++++++++---------------------------------
 fs/afs/server.c     |  47 ++++++---------
 fs/afs/server.h     |  41 ++++++-------
 fs/afs/super.c      |  62 +++++++-------------
 fs/afs/super.h      |  16 ++----
 fs/afs/transport.h  |   8 +--
 fs/afs/types.h      |  34 +++--------
 fs/afs/vlclient.c   |  58 ++++++++-----------
 fs/afs/vlclient.h   |  34 +++++------
 fs/afs/vlocation.c  | 102 ++++++++++++---------------------
 fs/afs/vnode.c      |  36 ++++--------
 fs/afs/vnode.h      |  36 +++++-------
 fs/afs/volume.c     |  58 +++++++------------
 fs/afs/volume.h     |  52 +++++++----------
 36 files changed, 506 insertions(+), 906 deletions(-)

diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 4029c9da4b8..8e719737967 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -2,8 +2,6 @@
 # Makefile for Red Hat Linux AFS client.
 #
 
-#CFLAGS += -finstrument-functions
-
 kafs-objs := \
 	callback.o \
 	cell.o \
diff --git a/fs/afs/cache.h b/fs/afs/cache.h
index 9eb7722b34d..36a3642cf90 100644
--- a/fs/afs/cache.h
+++ b/fs/afs/cache.h
@@ -1,4 +1,4 @@
-/* cache.h: AFS local cache management interface
+/* AFS local cache management interface
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -9,8 +9,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef _LINUX_AFS_CACHE_H
-#define _LINUX_AFS_CACHE_H
+#ifndef AFS_CACHE_H
+#define AFS_CACHE_H
 
 #undef AFS_CACHING_SUPPORT
 
@@ -20,8 +20,4 @@
 #endif
 #include "types.h"
 
-#ifdef __KERNEL__
-
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_AFS_CACHE_H */
+#endif /* AFS_CACHE_H */
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 9cb206e9d4b..26a48fea42f 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -21,7 +21,6 @@
 #include "internal.h"
 #include "cmservice.h"
 
-/*****************************************************************************/
 /*
  * allow the fileserver to request callback state (re-)initialisation
  */
@@ -79,9 +78,8 @@ int SRXAFSCM_InitCallBackState(struct afs_server *server)
 
 	_leave(" = 0");
 	return 0;
-} /* end SRXAFSCM_InitCallBackState() */
+}
 
-/*****************************************************************************/
 /*
  * allow the fileserver to break callback promises
  */
@@ -156,9 +154,8 @@ int SRXAFSCM_CallBack(struct afs_server *server, size_t count,
 
 	_leave(" = 0");
 	return 0;
-} /* end SRXAFSCM_CallBack() */
+}
 
-/*****************************************************************************/
 /*
  * allow the fileserver to see if the cache manager is still alive
  */
@@ -166,4 +163,4 @@ int SRXAFSCM_Probe(struct afs_server *server)
 {
 	_debug("SRXAFSCM_Probe(%p)\n", server);
 	return 0;
-} /* end SRXAFSCM_Probe() */
+}
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 1fc57837275..28ed84ec8ff 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -1,4 +1,4 @@
-/* cell.c: AFS cell and server record management
+/* AFS cell and server record management
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -44,7 +44,6 @@ struct cachefs_index_def afs_cache_cell_index_def = {
 };
 #endif
 
-/*****************************************************************************/
 /*
  * create a cell record
  * - "name" is the name of the cell
@@ -137,16 +136,15 @@ int afs_cell_create(const char *name, char *vllist, struct afs_cell **_cell)
 	_leave(" = 0 (%p)", cell);
 	return 0;
 
- badaddr:
+badaddr:
 	printk(KERN_ERR "kAFS: bad VL server IP address: '%s'\n", vllist);
- error:
+error:
 	up_write(&afs_cells_sem);
 	kfree(cell);
 	_leave(" = %d", ret);
 	return ret;
-} /* end afs_cell_create() */
+}
 
-/*****************************************************************************/
 /*
  * initialise the cell database from module parameters
  */
@@ -199,10 +197,8 @@ int afs_cell_init(char *rootcell)
 
 	_leave(" = %d", ret);
 	return ret;
+}
 
-} /* end afs_cell_init() */
-
-/*****************************************************************************/
 /*
  * lookup a cell record
  */
@@ -234,8 +230,7 @@ int afs_cell_lookup(const char *name, unsigned namesz, struct afs_cell **_cell)
 
 		if (cell)
 			ret = 0;
-	}
-	else {
+	} else {
 		read_lock(&afs_cells_lock);
 
 		cell = afs_cell_root;
@@ -247,8 +242,7 @@ int afs_cell_lookup(const char *name, unsigned namesz, struct afs_cell **_cell)
 			 * for other reasons.
 			 */
 			ret = -EDESTADDRREQ;
-		}
-		else {
+		} else {
 			afs_get_cell(cell);
 			ret = 0;
 		}
@@ -259,10 +253,8 @@ int afs_cell_lookup(const char *name, unsigned namesz, struct afs_cell **_cell)
 	*_cell = cell;
 	_leave(" = %d (%p)", ret, cell);
 	return ret;
+}
 
-} /* end afs_cell_lookup() */
-
-/*****************************************************************************/
 /*
  * try and get a cell record
  */
@@ -281,9 +273,8 @@ struct afs_cell *afs_get_cell_maybe(struct afs_cell **_cell)
 	write_unlock(&afs_cells_lock);
 
 	return cell;
-} /* end afs_get_cell_maybe() */
+}
 
-/*****************************************************************************/
 /*
  * destroy a cell record
  */
@@ -315,9 +306,8 @@ void afs_put_cell(struct afs_cell *cell)
 	BUG_ON(!list_empty(&cell->vl_graveyard));
 
 	_leave(" [unused]");
-} /* end afs_put_cell() */
+}
 
-/*****************************************************************************/
 /*
  * destroy a cell record
  */
@@ -359,9 +349,8 @@ static void afs_cell_destroy(struct afs_cell *cell)
 	kfree(cell);
 
 	_leave(" [destroyed]");
-} /* end afs_cell_destroy() */
+}
 
-/*****************************************************************************/
 /*
  * lookup the server record corresponding to an Rx RPC peer
  */
@@ -411,7 +400,7 @@ int afs_server_find_by_peer(const struct rxrpc_peer *peer,
 	return -ENOENT;
 
 	/* we found it in the graveyard - resurrect it */
- found_dead_server:
+found_dead_server:
 	list_move_tail(&server->link, &cell->sv_list);
 	afs_get_server(server);
 	afs_kafstimod_del_timer(&server->timeout);
@@ -419,20 +408,18 @@ int afs_server_find_by_peer(const struct rxrpc_peer *peer,
 	goto success;
 
 	/* we found it - increment its ref count and return it */
- found_server:
+found_server:
 	afs_get_server(server);
 
- success:
+success:
 	write_unlock(&cell->sv_lock);
 	read_unlock(&afs_cells_lock);
 
 	*_server = server;
 	_leave(" = 0 (s=%p c=%p)", server, cell);
 	return 0;
+}
 
-} /* end afs_server_find_by_peer() */
-
-/*****************************************************************************/
 /*
  * purge in-memory cell database on module unload or afs_init() failure
  * - the timeout daemon is stopped before calling this
@@ -520,9 +507,8 @@ void afs_cell_purge(void)
 	}
 
 	_leave("");
-} /* end afs_cell_purge() */
+}
 
-/*****************************************************************************/
 /*
  * match a cell record obtained from the cache
  */
@@ -542,10 +528,9 @@ static cachefs_match_val_t afs_cell_cache_match(void *target,
 
 	_leave(" = FAILED");
 	return CACHEFS_MATCH_FAILED;
-} /* end afs_cell_cache_match() */
+}
 #endif
 
-/*****************************************************************************/
 /*
  * update a cell record in the cache
  */
@@ -563,5 +548,5 @@ static void afs_cell_cache_update(void *source, void *entry)
 	       cell->vl_addrs,
 	       min(sizeof(ccell->vl_servers), sizeof(cell->vl_addrs)));
 
-} /* end afs_cell_cache_update() */
+}
 #endif
diff --git a/fs/afs/cell.h b/fs/afs/cell.h
index 48349108fb0..c135b00c6c7 100644
--- a/fs/afs/cell.h
+++ b/fs/afs/cell.h
@@ -1,4 +1,4 @@
-/* cell.h: AFS cell record
+/* AFS cell record
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -9,8 +9,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef _LINUX_AFS_CELL_H
-#define _LINUX_AFS_CELL_H
+#ifndef AFS_CELL_H
+#define AFS_CELL_H
 
 #include "types.h"
 #include "cache.h"
@@ -19,22 +19,18 @@
 
 extern volatile int afs_cells_being_purged; /* T when cells are being purged by rmmod */
 
-/*****************************************************************************/
 /*
  * entry in the cached cell catalogue
  */
-struct afs_cache_cell
-{
+struct afs_cache_cell {
 	char			name[64];	/* cell name (padded with NULs) */
 	struct in_addr		vl_servers[15];	/* cached cell VL servers */
 };
 
-/*****************************************************************************/
 /*
  * AFS cell record
  */
-struct afs_cell
-{
+struct afs_cell {
 	atomic_t		usage;
 	struct list_head	link;		/* main cell list link */
 	struct list_head	proc_link;	/* /proc cell list link */
@@ -61,18 +57,14 @@ struct afs_cell
 	char			name[0];	/* cell name - must go last */
 };
 
-extern int afs_cell_init(char *rootcell);
-
-extern int afs_cell_create(const char *name, char *vllist, struct afs_cell **_cell);
-
-extern int afs_cell_lookup(const char *name, unsigned nmsize, struct afs_cell **_cell);
+extern int afs_cell_init(char *);
+extern int afs_cell_create(const char *, char *, struct afs_cell **);
+extern int afs_cell_lookup(const char *, unsigned, struct afs_cell **);
 
 #define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
 
-extern struct afs_cell *afs_get_cell_maybe(struct afs_cell **_cell);
-
-extern void afs_put_cell(struct afs_cell *cell);
-
+extern struct afs_cell *afs_get_cell_maybe(struct afs_cell **);
+extern void afs_put_cell(struct afs_cell *);
 extern void afs_cell_purge(void);
 
-#endif /* _LINUX_AFS_CELL_H */
+#endif /* AFS_CELL_H */
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 3d097fddcb7..3f4585765cb 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -1,4 +1,4 @@
-/* cmservice.c: AFS Cache Manager Service
+/* AFS Cache Manager Service
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -106,7 +106,6 @@ static DEFINE_SPINLOCK(afscm_calls_lock);
 static DEFINE_SPINLOCK(kafscmd_attention_lock);
 static int kafscmd_die;
 
-/*****************************************************************************/
 /*
  * AFS Cache Manager kernel thread
  */
@@ -177,10 +176,8 @@ static int kafscmd(void *arg)
 
 	/* and that's all */
 	complete_and_exit(&kafscmd_dead, 0);
+}
 
-} /* end kafscmd() */
-
-/*****************************************************************************/
 /*
  * handle a call coming in to the cache manager
  * - if I want to keep the call, I must increment its usage count
@@ -202,10 +199,8 @@ static int afscm_new_call(struct rxrpc_call *call)
 
 	_leave(" = 0");
 	return 0;
+}
 
-} /* end afscm_new_call() */
-
-/*****************************************************************************/
 /*
  * queue on the kafscmd queue for attention
  */
@@ -226,9 +221,8 @@ static void afscm_attention(struct rxrpc_call *call)
 	wake_up(&kafscmd_sleepq);
 
 	_leave(" {u=%d}", atomic_read(&call->usage));
-} /* end afscm_attention() */
+}
 
-/*****************************************************************************/
 /*
  * handle my call being aborted
  * - clean up, dequeue and put my ref to the call
@@ -266,9 +260,8 @@ static void afscm_error(struct rxrpc_call *call)
 	wake_up(&kafscmd_sleepq);
 
 	_leave("");
-} /* end afscm_error() */
+}
 
-/*****************************************************************************/
 /*
  * map afs abort codes to/from Linux error codes
  * - called with call->lock held
@@ -285,9 +278,8 @@ static void afscm_aemap(struct rxrpc_call *call)
 	default:
 		break;
 	}
-} /* end afscm_aemap() */
+}
 
-/*****************************************************************************/
 /*
  * start the cache manager service if not already started
  */
@@ -316,18 +308,16 @@ int afscm_start(void)
 
 	return 0;
 
- kill:
+kill:
 	kafscmd_die = 1;
 	wake_up(&kafscmd_sleepq);
 	wait_for_completion(&kafscmd_dead);
 
- out:
+out:
 	up_write(&afscm_sem);
 	return ret;
+}
 
-} /* end afscm_start() */
-
-/*****************************************************************************/
 /*
  * stop the cache manager service
  */
@@ -394,10 +384,8 @@ void afscm_stop(void)
 	}
 
 	up_write(&afscm_sem);
+}
 
-} /* end afscm_stop() */
-
-/*****************************************************************************/
 /*
  * handle the fileserver breaking a set of callbacks
  */
@@ -460,8 +448,7 @@ static void _SRXAFSCM_CallBack(struct rxrpc_call *call)
 					pcb->version	= ntohl(*bp++);
 					pcb->expiry	= ntohl(*bp++);
 					pcb->type	= ntohl(*bp++);
-				}
-				else {
+				} else {
 					pcb->version	= 0;
 					pcb->expiry	= 0;
 					pcb->type	= AFSCM_CB_UNTYPED;
@@ -512,10 +499,8 @@ static void _SRXAFSCM_CallBack(struct rxrpc_call *call)
 	afs_put_server(server);
 
 	_leave(" = %d", ret);
+}
 
-} /* end _SRXAFSCM_CallBack() */
-
-/*****************************************************************************/
 /*
  * handle the fileserver asking us to initialise our callback state
  */
@@ -580,10 +565,8 @@ static void _SRXAFSCM_InitCallBackState(struct rxrpc_call *call)
 	afs_put_server(server);
 
 	_leave(" = %d", ret);
+}
 
-} /* end _SRXAFSCM_InitCallBackState() */
-
-/*****************************************************************************/
 /*
  * handle a probe from a fileserver
  */
@@ -648,5 +631,4 @@ static void _SRXAFSCM_Probe(struct rxrpc_call *call)
 	afs_put_server(server);
 
 	_leave(" = %d", ret);
-
-} /* end _SRXAFSCM_Probe() */
+}
diff --git a/fs/afs/cmservice.h b/fs/afs/cmservice.h
index af8d4d689cb..66e10c15bd1 100644
--- a/fs/afs/cmservice.h
+++ b/fs/afs/cmservice.h
@@ -1,4 +1,4 @@
-/* cmservice.h: AFS Cache Manager Service declarations
+/* AFS Cache Manager Service declarations
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -9,8 +9,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef _LINUX_AFS_CMSERVICE_H
-#define _LINUX_AFS_CMSERVICE_H
+#ifndef AFS_CMSERVICE_H
+#define AFS_CMSERVICE_H
 
 #include <rxrpc/transport.h>
 #include "types.h"
@@ -20,10 +20,9 @@ extern int afscm_start(void);
 extern void afscm_stop(void);
 
 /* cache manager server functions */
-extern int SRXAFSCM_InitCallBackState(struct afs_server *server);
-extern int SRXAFSCM_CallBack(struct afs_server *server,
-			     size_t count,
-			     struct afs_callback callbacks[]);
-extern int SRXAFSCM_Probe(struct afs_server *server);
+extern int SRXAFSCM_InitCallBackState(struct afs_server *);
+extern int SRXAFSCM_CallBack(struct afs_server *, size_t,
+			     struct afs_callback[]);
+extern int SRXAFSCM_Probe(struct afs_server *);
 
-#endif /* _LINUX_AFS_CMSERVICE_H */
+#endif /* AFS_CMSERVICE_H */
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index b6dc2ebe47a..2f6d9237646 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -112,7 +112,6 @@ struct afs_dir_lookup_cookie {
 	int		found;
 };
 
-/*****************************************************************************/
 /*
  * check that a directory page is valid
  */
@@ -157,13 +156,11 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
 	SetPageChecked(page);
 	return;
 
- error:
+error:
 	SetPageChecked(page);
 	SetPageError(page);
+}
 
-} /* end afs_dir_check_page() */
-
-/*****************************************************************************/
 /*
  * discard a page cached in the pagecache
  */
@@ -171,10 +168,8 @@ static inline void afs_dir_put_page(struct page *page)
 {
 	kunmap(page);
 	page_cache_release(page);
+}
 
-} /* end afs_dir_put_page() */
-
-/*****************************************************************************/
 /*
  * get a page into the pagecache
  */
@@ -197,12 +192,11 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index)
 	}
 	return page;
 
- fail:
+fail:
 	afs_dir_put_page(page);
 	return ERR_PTR(-EIO);
-} /* end afs_dir_get_page() */
+}
 
-/*****************************************************************************/
 /*
  * open an AFS directory file
  */
@@ -218,10 +212,8 @@ static int afs_dir_open(struct inode *inode, struct file *file)
 
 	_leave(" = 0");
 	return 0;
+}
 
-} /* end afs_dir_open() */
-
-/*****************************************************************************/
 /*
  * deal with one block in an AFS directory
  */
@@ -316,9 +308,8 @@ static int afs_dir_iterate_block(unsigned *fpos,
 
 	_leave(" = 1 [more]");
 	return 1;
-} /* end afs_dir_iterate_block() */
+}
 
-/*****************************************************************************/
 /*
  * read an AFS directory
  */
@@ -377,12 +368,11 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie,
 		ret = 0;
 	}
 
- out:
+out:
 	_leave(" = %d", ret);
 	return ret;
-} /* end afs_dir_iterate() */
+}
 
-/*****************************************************************************/
 /*
  * read an AFS directory
  */
@@ -399,9 +389,8 @@ static int afs_dir_readdir(struct file *file, void *cookie, filldir_t filldir)
 
 	_leave(" = %d", ret);
 	return ret;
-} /* end afs_dir_readdir() */
+}
 
-/*****************************************************************************/
 /*
  * search the directory for a name
  * - if afs_dir_iterate_block() spots this function, it'll pass the FID
@@ -426,9 +415,8 @@ static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen,
 
 	_leave(" = -1 [found]");
 	return -1;
-} /* end afs_dir_lookup_filldir() */
+}
 
-/*****************************************************************************/
 /*
  * look up an entry in a directory
  */
@@ -498,9 +486,8 @@ static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry,
 	       dentry->d_inode->i_version);
 
 	return NULL;
-} /* end afs_dir_lookup() */
+}
 
-/*****************************************************************************/
 /*
  * check that a dentry lookup hit has found a valid entry
  * - NOTE! the hit can be a negative hit too, so we can't assume we have an
@@ -605,18 +592,18 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 			(void *) (unsigned long) AFS_FS_I(dir)->status.version;
 	}
 
- out_valid:
+out_valid:
 	dput(parent);
 	_leave(" = 1 [valid]");
 	return 1;
 
 	/* the dirent, if it exists, now points to a different vnode */
- not_found:
+not_found:
 	spin_lock(&dentry->d_lock);
 	dentry->d_flags |= DCACHE_NFSFS_RENAMED;
 	spin_unlock(&dentry->d_lock);
 
- out_bad:
+out_bad:
 	if (inode) {
 		/* don't unhash if we have submounts */
 		if (have_submounts(dentry))
@@ -633,9 +620,8 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 
 	_leave(" = 0 [bad]");
 	return 0;
-} /* end afs_d_revalidate() */
+}
 
-/*****************************************************************************/
 /*
  * allow the VFS to enquire as to whether a dentry should be unhashed (mustn't
  * sleep)
@@ -657,7 +643,7 @@ static int afs_d_delete(struct dentry *dentry)
 	_leave(" = 0 [keep]");
 	return 0;
 
- zap:
+zap:
 	_leave(" = 1 [zap]");
 	return 1;
-} /* end afs_d_delete() */
+}
diff --git a/fs/afs/errors.h b/fs/afs/errors.h
index 574d94ac8d0..bcc0a3309e7 100644
--- a/fs/afs/errors.h
+++ b/fs/afs/errors.h
@@ -1,4 +1,4 @@
-/* errors.h: AFS abort/error codes
+/* AFS abort/error codes
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -9,12 +9,14 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef _LINUX_AFS_ERRORS_H
-#define _LINUX_AFS_ERRORS_H
+#ifndef AFS_ERRORS_H
+#define AFS_ERRORS_H
 
 #include "types.h"
 
-/* file server abort codes */
+/*
+ * file server abort codes
+ */
 typedef enum {
 	VSALVAGE	= 101,	/* volume needs salvaging */
 	VNOVNODE	= 102,	/* no such file/dir (vnode) */
@@ -29,6 +31,6 @@ typedef enum {
 	VMOVED		= 111,	/* volume moved to new server - ask this FS where */
 } afs_rxfs_abort_t;
 
-extern int afs_abort_to_error(int abortcode);
+extern int afs_abort_to_error(int);
 
-#endif /* _LINUX_AFS_ERRORS_H */
+#endif /* AFS_ERRORS_H */
diff --git a/fs/afs/file.c b/fs/afs/file.c
index b17634541f6..01df30d256b 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -40,7 +40,6 @@ const struct address_space_operations afs_fs_aops = {
 	.invalidatepage	= afs_file_invalidatepage,
 };
 
-/*****************************************************************************/
 /*
  * deal with notification that a page was read from the cache
  */
@@ -58,10 +57,9 @@ static void afs_file_readpage_read_complete(void *cookie_data,
 		SetPageUptodate(page);
 	unlock_page(page);
 
-} /* end afs_file_readpage_read_complete() */
+}
 #endif
 
-/*****************************************************************************/
 /*
  * deal with notification that a page was written to the cache
  */
@@ -74,11 +72,9 @@ static void afs_file_readpage_write_complete(void *cookie_data,
 	_enter("%p,%p,%p,%d", cookie_data, page, data, error);
 
 	unlock_page(page);
-
-} /* end afs_file_readpage_write_complete() */
+}
 #endif
 
-/*****************************************************************************/
 /*
  * AFS read page from file (or symlink)
  */
@@ -184,10 +180,8 @@ static int afs_file_readpage(struct file *file, struct page *page)
 
 	_leave(" = %d", ret);
 	return ret;
+}
 
-} /* end afs_file_readpage() */
-
-/*****************************************************************************/
 /*
  * get a page cookie for the specified page
  */
@@ -202,10 +196,9 @@ int afs_cache_get_page_cookie(struct page *page,
 
 	_leave(" = %d", ret);
 	return ret;
-} /* end afs_cache_get_page_cookie() */
+}
 #endif
 
-/*****************************************************************************/
 /*
  * invalidate part or all of a page
  */
@@ -240,9 +233,8 @@ static void afs_file_invalidatepage(struct page *page, unsigned long offset)
 	}
 
 	_leave(" = %d", ret);
-} /* end afs_file_invalidatepage() */
+}
 
-/*****************************************************************************/
 /*
  * release a page and cleanup its private data
  */
@@ -267,4 +259,4 @@ static int afs_file_releasepage(struct page *page, gfp_t gfp_flags)
 
 	_leave(" = 0");
 	return 0;
-} /* end afs_file_releasepage() */
+}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 61bc371532a..f1c3a186842 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -29,7 +29,6 @@
 #define FSGETROOTVOLUME		151	/* AFS Get root volume name */
 #define FSLOOKUP		161	/* AFS lookup file in directory */
 
-/*****************************************************************************/
 /*
  * map afs abort codes to/from Linux error codes
  * - called with call->lock held
@@ -46,9 +45,8 @@ static void afs_rxfs_aemap(struct rxrpc_call *call)
 	default:
 		break;
 	}
-} /* end afs_rxfs_aemap() */
+}
 
-/*****************************************************************************/
 /*
  * get the root volume name from a fileserver
  * - this operation doesn't seem to work correctly in OpenAFS server 1.2.2
@@ -162,23 +160,22 @@ int afs_rxfs_get_root_volume(struct afs_server *server,
 		BUG();
 	}
 
- abort:
+abort:
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	rxrpc_call_abort(call, ret);
 	schedule();
- out_unwait:
+out_unwait:
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&call->waitq, &myself);
 	rxrpc_put_call(call);
- out_put_conn:
+out_put_conn:
 	afs_server_release_fsconn(server, conn);
- out:
+out:
 	kleave("");
 	return ret;
-} /* end afs_rxfs_get_root_volume() */
+}
 #endif
 
-/*****************************************************************************/
 /*
  * get information about a volume
  */
@@ -275,26 +272,24 @@ int afs_rxfs_get_volume_info(struct afs_server *server,
 	/* success */
 	ret = 0;
 
- out_unwait:
+out_unwait:
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&call->waitq, &myself);
 	rxrpc_put_call(call);
- out_put_conn:
+out_put_conn:
 	afs_server_release_fsconn(server, conn);
- out:
+out:
 	_leave("");
 	return ret;
 
- abort:
+abort:
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	rxrpc_call_abort(call, ret);
 	schedule();
 	goto out_unwait;
-
-} /* end afs_rxfs_get_volume_info() */
+}
 #endif
 
-/*****************************************************************************/
 /*
  * fetch the status information for a file
  */
@@ -401,24 +396,23 @@ int afs_rxfs_fetch_file_status(struct afs_server *server,
 	/* success */
 	ret = 0;
 
- out_unwait:
+out_unwait:
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&call->waitq, &myself);
 	rxrpc_put_call(call);
- out_put_conn:
+out_put_conn:
 	afs_server_release_callslot(server, &callslot);
- out:
+out:
 	_leave("");
 	return ret;
 
- abort:
+abort:
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	rxrpc_call_abort(call, ret);
 	schedule();
 	goto out_unwait;
-} /* end afs_rxfs_fetch_file_status() */
+}
 
-/*****************************************************************************/
 /*
  * fetch the contents of a file or directory
  */
@@ -547,31 +541,29 @@ int afs_rxfs_fetch_file_data(struct afs_server *server,
 	/* success */
 	ret = 0;
 
- out_unwait:
+out_unwait:
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&call->waitq,&myself);
 	rxrpc_put_call(call);
- out_put_conn:
+out_put_conn:
 	afs_server_release_callslot(server, &callslot);
- out:
+out:
 	_leave(" = %d", ret);
 	return ret;
 
- read_failed:
+read_failed:
 	if (ret == -ECONNABORTED) {
 		ret = call->app_errno;
 		goto out_unwait;
 	}
 
- abort:
+abort:
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	rxrpc_call_abort(call, ret);
 	schedule();
 	goto out_unwait;
+}
 
-} /* end afs_rxfs_fetch_file_data() */
-
-/*****************************************************************************/
 /*
  * ask the AFS fileserver to discard a callback request on a file
  */
@@ -655,24 +647,23 @@ int afs_rxfs_give_up_callback(struct afs_server *server,
 		BUG();
 	}
 
- out_unwait:
+out_unwait:
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&call->waitq, &myself);
 	rxrpc_put_call(call);
- out_put_conn:
+out_put_conn:
 	afs_server_release_callslot(server, &callslot);
- out:
+out:
 	_leave("");
 	return ret;
 
- abort:
+abort:
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	rxrpc_call_abort(call, ret);
 	schedule();
 	goto out_unwait;
-} /* end afs_rxfs_give_up_callback() */
+}
 
-/*****************************************************************************/
 /*
  * look a filename up in a directory
  * - this operation doesn't seem to work correctly in OpenAFS server 1.2.2
@@ -818,20 +809,20 @@ int afs_rxfs_lookup(struct afs_server *server,
 	/* success */
 	ret = 0;
 
- out_unwait:
+out_unwait:
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&call->waitq, &myself);
 	rxrpc_put_call(call);
- out_put_conn:
+out_put_conn:
 	afs_server_release_fsconn(server, conn);
- out:
+out:
 	kleave("");
 	return ret;
 
- abort:
+abort:
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	rxrpc_call_abort(call, ret);
 	schedule();
 	goto out_unwait;
-} /* end afs_rxfs_lookup() */
+}
 #endif
diff --git a/fs/afs/fsclient.h b/fs/afs/fsclient.h
index 8ba3e749ee3..e2b0b7bcd09 100644
--- a/fs/afs/fsclient.h
+++ b/fs/afs/fsclient.h
@@ -1,4 +1,4 @@
-/* fsclient.h: AFS File Server client stub declarations
+/* AFS File Server client stub declarations
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -9,18 +9,18 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef _LINUX_AFS_FSCLIENT_H
-#define _LINUX_AFS_FSCLIENT_H
+#ifndef AFS_FSCLIENT_H
+#define AFS_FSCLIENT_H
 
 #include "server.h"
 
-extern int afs_rxfs_get_volume_info(struct afs_server *server,
-				    const char *name,
-				    struct afs_volume_info *vinfo);
+extern int afs_rxfs_get_volume_info(struct afs_server *,
+				    const char *,
+				    struct afs_volume_info *);
 
-extern int afs_rxfs_fetch_file_status(struct afs_server *server,
-				      struct afs_vnode *vnode,
-				      struct afs_volsync *volsync);
+extern int afs_rxfs_fetch_file_status(struct afs_server *,
+				      struct afs_vnode *,
+				      struct afs_volsync *);
 
 struct afs_rxfs_fetch_descriptor {
 	struct afs_fid	fid;		/* file ID to fetch */
@@ -30,25 +30,25 @@ struct afs_rxfs_fetch_descriptor {
 	size_t		actual;		/* actual size sent back by server */
 };
 
-extern int afs_rxfs_fetch_file_data(struct afs_server *server,
-				    struct afs_vnode *vnode,
-				    struct afs_rxfs_fetch_descriptor *desc,
-				    struct afs_volsync *volsync);
+extern int afs_rxfs_fetch_file_data(struct afs_server *,
+				    struct afs_vnode *,
+				    struct afs_rxfs_fetch_descriptor *,
+				    struct afs_volsync *);
 
-extern int afs_rxfs_give_up_callback(struct afs_server *server,
-				     struct afs_vnode *vnode);
+extern int afs_rxfs_give_up_callback(struct afs_server *,
+				     struct afs_vnode *);
 
 /* this doesn't appear to work in OpenAFS server */
-extern int afs_rxfs_lookup(struct afs_server *server,
-			   struct afs_vnode *dir,
-			   const char *filename,
-			   struct afs_vnode *vnode,
-			   struct afs_volsync *volsync);
+extern int afs_rxfs_lookup(struct afs_server *,
+			   struct afs_vnode *,
+			   const char *,
+			   struct afs_vnode *,
+			   struct afs_volsync *);
 
 /* this is apparently mis-implemented in OpenAFS server */
-extern int afs_rxfs_get_root_volume(struct afs_server *server,
-				    char *buf,
-				    size_t *buflen);
+extern int afs_rxfs_get_root_volume(struct afs_server *,
+				    char *,
+				    size_t *);
 
 
-#endif /* _LINUX_AFS_FSCLIENT_H */
+#endif /* AFS_FSCLIENT_H */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 9d9bca6c28b..900c8bb1c3b 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -29,7 +29,6 @@ struct afs_iget_data {
 	struct afs_volume	*volume;	/* volume on which resides */
 };
 
-/*****************************************************************************/
 /*
  * map the AFS file status to the inode member variables
  */
@@ -87,9 +86,8 @@ static int afs_inode_map_status(struct afs_vnode *vnode)
 	}
 
 	return 0;
-} /* end afs_inode_map_status() */
+}
 
-/*****************************************************************************/
 /*
  * attempt to fetch the status of an inode, coelescing multiple simultaneous
  * fetches
@@ -107,10 +105,8 @@ static int afs_inode_fetch_status(struct inode *inode)
 		ret = afs_inode_map_status(vnode);
 
 	return ret;
+}
 
-} /* end afs_inode_fetch_status() */
-
-/*****************************************************************************/
 /*
  * iget5() comparator
  */
@@ -120,9 +116,8 @@ static int afs_iget5_test(struct inode *inode, void *opaque)
 
 	return inode->i_ino == data->fid.vnode &&
 		inode->i_version == data->fid.unique;
-} /* end afs_iget5_test() */
+}
 
-/*****************************************************************************/
 /*
  * iget5() inode initialiser
  */
@@ -137,9 +132,8 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
 	vnode->volume = data->volume;
 
 	return 0;
-} /* end afs_iget5_set() */
+}
 
-/*****************************************************************************/
 /*
  * inode retrieval
  */
@@ -169,7 +163,7 @@ inline int afs_iget(struct super_block *sb, struct afs_fid *fid,
 	/* deal with an existing inode */
 	if (!(inode->i_state & I_NEW)) {
 		ret = afs_vnode_fetch_status(vnode);
-		if (ret==0)
+		if (ret == 0)
 			*_inode = inode;
 		else
 			iput(inode);
@@ -204,16 +198,15 @@ inline int afs_iget(struct super_block *sb, struct afs_fid *fid,
 	return 0;
 
 	/* failure */
- bad_inode:
+bad_inode:
 	make_bad_inode(inode);
 	unlock_new_inode(inode);
 	iput(inode);
 
 	_leave(" = %d [bad]", ret);
 	return ret;
-} /* end afs_iget() */
+}
 
-/*****************************************************************************/
 /*
  * read the attributes of an inode
  */
@@ -235,8 +228,7 @@ int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		_leave(" = %d [%d %p]",
 		       ret, atomic_read(&dentry->d_count), dentry->d_inode);
 		return ret;
-	}
-	else if (ret < 0) {
+	} else if (ret < 0) {
 		make_bad_inode(inode);
 		_leave(" = %d", ret);
 		return ret;
@@ -252,9 +244,8 @@ int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	       vnode->cb_type);
 
 	return 0;
-} /* end afs_inode_getattr() */
+}
 
-/*****************************************************************************/
 /*
  * clear an AFS inode
  */
@@ -282,4 +273,4 @@ void afs_clear_inode(struct inode *inode)
 #endif
 
 	_leave("");
-} /* end afs_clear_inode() */
+}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5151d5da2c2..b6dd20a93cc 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1,4 +1,4 @@
-/* internal.h: internal AFS stuff
+/* internal AFS stuff
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -73,26 +73,16 @@ extern const struct address_space_operations afs_fs_aops;
 extern const struct inode_operations afs_file_inode_operations;
 
 #ifdef AFS_CACHING_SUPPORT
-extern int afs_cache_get_page_cookie(struct page *page,
-				     struct cachefs_page **_page_cookie);
+extern int afs_cache_get_page_cookie(struct page *, struct cachefs_page **);
 #endif
 
 /*
  * inode.c
  */
-extern int afs_iget(struct super_block *sb, struct afs_fid *fid,
-		    struct inode **_inode);
-extern int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
-			     struct kstat *stat);
-extern void afs_clear_inode(struct inode *inode);
-
-/*
- * key_afs.c
- */
-#ifdef CONFIG_KEYS
-extern int afs_key_register(void);
-extern void afs_key_unregister(void);
-#endif
+extern int afs_iget(struct super_block *, struct afs_fid *, struct inode **);
+extern int afs_inode_getattr(struct vfsmount *, struct dentry *,
+			     struct kstat *);
+extern void afs_clear_inode(struct inode *);
 
 /*
  * main.c
@@ -110,7 +100,7 @@ extern struct afs_timer afs_mntpt_expiry_timer;
 extern struct afs_timer_ops afs_mntpt_expiry_timer_ops;
 extern unsigned long afs_mntpt_expiry_timeout;
 
-extern int afs_mntpt_check_symlink(struct afs_vnode *vnode);
+extern int afs_mntpt_check_symlink(struct afs_vnode *);
 
 /*
  * super.c
@@ -123,17 +113,17 @@ extern void afs_fs_exit(void);
 extern struct list_head afs_cb_hash_tbl[];
 extern spinlock_t afs_cb_hash_lock;
 
-#define afs_cb_hash(SRV,FID) \
-	afs_cb_hash_tbl[((unsigned long)(SRV) + \
-			(FID)->vid + (FID)->vnode + (FID)->unique) % \
-			AFS_CB_HASH_COUNT]
+#define afs_cb_hash(SRV, FID)						\
+	afs_cb_hash_tbl[((unsigned long)(SRV) +				\
+			 (FID)->vid + (FID)->vnode + (FID)->unique) &	\
+			(AFS_CB_HASH_COUNT - 1)]
 
 /*
  * proc.c
  */
 extern int afs_proc_init(void);
 extern void afs_proc_cleanup(void);
-extern int afs_proc_cell_setup(struct afs_cell *cell);
-extern void afs_proc_cell_remove(struct afs_cell *cell);
+extern int afs_proc_cell_setup(struct afs_cell *);
+extern void afs_proc_cell_remove(struct afs_cell *);
 
 #endif /* AFS_INTERNAL_H */
diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
index 615df2407cb..8ca01c23601 100644
--- a/fs/afs/kafsasyncd.c
+++ b/fs/afs/kafsasyncd.c
@@ -1,4 +1,4 @@
-/* kafsasyncd.c: AFS asynchronous operation daemon
+/* AFS asynchronous operation daemon
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -50,7 +50,6 @@ static void kafsasyncd_null_call_error_func(struct rxrpc_call *call)
 {
 }
 
-/*****************************************************************************/
 /*
  * start the async daemon
  */
@@ -65,9 +64,8 @@ int afs_kafsasyncd_start(void)
 	wait_for_completion(&kafsasyncd_alive);
 
 	return ret;
-} /* end afs_kafsasyncd_start() */
+}
 
-/*****************************************************************************/
 /*
  * stop the async daemon
  */
@@ -77,10 +75,8 @@ void afs_kafsasyncd_stop(void)
 	kafsasyncd_die = 1;
 	wake_up(&kafsasyncd_sleepq);
 	wait_for_completion(&kafsasyncd_dead);
+}
 
-} /* end afs_kafsasyncd_stop() */
-
-/*****************************************************************************/
 /*
  * probing daemon
  */
@@ -187,10 +183,8 @@ static int kafsasyncd(void *arg)
 	/* and that's all */
 	_leave("");
 	complete_and_exit(&kafsasyncd_dead, 0);
+}
 
-} /* end kafsasyncd() */
-
-/*****************************************************************************/
 /*
  * begin an operation
  * - place operation on busy queue
@@ -209,9 +203,8 @@ void afs_kafsasyncd_begin_op(struct afs_async_op *op)
 	spin_unlock(&kafsasyncd_async_lock);
 
 	_leave("");
-} /* end afs_kafsasyncd_begin_op() */
+}
 
-/*****************************************************************************/
 /*
  * request attention for an operation
  * - move to attention queue
@@ -229,9 +222,8 @@ void afs_kafsasyncd_attend_op(struct afs_async_op *op)
 	wake_up(&kafsasyncd_sleepq);
 
 	_leave("");
-} /* end afs_kafsasyncd_attend_op() */
+}
 
-/*****************************************************************************/
 /*
  * terminate an operation
  * - remove from either queue
@@ -252,4 +244,4 @@ void afs_kafsasyncd_terminate_op(struct afs_async_op *op)
 	wake_up(&kafsasyncd_sleepq);
 
 	_leave("");
-} /* end afs_kafsasyncd_terminate_op() */
+}
diff --git a/fs/afs/kafsasyncd.h b/fs/afs/kafsasyncd.h
index 791803f9a6f..1273eb544c5 100644
--- a/fs/afs/kafsasyncd.h
+++ b/fs/afs/kafsasyncd.h
@@ -1,4 +1,4 @@
-/* kafsasyncd.h: AFS asynchronous operation daemon
+/* AFS asynchronous operation daemon
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -9,24 +9,22 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef _LINUX_AFS_KAFSASYNCD_H
-#define _LINUX_AFS_KAFSASYNCD_H
+#ifndef AFS_KAFSASYNCD_H
+#define AFS_KAFSASYNCD_H
 
 #include "types.h"
 
 struct afs_async_op;
 
 struct afs_async_op_ops {
-	void (*attend)(struct afs_async_op *op);
-	void (*discard)(struct afs_async_op *op);
+	void (*attend)(struct afs_async_op *);
+	void (*discard)(struct afs_async_op *);
 };
 
-/*****************************************************************************/
 /*
  * asynchronous operation record
  */
-struct afs_async_op
-{
+struct afs_async_op {
 	struct list_head		link;
 	struct afs_server		*server;	/* server being contacted */
 	struct rxrpc_call		*call;		/* RxRPC call performing op */
@@ -45,8 +43,8 @@ static inline void afs_async_op_init(struct afs_async_op *op,
 extern int afs_kafsasyncd_start(void);
 extern void afs_kafsasyncd_stop(void);
 
-extern void afs_kafsasyncd_begin_op(struct afs_async_op *op);
-extern void afs_kafsasyncd_attend_op(struct afs_async_op *op);
-extern void afs_kafsasyncd_terminate_op(struct afs_async_op *op);
+extern void afs_kafsasyncd_begin_op(struct afs_async_op *);
+extern void afs_kafsasyncd_attend_op(struct afs_async_op *);
+extern void afs_kafsasyncd_terminate_op(struct afs_async_op *);
 
-#endif /* _LINUX_AFS_KAFSASYNCD_H */
+#endif /* AFS_KAFSASYNCD_H */
diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c
index 694344e4d3c..3526dcccc16 100644
--- a/fs/afs/kafstimod.c
+++ b/fs/afs/kafstimod.c
@@ -1,4 +1,4 @@
-/* kafstimod.c: AFS timeout daemon
+/* AFS timeout daemon
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -30,7 +30,6 @@ static DEFINE_SPINLOCK(kafstimod_lock);
 
 static int kafstimod(void *arg);
 
-/*****************************************************************************/
 /*
  * start the timeout daemon
  */
@@ -45,9 +44,8 @@ int afs_kafstimod_start(void)
 	wait_for_completion(&kafstimod_alive);
 
 	return ret;
-} /* end afs_kafstimod_start() */
+}
 
-/*****************************************************************************/
 /*
  * stop the timeout daemon
  */
@@ -57,10 +55,8 @@ void afs_kafstimod_stop(void)
 	kafstimod_die = 1;
 	wake_up(&kafstimod_sleepq);
 	wait_for_completion(&kafstimod_dead);
+}
 
-} /* end afs_kafstimod_stop() */
-
-/*****************************************************************************/
 /*
  * timeout processing daemon
  */
@@ -77,7 +73,7 @@ static int kafstimod(void *arg)
 	complete(&kafstimod_alive);
 
 	/* loop around looking for things to attend to */
- loop:
+loop:
 	set_current_state(TASK_INTERRUPTIBLE);
 	add_wait_queue(&kafstimod_sleepq, &myself);
 
@@ -101,8 +97,7 @@ static int kafstimod(void *arg)
 		spin_lock(&kafstimod_lock);
 		if (list_empty(&kafstimod_list)) {
 			timeout = MAX_SCHEDULE_TIMEOUT;
-		}
-		else {
+		} else {
 			timer = list_entry(kafstimod_list.next,
 					   struct afs_timer, link);
 			timeout = timer->timo_jif;
@@ -110,10 +105,7 @@ static int kafstimod(void *arg)
 
 			if (time_before_eq((unsigned long) timeout, jif))
 				goto immediate;
-
-			else {
-				timeout = (long) timeout - (long) jiffies;
-			}
+			timeout = (long) timeout - (long) jiffies;
 		}
 		spin_unlock(&kafstimod_lock);
 
@@ -126,7 +118,7 @@ static int kafstimod(void *arg)
 	 * - we come here with the lock held and timer pointing to the expired
 	 *   entry
 	 */
- immediate:
+immediate:
 	remove_wait_queue(&kafstimod_sleepq, &myself);
 	set_current_state(TASK_RUNNING);
 
@@ -141,10 +133,8 @@ static int kafstimod(void *arg)
 
 	_debug("@@@ End Timeout");
 	goto loop;
+}
 
-} /* end kafstimod() */
-
-/*****************************************************************************/
 /*
  * (re-)queue a timer
  */
@@ -176,9 +166,8 @@ void afs_kafstimod_add_timer(struct afs_timer *timer, unsigned long timeout)
 	wake_up(&kafstimod_sleepq);
 
 	_leave("");
-} /* end afs_kafstimod_add_timer() */
+}
 
-/*****************************************************************************/
 /*
  * dequeue a timer
  * - returns 0 if the timer was deleted or -ENOENT if it wasn't queued
@@ -202,4 +191,4 @@ int afs_kafstimod_del_timer(struct afs_timer *timer)
 
 	_leave(" = %d", ret);
 	return ret;
-} /* end afs_kafstimod_del_timer() */
+}
diff --git a/fs/afs/kafstimod.h b/fs/afs/kafstimod.h
index e312f1a61a7..0d39becbbe0 100644
--- a/fs/afs/kafstimod.h
+++ b/fs/afs/kafstimod.h
@@ -1,4 +1,4 @@
-/* kafstimod.h: AFS timeout daemon
+/* AFS timeout daemon
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -9,8 +9,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef _LINUX_AFS_KAFSTIMOD_H
-#define _LINUX_AFS_KAFSTIMOD_H
+#ifndef AFS_KAFSTIMOD_H
+#define AFS_KAFSTIMOD_H
 
 #include "types.h"
 
@@ -18,15 +18,13 @@ struct afs_timer;
 
 struct afs_timer_ops {
 	/* called when the front of the timer queue has timed out */
-	void (*timed_out)(struct afs_timer *timer);
+	void (*timed_out)(struct afs_timer *);
 };
 
-/*****************************************************************************/
 /*
  * AFS timer/timeout record
  */
-struct afs_timer
-{
+struct afs_timer {
 	struct list_head		link;		/* link in timer queue */
 	unsigned long			timo_jif;	/* timeout time */
 	const struct afs_timer_ops	*ops;		/* timeout expiry function */
@@ -41,9 +39,7 @@ static inline void afs_timer_init(struct afs_timer *timer,
 
 extern int afs_kafstimod_start(void);
 extern void afs_kafstimod_stop(void);
+extern void afs_kafstimod_add_timer(struct afs_timer *, unsigned long);
+extern int afs_kafstimod_del_timer(struct afs_timer *);
 
-extern void afs_kafstimod_add_timer(struct afs_timer *timer,
-				    unsigned long timeout);
-extern int afs_kafstimod_del_timer(struct afs_timer *timer);
-
-#endif /* _LINUX_AFS_KAFSTIMOD_H */
+#endif /* AFS_KAFSTIMOD_H */
diff --git a/fs/afs/main.c b/fs/afs/main.c
index f2704ba5385..5bf39f66f4c 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -1,4 +1,4 @@
-/* main.c: AFS client file system
+/* AFS client file system
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -62,7 +62,6 @@ struct cachefs_netfs afs_cache_netfs = {
 };
 #endif
 
-/*****************************************************************************/
 /*
  * initialise the AFS client FS module
  */
@@ -86,12 +85,6 @@ static int __init afs_init(void)
 	/* we want to be able to cache */
 	ret = cachefs_register_netfs(&afs_cache_netfs,
 				     &afs_cache_cell_index_def);
-	if (ret < 0)
-		goto error;
-#endif
-
-#ifdef CONFIG_KEYS_TURNED_OFF
-	ret = afs_key_register();
 	if (ret < 0)
 		goto error_cache;
 #endif
@@ -99,58 +92,55 @@ static int __init afs_init(void)
 	/* initialise the cell DB */
 	ret = afs_cell_init(rootcell);
 	if (ret < 0)
-		goto error_keys;
+		goto error_cell_init;
 
 	/* start the timeout daemon */
 	ret = afs_kafstimod_start();
 	if (ret < 0)
-		goto error_keys;
+		goto error_kafstimod;
 
 	/* start the async operation daemon */
 	ret = afs_kafsasyncd_start();
 	if (ret < 0)
-		goto error_kafstimod;
+		goto error_kafsasyncd;
 
 	/* create the RxRPC transport */
 	ret = rxrpc_create_transport(7001, &afs_transport);
 	if (ret < 0)
-		goto error_kafsasyncd;
+		goto error_transport;
 
 	afs_transport->peer_ops = &afs_peer_ops;
 
 	/* register the filesystems */
 	ret = afs_fs_init();
 	if (ret < 0)
-		goto error_transport;
+		goto error_fs;
 
 	return ret;
 
- error_transport:
+error_fs:
 	rxrpc_put_transport(afs_transport);
- error_kafsasyncd:
+error_transport:
 	afs_kafsasyncd_stop();
- error_kafstimod:
+error_kafsasyncd:
 	afs_kafstimod_stop();
- error_keys:
-#ifdef CONFIG_KEYS_TURNED_OFF
-	afs_key_unregister();
- error_cache:
-#endif
+error_kafstimod:
+error_cell_init:
 #ifdef AFS_CACHING_SUPPORT
 	cachefs_unregister_netfs(&afs_cache_netfs);
- error:
+error_cache:
 #endif
 	afs_cell_purge();
 	afs_proc_cleanup();
 	printk(KERN_ERR "kAFS: failed to register: %d\n", ret);
 	return ret;
-} /* end afs_init() */
+}
 
 /* XXX late_initcall is kludgy, but the only alternative seems to create
  * a transport upon the first mount, which is worse. Or is it?
  */
 late_initcall(afs_init);	/* must be called after net/ to create socket */
-/*****************************************************************************/
+
 /*
  * clean up on module removal
  */
@@ -163,19 +153,14 @@ static void __exit afs_exit(void)
 	afs_kafstimod_stop();
 	afs_kafsasyncd_stop();
 	afs_cell_purge();
-#ifdef CONFIG_KEYS_TURNED_OFF
-	afs_key_unregister();
-#endif
 #ifdef AFS_CACHING_SUPPORT
 	cachefs_unregister_netfs(&afs_cache_netfs);
 #endif
 	afs_proc_cleanup();
-
-} /* end afs_exit() */
+}
 
 module_exit(afs_exit);
 
-/*****************************************************************************/
 /*
  * notification that new peer record is being added
  * - called from krxsecd
@@ -208,9 +193,8 @@ static int afs_adding_peer(struct rxrpc_peer *peer)
 	afs_put_server(server);
 
 	return 0;
-} /* end afs_adding_peer() */
+}
 
-/*****************************************************************************/
 /*
  * notification that a peer record is being discarded
  * - called from krxiod or krxsecd
@@ -236,50 +220,4 @@ static void afs_discarding_peer(struct rxrpc_peer *peer)
 	spin_unlock(&afs_server_peer_lock);
 
 	_leave("");
-
-} /* end afs_discarding_peer() */
-
-/*****************************************************************************/
-/*
- * clear the dead space between task_struct and kernel stack
- * - called by supplying -finstrument-functions to gcc
- */
-#if 0
-void __cyg_profile_func_enter (void *this_fn, void *call_site)
-__attribute__((no_instrument_function));
-
-void __cyg_profile_func_enter (void *this_fn, void *call_site)
-{
-       asm volatile("  movl    %%esp,%%edi     \n"
-                    "  andl    %0,%%edi        \n"
-                    "  addl    %1,%%edi        \n"
-                    "  movl    %%esp,%%ecx     \n"
-                    "  subl    %%edi,%%ecx     \n"
-                    "  shrl    $2,%%ecx        \n"
-                    "  movl    $0xedededed,%%eax     \n"
-                    "  rep stosl               \n"
-                    :
-                    : "i"(~(THREAD_SIZE - 1)), "i"(sizeof(struct thread_info))
-                    : "eax", "ecx", "edi", "memory", "cc"
-                    );
-}
-
-void __cyg_profile_func_exit(void *this_fn, void *call_site)
-__attribute__((no_instrument_function));
-
-void __cyg_profile_func_exit(void *this_fn, void *call_site)
-{
-       asm volatile("  movl    %%esp,%%edi     \n"
-                    "  andl    %0,%%edi        \n"
-                    "  addl    %1,%%edi        \n"
-                    "  movl    %%esp,%%ecx     \n"
-                    "  subl    %%edi,%%ecx     \n"
-                    "  shrl    $2,%%ecx        \n"
-                    "  movl    $0xdadadada,%%eax     \n"
-                    "  rep stosl               \n"
-                    :
-                    : "i"(~(THREAD_SIZE - 1)), "i"(sizeof(struct thread_info))
-                    : "eax", "ecx", "edi", "memory", "cc"
-                    );
 }
-#endif
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index e4fce66d76e..55bc6778cec 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -1,4 +1,4 @@
-/* misc.c: miscellaneous bits
+/* miscellaneous bits
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -15,7 +15,6 @@
 #include "errors.h"
 #include "internal.h"
 
-/*****************************************************************************/
 /*
  * convert an AFS abort code to a Linux error number
  */
@@ -35,5 +34,4 @@ int afs_abort_to_error(int abortcode)
 	case VMOVED:		return -ENXIO;
 	default:		return -EIO;
 	}
-
-} /* end afs_abort_to_error() */
+}
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 68495f0de7b..ca3fa81b068 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -1,4 +1,4 @@
-/* mntpt.c: mountpoint management
+/* mountpoint management
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -54,7 +54,6 @@ struct afs_timer afs_mntpt_expiry_timer;
 
 unsigned long afs_mntpt_expiry_timeout = 20;
 
-/*****************************************************************************/
 /*
  * check a symbolic link to see whether it actually encodes a mountpoint
  * - sets the AFS_VNODE_MOUNTPOINT flag on the vnode appropriately
@@ -99,16 +98,14 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode)
 
 	ret = 0;
 
- out_free:
+out_free:
 	kunmap(page);
 	page_cache_release(page);
- out:
+out:
 	_leave(" = %d", ret);
 	return ret;
+}
 
-} /* end afs_mntpt_check_symlink() */
-
-/*****************************************************************************/
 /*
  * no valid lookup procedure on this sort of dir
  */
@@ -125,9 +122,8 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
 	       dentry->d_name.name);
 
 	return ERR_PTR(-EREMOTE);
-} /* end afs_mntpt_lookup() */
+}
 
-/*****************************************************************************/
 /*
  * no valid open procedure on this sort of dir
  */
@@ -142,9 +138,8 @@ static int afs_mntpt_open(struct inode *inode, struct file *file)
 	       file->f_path.dentry->d_name.name);
 
 	return -EREMOTE;
-} /* end afs_mntpt_open() */
+}
 
-/*****************************************************************************/
 /*
  * create a vfsmount to be automounted
  */
@@ -210,7 +205,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 	kleave(" = %p", mnt);
 	return mnt;
 
- error:
+error:
 	if (page)
 		page_cache_release(page);
 	if (devname)
@@ -219,9 +214,8 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 		free_page((unsigned long) options);
 	kleave(" = %d", ret);
 	return ERR_PTR(ret);
-} /* end afs_mntpt_do_automount() */
+}
 
-/*****************************************************************************/
 /*
  * follow a link from a mountpoint directory, thus causing it to be mounted
  */
@@ -260,9 +254,8 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 
 	kleave(" = %d", err);
 	return ERR_PTR(err);
-} /* end afs_mntpt_follow_link() */
+}
 
-/*****************************************************************************/
 /*
  * handle mountpoint expiry timer going off
  */
@@ -276,4 +269,4 @@ static void afs_mntpt_expiry_timed_out(struct afs_timer *timer)
 				afs_mntpt_expiry_timeout * HZ);
 
 	kleave("");
-} /* end afs_mntpt_expiry_timed_out() */
+}
diff --git a/fs/afs/mount.h b/fs/afs/mount.h
index 9d2f46ec549..41b848320e0 100644
--- a/fs/afs/mount.h
+++ b/fs/afs/mount.h
@@ -1,4 +1,4 @@
-/* mount.h: mount parameters
+/* mount parameters
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -9,8 +9,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef _LINUX_AFS_MOUNT_H
-#define _LINUX_AFS_MOUNT_H
+#ifndef AFS_MOUNT_H
+#define AFS_MOUNT_H
 
 struct afs_mountdata {
 	const char		*volume;	/* name of volume */
@@ -20,4 +20,4 @@ struct afs_mountdata {
 	uint32_t		servers[10];	/* IP addresses of servers in this cell */
 };
 
-#endif /* _LINUX_AFS_MOUNT_H */
+#endif /* AFS_MOUNT_H */
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index ae6b85b1e48..5ebcc0cd3dd 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -1,4 +1,4 @@
-/* proc.c: /proc interface for AFS
+/* /proc interface for AFS
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -130,7 +130,6 @@ static const struct file_operations afs_proc_cell_servers_fops = {
 	.release	= afs_proc_cell_servers_release,
 };
 
-/*****************************************************************************/
 /*
  * initialise the /proc/fs/afs/ directory
  */
@@ -142,47 +141,43 @@ int afs_proc_init(void)
 
 	proc_afs = proc_mkdir("fs/afs", NULL);
 	if (!proc_afs)
-		goto error;
+		goto error_dir;
 	proc_afs->owner = THIS_MODULE;
 
 	p = create_proc_entry("cells", 0, proc_afs);
 	if (!p)
-		goto error_proc;
+		goto error_cells;
 	p->proc_fops = &afs_proc_cells_fops;
 	p->owner = THIS_MODULE;
 
 	p = create_proc_entry("rootcell", 0, proc_afs);
 	if (!p)
-		goto error_cells;
+		goto error_rootcell;
 	p->proc_fops = &afs_proc_rootcell_fops;
 	p->owner = THIS_MODULE;
 
 	_leave(" = 0");
 	return 0;
 
- error_cells:
+error_rootcell:
  	remove_proc_entry("cells", proc_afs);
- error_proc:
+error_cells:
 	remove_proc_entry("fs/afs", NULL);
- error:
+error_dir:
 	_leave(" = -ENOMEM");
 	return -ENOMEM;
+}
 
-} /* end afs_proc_init() */
-
-/*****************************************************************************/
 /*
  * clean up the /proc/fs/afs/ directory
  */
 void afs_proc_cleanup(void)
 {
+	remove_proc_entry("rootcell", proc_afs);
 	remove_proc_entry("cells", proc_afs);
-
 	remove_proc_entry("fs/afs", NULL);
+}
 
-} /* end afs_proc_cleanup() */
-
-/*****************************************************************************/
 /*
  * open "/proc/fs/afs/cells" which provides a summary of extant cells
  */
@@ -199,9 +194,8 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file)
 	m->private = PDE(inode)->data;
 
 	return 0;
-} /* end afs_proc_cells_open() */
+}
 
-/*****************************************************************************/
 /*
  * set up the iterator to start reading from the cells list and return the
  * first item
@@ -225,9 +219,8 @@ static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
 			break;
 
 	return _p != &afs_proc_cells ? _p : NULL;
-} /* end afs_proc_cells_start() */
+}
 
-/*****************************************************************************/
 /*
  * move to next cell in cells list
  */
@@ -241,19 +234,16 @@ static void *afs_proc_cells_next(struct seq_file *p, void *v, loff_t *pos)
 	_p = v == (void *) 1 ? afs_proc_cells.next : _p->next;
 
 	return _p != &afs_proc_cells ? _p : NULL;
-} /* end afs_proc_cells_next() */
+}
 
-/*****************************************************************************/
 /*
  * clean up after reading from the cells list
  */
 static void afs_proc_cells_stop(struct seq_file *p, void *v)
 {
 	up_read(&afs_proc_cells_sem);
+}
 
-} /* end afs_proc_cells_stop() */
-
-/*****************************************************************************/
 /*
  * display a header line followed by a load of cell lines
  */
@@ -261,19 +251,18 @@ static int afs_proc_cells_show(struct seq_file *m, void *v)
 {
 	struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link);
 
-	/* display header on line 1 */
 	if (v == (void *) 1) {
+		/* display header on line 1 */
 		seq_puts(m, "USE NAME\n");
 		return 0;
 	}
 
 	/* display one cell per line on subsequent lines */
-	seq_printf(m, "%3d %s\n", atomic_read(&cell->usage), cell->name);
-
+	seq_printf(m, "%3d %s\n",
+		   atomic_read(&cell->usage), cell->name);
 	return 0;
-} /* end afs_proc_cells_show() */
+}
 
-/*****************************************************************************/
 /*
  * handle writes to /proc/fs/afs/cells
  * - to add cells: echo "add <cellname> <IP>[:<IP>][:<IP>]"
@@ -331,25 +320,23 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
 			goto done;
 
 		printk("kAFS: Added new cell '%s'\n", name);
-	}
-	else {
+	} else {
 		goto inval;
 	}
 
 	ret = size;
 
- done:
+done:
 	kfree(kbuf);
 	_leave(" = %d", ret);
 	return ret;
 
- inval:
+inval:
 	ret = -EINVAL;
 	printk("kAFS: Invalid Command on /proc/fs/afs/cells file\n");
 	goto done;
-} /* end afs_proc_cells_write() */
+}
 
-/*****************************************************************************/
 /*
  * Stubs for /proc/fs/afs/rootcell
  */
@@ -369,7 +356,6 @@ static ssize_t afs_proc_rootcell_read(struct file *file, char __user *buf,
 	return 0;
 }
 
-/*****************************************************************************/
 /*
  * handle writes to /proc/fs/afs/rootcell
  * - to initialize rootcell: echo "cell.name:192.168.231.14"
@@ -407,14 +393,13 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
 	if (ret >= 0)
 		ret = size;	/* consume everything, always */
 
- infault:
+infault:
 	kfree(kbuf);
- nomem:
+nomem:
 	_leave(" = %d", ret);
 	return ret;
-} /* end afs_proc_rootcell_write() */
+}
 
-/*****************************************************************************/
 /*
  * initialise /proc/fs/afs/<cell>/
  */
@@ -426,25 +411,25 @@ int afs_proc_cell_setup(struct afs_cell *cell)
 
 	cell->proc_dir = proc_mkdir(cell->name, proc_afs);
 	if (!cell->proc_dir)
-		return -ENOMEM;
+		goto error_dir;
 
 	p = create_proc_entry("servers", 0, cell->proc_dir);
 	if (!p)
-		goto error_proc;
+		goto error_servers;
 	p->proc_fops = &afs_proc_cell_servers_fops;
 	p->owner = THIS_MODULE;
 	p->data = cell;
 
 	p = create_proc_entry("vlservers", 0, cell->proc_dir);
 	if (!p)
-		goto error_servers;
+		goto error_vlservers;
 	p->proc_fops = &afs_proc_cell_vlservers_fops;
 	p->owner = THIS_MODULE;
 	p->data = cell;
 
 	p = create_proc_entry("volumes", 0, cell->proc_dir);
 	if (!p)
-		goto error_vlservers;
+		goto error_volumes;
 	p->proc_fops = &afs_proc_cell_volumes_fops;
 	p->owner = THIS_MODULE;
 	p->data = cell;
@@ -452,17 +437,17 @@ int afs_proc_cell_setup(struct afs_cell *cell)
 	_leave(" = 0");
 	return 0;
 
- error_vlservers:
+error_volumes:
 	remove_proc_entry("vlservers", cell->proc_dir);
- error_servers:
+error_vlservers:
 	remove_proc_entry("servers", cell->proc_dir);
- error_proc:
+error_servers:
 	remove_proc_entry(cell->name, proc_afs);
+error_dir:
 	_leave(" = -ENOMEM");
 	return -ENOMEM;
-} /* end afs_proc_cell_setup() */
+}
 
-/*****************************************************************************/
 /*
  * remove /proc/fs/afs/<cell>/
  */
@@ -476,9 +461,8 @@ void afs_proc_cell_remove(struct afs_cell *cell)
 	remove_proc_entry(cell->name, proc_afs);
 
 	_leave("");
-} /* end afs_proc_cell_remove() */
+}
 
-/*****************************************************************************/
 /*
  * open "/proc/fs/afs/<cell>/volumes" which provides a summary of extant cells
  */
@@ -500,9 +484,8 @@ static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
 	m->private = cell;
 
 	return 0;
-} /* end afs_proc_cell_volumes_open() */
+}
 
-/*****************************************************************************/
 /*
  * close the file and release the ref to the cell
  */
@@ -511,14 +494,12 @@ static int afs_proc_cell_volumes_release(struct inode *inode, struct file *file)
 	struct afs_cell *cell = PDE(inode)->data;
 	int ret;
 
-	ret = seq_release(inode,file);
+	ret = seq_release(inode, file);
 
 	afs_put_cell(cell);
-
 	return ret;
-} /* end afs_proc_cell_volumes_release() */
+}
 
-/*****************************************************************************/
 /*
  * set up the iterator to start reading from the cells list and return the
  * first item
@@ -545,9 +526,8 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
 			break;
 
 	return _p != &cell->vl_list ? _p : NULL;
-} /* end afs_proc_cell_volumes_start() */
+}
 
-/*****************************************************************************/
 /*
  * move to next cell in cells list
  */
@@ -562,12 +542,11 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
 	(*_pos)++;
 
 	_p = v;
-	_p = v == (void *) 1 ? cell->vl_list.next : _p->next;
+	_p = (v == (void *) 1) ? cell->vl_list.next : _p->next;
 
-	return _p != &cell->vl_list ? _p : NULL;
-} /* end afs_proc_cell_volumes_next() */
+	return (_p != &cell->vl_list) ? _p : NULL;
+}
 
-/*****************************************************************************/
 /*
  * clean up after reading from the cells list
  */
@@ -576,10 +555,8 @@ static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v)
 	struct afs_cell *cell = p->private;
 
 	up_read(&cell->vl_sem);
+}
 
-} /* end afs_proc_cell_volumes_stop() */
-
-/*****************************************************************************/
 /*
  * display a header line followed by a load of volume lines
  */
@@ -600,13 +577,11 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
 		   vlocation->vldb.vid[0],
 		   vlocation->vldb.vid[1],
 		   vlocation->vldb.vid[2],
-		   vlocation->vldb.name
-		   );
+		   vlocation->vldb.name);
 
 	return 0;
-} /* end afs_proc_cell_volumes_show() */
+}
 
-/*****************************************************************************/
 /*
  * open "/proc/fs/afs/<cell>/vlservers" which provides a list of volume
  * location server
@@ -629,9 +604,8 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
 	m->private = cell;
 
 	return 0;
-} /* end afs_proc_cell_vlservers_open() */
+}
 
-/*****************************************************************************/
 /*
  * close the file and release the ref to the cell
  */
@@ -644,11 +618,9 @@ static int afs_proc_cell_vlservers_release(struct inode *inode,
 	ret = seq_release(inode,file);
 
 	afs_put_cell(cell);
-
 	return ret;
-} /* end afs_proc_cell_vlservers_release() */
+}
 
-/*****************************************************************************/
 /*
  * set up the iterator to start reading from the cells list and return the
  * first item
@@ -672,9 +644,8 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
 		return NULL;
 
 	return &cell->vl_addrs[pos];
-} /* end afs_proc_cell_vlservers_start() */
+}
 
-/*****************************************************************************/
 /*
  * move to next cell in cells list
  */
@@ -692,9 +663,8 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
 		return NULL;
 
 	return &cell->vl_addrs[pos];
-} /* end afs_proc_cell_vlservers_next() */
+}
 
-/*****************************************************************************/
 /*
  * clean up after reading from the cells list
  */
@@ -703,10 +673,8 @@ static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v)
 	struct afs_cell *cell = p->private;
 
 	up_read(&cell->vl_sem);
+}
 
-} /* end afs_proc_cell_vlservers_stop() */
-
-/*****************************************************************************/
 /*
  * display a header line followed by a load of volume lines
  */
@@ -722,11 +690,9 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
 
 	/* display one cell per line on subsequent lines */
 	seq_printf(m, "%u.%u.%u.%u\n", NIPQUAD(addr->s_addr));
-
 	return 0;
-} /* end afs_proc_cell_vlservers_show() */
+}
 
-/*****************************************************************************/
 /*
  * open "/proc/fs/afs/<cell>/servers" which provides a summary of active
  * servers
@@ -747,11 +713,9 @@ static int afs_proc_cell_servers_open(struct inode *inode, struct file *file)
 
 	m = file->private_data;
 	m->private = cell;
-
 	return 0;
-} /* end afs_proc_cell_servers_open() */
+}
 
-/*****************************************************************************/
 /*
  * close the file and release the ref to the cell
  */
@@ -764,11 +728,9 @@ static int afs_proc_cell_servers_release(struct inode *inode,
 	ret = seq_release(inode, file);
 
 	afs_put_cell(cell);
-
 	return ret;
-} /* end afs_proc_cell_servers_release() */
+}
 
-/*****************************************************************************/
 /*
  * set up the iterator to start reading from the cells list and return the
  * first item
@@ -796,9 +758,8 @@ static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
 			break;
 
 	return _p != &cell->sv_list ? _p : NULL;
-} /* end afs_proc_cell_servers_start() */
+}
 
-/*****************************************************************************/
 /*
  * move to next cell in cells list
  */
@@ -816,9 +777,8 @@ static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
 	_p = v == (void *) 1 ? cell->sv_list.next : _p->next;
 
 	return _p != &cell->sv_list ? _p : NULL;
-} /* end afs_proc_cell_servers_next() */
+}
 
-/*****************************************************************************/
 /*
  * clean up after reading from the cells list
  */
@@ -828,10 +788,8 @@ static void afs_proc_cell_servers_stop(struct seq_file *p, void *v)
 	struct afs_cell *cell = p->private;
 
 	read_unlock(&cell->sv_lock);
+}
 
-} /* end afs_proc_cell_servers_stop() */
-
-/*****************************************************************************/
 /*
  * display a header line followed by a load of volume lines
  */
@@ -849,10 +807,7 @@ static int afs_proc_cell_servers_show(struct seq_file *m, void *v)
 	/* display one cell per line on subsequent lines */
 	sprintf(ipaddr, "%u.%u.%u.%u", NIPQUAD(server->addr));
 	seq_printf(m, "%3d %-15.15s %5d\n",
-		   atomic_read(&server->usage),
-		   ipaddr,
-		   server->fs_state
-		   );
+		   atomic_read(&server->usage), ipaddr, server->fs_state);
 
 	return 0;
-} /* end afs_proc_cell_servers_show() */
+}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 44aff81dc6a..44b0ce53e91 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -1,4 +1,4 @@
-/* server.c: AFS server record management
+/* AFS server record management
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -41,7 +41,6 @@ static const struct afs_timer_ops afs_server_timer_ops = {
 	.timed_out	= __afs_server_timeout,
 };
 
-/*****************************************************************************/
 /*
  * lookup a server record in a cell
  * - TODO: search the cell's server list
@@ -106,7 +105,7 @@ int afs_server_lookup(struct afs_cell *cell, const struct in_addr *addr,
 	return 0;
 
 	/* found a matching active server */
- use_active_server:
+use_active_server:
 	_debug("active server");
 	afs_get_server(active);
 	write_unlock(&cell->sv_lock);
@@ -119,7 +118,7 @@ int afs_server_lookup(struct afs_cell *cell, const struct in_addr *addr,
 
 	/* found a matching server in the graveyard, so resurrect it and
 	 * dispose of the new record */
- resurrect_server:
+resurrect_server:
 	_debug("resurrecting server");
 
 	list_move_tail(&zombie->link, &cell->sv_list);
@@ -133,10 +132,8 @@ int afs_server_lookup(struct afs_cell *cell, const struct in_addr *addr,
 	*_server = zombie;
 	_leave(" = 0 (%p)", zombie);
 	return 0;
+}
 
-} /* end afs_server_lookup() */
-
-/*****************************************************************************/
 /*
  * destroy a server record
  * - removes from the cell list
@@ -175,9 +172,8 @@ void afs_put_server(struct afs_server *server)
 	write_unlock(&cell->sv_lock);
 
 	_leave(" [killed]");
-} /* end afs_put_server() */
+}
 
-/*****************************************************************************/
 /*
  * timeout server record
  * - removes from the cell's graveyard if the usage count is zero
@@ -230,9 +226,8 @@ void afs_server_do_timeout(struct afs_server *server)
 	kfree(server);
 
 	_leave(" [destroyed]");
-} /* end afs_server_do_timeout() */
+}
 
-/*****************************************************************************/
 /*
  * get a callslot on a connection to the fileserver on the specified server
  */
@@ -323,7 +318,7 @@ int afs_server_request_callslot(struct afs_server *server,
 	}
 
 	/* got a callslot, but no connection */
- obtained_slot:
+obtained_slot:
 
 	/* need to get hold of the RxRPC connection */
 	down_write(&server->sem);
@@ -337,8 +332,7 @@ int afs_server_request_callslot(struct afs_server *server,
 		/* reuse an existing connection */
 		rxrpc_get_connection(server->fs_conn[nconn]);
 		callslot->conn = server->fs_conn[nconn];
-	}
-	else {
+	} else {
 		/* create a new connection */
 		ret = rxrpc_create_connection(afs_transport,
 					      htons(7000),
@@ -360,23 +354,21 @@ int afs_server_request_callslot(struct afs_server *server,
 	return 0;
 
 	/* handle an error occurring */
- error_release_upw:
+error_release_upw:
 	up_write(&server->sem);
 
- error_release:
+error_release:
 	/* either release the callslot or pass it along to another deserving
 	 * task */
 	spin_lock(&server->fs_lock);
 
 	if (nconn < 0) {
 		/* no callslot allocated */
-	}
-	else if (list_empty(&server->fs_callq)) {
+	} else if (list_empty(&server->fs_callq)) {
 		/* no one waiting */
 		server->fs_conn_cnt[nconn]++;
 		spin_unlock(&server->fs_lock);
-	}
-	else {
+	} else {
 		/* someone's waiting - dequeue them and wake them up */
 		pcallslot = list_entry(server->fs_callq.next,
 				       struct afs_server_callslot, link);
@@ -400,10 +392,8 @@ int afs_server_request_callslot(struct afs_server *server,
 
 	_leave(" = %d", ret);
 	return ret;
+}
 
-} /* end afs_server_request_callslot() */
-
-/*****************************************************************************/
 /*
  * release a callslot back to the server
  * - transfers the RxRPC connection to the next pending callslot if possible
@@ -426,8 +416,7 @@ void afs_server_release_callslot(struct afs_server *server,
 		/* no one waiting */
 		server->fs_conn_cnt[callslot->nconn]++;
 		spin_unlock(&server->fs_lock);
-	}
-	else {
+	} else {
 		/* someone's waiting - dequeue them and wake them up */
 		pcallslot = list_entry(server->fs_callq.next,
 				       struct afs_server_callslot, link);
@@ -449,9 +438,8 @@ void afs_server_release_callslot(struct afs_server *server,
 	rxrpc_put_connection(callslot->conn);
 
 	_leave("");
-} /* end afs_server_release_callslot() */
+}
 
-/*****************************************************************************/
 /*
  * get a handle to a connection to the vlserver (volume location) on the
  * specified server
@@ -473,8 +461,7 @@ int afs_server_get_vlconn(struct afs_server *server,
 		rxrpc_get_connection(server->vlserver);
 		conn = server->vlserver;
 		up_read(&server->sem);
-	}
-	else {
+	} else {
 		/* create a new connection */
 		up_read(&server->sem);
 		down_write(&server->sem);
@@ -496,4 +483,4 @@ int afs_server_get_vlconn(struct afs_server *server,
 	*_conn = conn;
 	_leave(" = %d", ret);
 	return ret;
-} /* end afs_server_get_vlconn() */
+}
diff --git a/fs/afs/server.h b/fs/afs/server.h
index c3d24115578..e1a006829b5 100644
--- a/fs/afs/server.h
+++ b/fs/afs/server.h
@@ -1,4 +1,4 @@
-/* server.h: AFS server record
+/* AFS server record
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -9,8 +9,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef _LINUX_AFS_SERVER_H
-#define _LINUX_AFS_SERVER_H
+#ifndef AFS_SERVER_H
+#define AFS_SERVER_H
 
 #include "types.h"
 #include "kafstimod.h"
@@ -19,12 +19,10 @@
 
 extern spinlock_t afs_server_peer_lock;
 
-/*****************************************************************************/
 /*
  * AFS server record
  */
-struct afs_server
-{
+struct afs_server {
 	atomic_t		usage;
 	struct afs_cell		*cell;		/* cell in which server resides */
 	struct list_head	link;		/* link in cell's server list */
@@ -50,20 +48,19 @@ struct afs_server
 	spinlock_t		cb_lock;	/* access lock */
 };
 
-extern int afs_server_lookup(struct afs_cell *cell,
-			     const struct in_addr *addr,
-			     struct afs_server **_server);
+extern int afs_server_lookup(struct afs_cell *, const struct in_addr *,
+			     struct afs_server **);
 
 #define afs_get_server(S) do { atomic_inc(&(S)->usage); } while(0)
 
-extern void afs_put_server(struct afs_server *server);
-extern void afs_server_do_timeout(struct afs_server *server);
+extern void afs_put_server(struct afs_server *);
+extern void afs_server_do_timeout(struct afs_server *);
 
-extern int afs_server_find_by_peer(const struct rxrpc_peer *peer,
-				   struct afs_server **_server);
+extern int afs_server_find_by_peer(const struct rxrpc_peer *,
+				   struct afs_server **);
 
-extern int afs_server_get_vlconn(struct afs_server *server,
-				 struct rxrpc_connection **_conn);
+extern int afs_server_get_vlconn(struct afs_server *,
+				 struct rxrpc_connection **);
 
 static inline
 struct afs_server *afs_server_get_from_peer(struct rxrpc_peer *peer)
@@ -79,12 +76,10 @@ struct afs_server *afs_server_get_from_peer(struct rxrpc_peer *peer)
 	return server;
 }
 
-/*****************************************************************************/
 /*
  * AFS server callslot grant record
  */
-struct afs_server_callslot
-{
+struct afs_server_callslot {
 	struct list_head	link;		/* link in server's list */
 	struct task_struct	*task;		/* process waiting to make call */
 	struct rxrpc_connection	*conn;		/* connection to use (or NULL on error) */
@@ -93,10 +88,10 @@ struct afs_server_callslot
 	int			errno;		/* error number if nconn==-1 */
 };
 
-extern int afs_server_request_callslot(struct afs_server *server,
-				       struct afs_server_callslot *callslot);
+extern int afs_server_request_callslot(struct afs_server *,
+				       struct afs_server_callslot *);
 
-extern void afs_server_release_callslot(struct afs_server *server,
-					struct afs_server_callslot *callslot);
+extern void afs_server_release_callslot(struct afs_server *,
+					struct afs_server_callslot *);
 
-#endif /* _LINUX_AFS_SERVER_H */
+#endif /* AFS_SERVER_H */
diff --git a/fs/afs/super.c b/fs/afs/super.c
index eb7e32349da..0470a5c0b8a 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -1,4 +1,5 @@
-/*
+/* AFS superblock handling
+ *
  * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
  *
  * This software may be freely redistributed under the terms of the
@@ -9,7 +10,7 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
  * Authors: David Howells <dhowells@redhat.com>
- *          David Woodhouse <dwmw2@cambridge.redhat.com>
+ *          David Woodhouse <dwmw2@redhat.com>
  *
  */
 
@@ -68,7 +69,6 @@ static const struct super_operations afs_super_ops = {
 static struct kmem_cache *afs_inode_cachep;
 static atomic_t afs_count_active_inodes;
 
-/*****************************************************************************/
 /*
  * initialise the filesystem
  */
@@ -105,9 +105,8 @@ int __init afs_fs_init(void)
 
 	kleave(" = 0");
 	return 0;
-} /* end afs_fs_init() */
+}
 
-/*****************************************************************************/
 /*
  * clean up the filesystem
  */
@@ -122,10 +121,8 @@ void __exit afs_fs_exit(void)
 	}
 
 	kmem_cache_destroy(afs_inode_cachep);
+}
 
-} /* end afs_fs_exit() */
-
-/*****************************************************************************/
 /*
  * check that an argument has a value
  */
@@ -136,9 +133,8 @@ static int want_arg(char **_value, const char *option)
 		return 0;
 	}
 	return 1;
-} /* end want_arg() */
+}
 
-/*****************************************************************************/
 /*
  * check that there's no subsequent value
  */
@@ -150,9 +146,8 @@ static int want_no_value(char *const *_value, const char *option)
 		return 0;
 	}
 	return 1;
-} /* end want_no_value() */
+}
 
-/*****************************************************************************/
 /*
  * parse the mount options
  * - this function has been shamelessly adapted from the ext3 fs which
@@ -183,14 +178,12 @@ static int afs_super_parse_options(struct afs_mount_params *params,
 				return -EINVAL;
 			params->rwpath = 1;
 			continue;
-		}
-		else if (strcmp(key, "vol") == 0) {
+		} else if (strcmp(key, "vol") == 0) {
 			if (!want_arg(&value, "vol"))
 				return -EINVAL;
 			*devname = value;
 			continue;
-		}
-		else if (strcmp(key, "cell") == 0) {
+		} else if (strcmp(key, "cell") == 0) {
 			if (!want_arg(&value, "cell"))
 				return -EINVAL;
 			afs_put_cell(params->default_cell);
@@ -209,12 +202,11 @@ static int afs_super_parse_options(struct afs_mount_params *params,
 
 	ret = 0;
 
- error:
+error:
 	_leave(" = %d", ret);
 	return ret;
-} /* end afs_super_parse_options() */
+}
 
-/*****************************************************************************/
 /*
  * check a superblock to see if it's the one we're looking for
  */
@@ -224,9 +216,8 @@ static int afs_test_super(struct super_block *sb, void *data)
 	struct afs_super_info *as = sb->s_fs_info;
 
 	return as->volume == params->volume;
-} /* end afs_test_super() */
+}
 
-/*****************************************************************************/
 /*
  * fill in the superblock
  */
@@ -276,7 +267,7 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent)
 	kleave(" = 0");
 	return 0;
 
- error:
+error:
 	iput(inode);
 	afs_put_volume(as->volume);
 	kfree(as);
@@ -285,9 +276,8 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent)
 
 	kleave(" = %d", ret);
 	return ret;
-} /* end afs_fill_super() */
+}
 
-/*****************************************************************************/
 /*
  * get an AFS superblock
  * - TODO: don't use get_sb_nodev(), but rather call sget() directly
@@ -354,15 +344,14 @@ static int afs_get_sb(struct file_system_type *fs_type,
 	_leave(" = 0 [%p]", 0, sb);
 	return 0;
 
- error:
+error:
 	afs_put_volume(params.volume);
 	afs_put_cell(params.default_cell);
 	afscm_stop();
 	_leave(" = %d", ret);
 	return ret;
-} /* end afs_get_sb() */
+}
 
-/*****************************************************************************/
 /*
  * finish the unmounting process on the superblock
  */
@@ -376,16 +365,15 @@ static void afs_put_super(struct super_block *sb)
 	afscm_stop();
 
 	_leave("");
-} /* end afs_put_super() */
+}
 
-/*****************************************************************************/
 /*
  * initialise an inode cache slab element prior to any use
  */
 static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep,
 			    unsigned long flags)
 {
-	struct afs_vnode *vnode = (struct afs_vnode *) _vnode;
+	struct afs_vnode *vnode = _vnode;
 
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 	    SLAB_CTOR_CONSTRUCTOR) {
@@ -398,10 +386,8 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep,
 		afs_timer_init(&vnode->cb_timeout,
 			       &afs_vnode_cb_timed_out_ops);
 	}
+}
 
-} /* end afs_i_init_once() */
-
-/*****************************************************************************/
 /*
  * allocate an AFS inode struct from our slab cache
  */
@@ -409,8 +395,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 {
 	struct afs_vnode *vnode;
 
-	vnode = (struct afs_vnode *)
-		kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL);
+	vnode = kmem_cache_alloc(afs_inode_cachep, GFP_KERNEL);
 	if (!vnode)
 		return NULL;
 
@@ -424,9 +409,8 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 	vnode->flags		= 0;
 
 	return &vnode->vfs_inode;
-} /* end afs_alloc_inode() */
+}
 
-/*****************************************************************************/
 /*
  * destroy an AFS inode struct
  */
@@ -435,7 +419,5 @@ static void afs_destroy_inode(struct inode *inode)
 	_enter("{%lu}", inode->i_ino);
 
 	kmem_cache_free(afs_inode_cachep, AFS_FS_I(inode));
-
 	atomic_dec(&afs_count_active_inodes);
-
-} /* end afs_destroy_inode() */
+}
diff --git a/fs/afs/super.h b/fs/afs/super.h
index 32de8cc6fae..c95b48edfc7 100644
--- a/fs/afs/super.h
+++ b/fs/afs/super.h
@@ -1,4 +1,4 @@
-/* super.h: AFS filesystem internal private data
+/* AFS filesystem internal private data
  *
  * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
  *
@@ -14,21 +14,17 @@
  *
  */
 
-#ifndef _LINUX_AFS_SUPER_H
-#define _LINUX_AFS_SUPER_H
+#ifndef AFS_SUPER_H
+#define AFS_SUPER_H
 
 #include <linux/fs.h>
 #include "server.h"
 
-#ifdef __KERNEL__
-
-/*****************************************************************************/
 /*
  * AFS superblock private data
  * - there's one superblock per volume
  */
-struct afs_super_info
-{
+struct afs_super_info {
 	struct afs_volume	*volume;	/* volume record */
 	char			rwparent;	/* T if parent is R/W AFS volume */
 };
@@ -40,6 +36,4 @@ static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
 
 extern struct file_system_type afs_fs_type;
 
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_AFS_SUPER_H */
+#endif /* AFS_SUPER_H */
diff --git a/fs/afs/transport.h b/fs/afs/transport.h
index 7013ae6ccc8..f56be4b7b1d 100644
--- a/fs/afs/transport.h
+++ b/fs/afs/transport.h
@@ -1,4 +1,4 @@
-/* transport.h: AFS transport management
+/* AFS transport management
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -9,8 +9,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef _LINUX_AFS_TRANSPORT_H
-#define _LINUX_AFS_TRANSPORT_H
+#ifndef AFS_TRANSPORT_H
+#define AFS_TRANSPORT_H
 
 #include "types.h"
 #include <rxrpc/transport.h>
@@ -18,4 +18,4 @@
 /* the cache manager transport endpoint */
 extern struct rxrpc_transport *afs_transport;
 
-#endif /* _LINUX_AFS_TRANSPORT_H */
+#endif /* AFS_TRANSPORT_H */
diff --git a/fs/afs/types.h b/fs/afs/types.h
index b1a2367c758..db2b5dc9ff4 100644
--- a/fs/afs/types.h
+++ b/fs/afs/types.h
@@ -1,4 +1,4 @@
-/* types.h: AFS types
+/* AFS types
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -9,12 +9,10 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef _LINUX_AFS_TYPES_H
-#define _LINUX_AFS_TYPES_H
+#ifndef AFS_TYPES_H
+#define AFS_TYPES_H
 
-#ifdef __KERNEL__
 #include <rxrpc/types.h>
-#endif /* __KERNEL__ */
 
 typedef unsigned			afs_volid_t;
 typedef unsigned			afs_vnodeid_t;
@@ -33,23 +31,18 @@ typedef enum {
 	AFS_FTYPE_SYMLINK	= 3,
 } afs_file_type_t;
 
-#ifdef __KERNEL__
-
 struct afs_cell;
 struct afs_vnode;
 
-/*****************************************************************************/
 /*
  * AFS file identifier
  */
-struct afs_fid
-{
+struct afs_fid {
 	afs_volid_t	vid;		/* volume ID */
 	afs_vnodeid_t	vnode;		/* file index within volume */
 	unsigned	unique;		/* unique ID number (file index version) */
 };
 
-/*****************************************************************************/
 /*
  * AFS callback notification
  */
@@ -60,8 +53,7 @@ typedef enum {
 	AFSCM_CB_DROPPED	= 3,	/* CB promise cancelled by file server */
 } afs_callback_type_t;
 
-struct afs_callback
-{
+struct afs_callback {
 	struct afs_server	*server;	/* server that made the promise */
 	struct afs_fid		fid;		/* file identifier */
 	unsigned		version;	/* callback version */
@@ -71,12 +63,10 @@ struct afs_callback
 
 #define AFSCBMAX 50
 
-/*****************************************************************************/
 /*
  * AFS volume information
  */
-struct afs_volume_info
-{
+struct afs_volume_info {
 	afs_volid_t		vid;		/* volume ID */
 	afs_voltype_t		type;		/* type of this volume */
 	afs_volid_t		type_vids[5];	/* volume ID's for possible types for this vol */
@@ -88,12 +78,10 @@ struct afs_volume_info
 	} servers[8];
 };
 
-/*****************************************************************************/
 /*
  * AFS file status information
  */
-struct afs_file_status
-{
+struct afs_file_status {
 	unsigned		if_version;	/* interface version */
 #define AFS_FSTATUS_VERSION	1
 
@@ -111,15 +99,11 @@ struct afs_file_status
 	time_t			mtime_server;	/* last time server changed data */
 };
 
-/*****************************************************************************/
 /*
  * AFS volume synchronisation information
  */
-struct afs_volsync
-{
+struct afs_volsync {
 	time_t			creation;	/* volume creation time */
 };
 
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_AFS_TYPES_H */
+#endif /* AFS_TYPES_H */
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 7b0e3192ee3..dac9faa70ff 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -1,4 +1,4 @@
-/* vlclient.c: AFS Volume Location Service client
+/* AFS Volume Location Service client
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -30,7 +30,6 @@
 static void afs_rxvl_get_entry_by_id_attn(struct rxrpc_call *call);
 static void afs_rxvl_get_entry_by_id_error(struct rxrpc_call *call);
 
-/*****************************************************************************/
 /*
  * map afs VL abort codes to/from Linux error codes
  * - called with call->lock held
@@ -87,10 +86,9 @@ static void afs_rxvl_aemap(struct rxrpc_call *call)
 	default:
 		return;
 	}
-} /* end afs_rxvl_aemap() */
+}
 
 #if 0
-/*****************************************************************************/
 /*
  * probe a volume location server to see if it is still alive -- unused
  */
@@ -159,24 +157,22 @@ static int afs_rxvl_probe(struct afs_server *server, int alloc_flags)
 		BUG();
 	}
 
- abort:
+abort:
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	rxrpc_call_abort(call, ret);
 	schedule();
 
- out_unwait:
+out_unwait:
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&call->waitq, &myself);
 	rxrpc_put_call(call);
- out_put_conn:
+out_put_conn:
 	rxrpc_put_connection(conn);
- out:
+out:
 	return ret;
-
-} /* end afs_rxvl_probe() */
+}
 #endif
 
-/*****************************************************************************/
 /*
  * look up a volume location database entry by name
  */
@@ -294,24 +290,23 @@ int afs_rxvl_get_entry_by_name(struct afs_server *server,
 	entry->rtime = get_seconds();
 	ret = 0;
 
- out_unwait:
+out_unwait:
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&call->waitq, &myself);
 	rxrpc_put_call(call);
- out_put_conn:
+out_put_conn:
 	rxrpc_put_connection(conn);
- out:
+out:
 	_leave(" = %d", ret);
 	return ret;
 
- abort:
+abort:
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	rxrpc_call_abort(call, ret);
 	schedule();
 	goto out_unwait;
-} /* end afs_rxvl_get_entry_by_name() */
+}
 
-/*****************************************************************************/
 /*
  * look up a volume location database entry by ID
  */
@@ -434,24 +429,23 @@ int afs_rxvl_get_entry_by_id(struct afs_server *server,
 	entry->rtime = get_seconds();
 	ret = 0;
 
- out_unwait:
+out_unwait:
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&call->waitq, &myself);
 	rxrpc_put_call(call);
- out_put_conn:
+out_put_conn:
 	rxrpc_put_connection(conn);
- out:
+out:
 	_leave(" = %d", ret);
 	return ret;
 
- abort:
+abort:
 	set_current_state(TASK_UNINTERRUPTIBLE);
 	rxrpc_call_abort(call, ret);
 	schedule();
 	goto out_unwait;
-} /* end afs_rxvl_get_entry_by_id() */
+}
 
-/*****************************************************************************/
 /*
  * look up a volume location database entry by ID asynchronously
  */
@@ -533,14 +527,12 @@ int afs_rxvl_get_entry_by_id_async(struct afs_async_op *op,
 		break;
 	}
 
- out:
+out:
 	rxrpc_put_call(call);
 	_leave(" = %d", ret);
 	return ret;
+}
 
-} /* end afs_rxvl_get_entry_by_id_async() */
-
-/*****************************************************************************/
 /*
  * attend to the asynchronous get VLDB entry by ID
  */
@@ -630,14 +622,13 @@ int afs_rxvl_get_entry_by_id_async2(struct afs_async_op *op,
 	_leave(" = -EAGAIN");
 	return -EAGAIN;
 
- done:
+done:
 	rxrpc_put_call(op->call);
 	op->call = NULL;
 	_leave(" = %d", ret);
 	return ret;
-} /* end afs_rxvl_get_entry_by_id_async2() */
+}
 
-/*****************************************************************************/
 /*
  * handle attention events on an async get-entry-by-ID op
  * - called from krxiod
@@ -674,10 +665,8 @@ static void afs_rxvl_get_entry_by_id_attn(struct rxrpc_call *call)
 	}
 
 	_leave("");
+}
 
-} /* end afs_rxvl_get_entry_by_id_attn() */
-
-/*****************************************************************************/
 /*
  * handle error events on an async get-entry-by-ID op
  * - called from krxiod
@@ -691,5 +680,4 @@ static void afs_rxvl_get_entry_by_id_error(struct rxrpc_call *call)
 	afs_kafsasyncd_attend_op(op);
 
 	_leave("");
-
-} /* end afs_rxvl_get_entry_by_id_error() */
+}
diff --git a/fs/afs/vlclient.h b/fs/afs/vlclient.h
index e3d601179c4..11dc10fe300 100644
--- a/fs/afs/vlclient.h
+++ b/fs/afs/vlclient.h
@@ -1,4 +1,4 @@
-/* vlclient.h: Volume Location Service client interface
+/* Volume Location Service client interface
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -9,8 +9,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef _LINUX_AFS_VLCLIENT_H
-#define _LINUX_AFS_VLCLIENT_H
+#ifndef AFS_VLCLIENT_H
+#define AFS_VLCLIENT_H
 
 #include "types.h"
 
@@ -68,26 +68,18 @@ struct afs_vldbentry {
 #define AFS_VLSF_RWVOL		0x0004	/* this server holds a R/W instance of the volume */
 #define AFS_VLSF_BACKVOL	0x0008	/* this server holds a backup instance of the volume */
 	} servers[8];
-
 };
 
-/* look up a volume location database entry by name */
-extern int afs_rxvl_get_entry_by_name(struct afs_server *server,
-				      const char *volname,
-				      unsigned volnamesz,
-				      struct afs_cache_vlocation *entry);
-
-/* look up a volume location database entry by ID */
-extern int afs_rxvl_get_entry_by_id(struct afs_server *server,
-				    afs_volid_t	volid,
-				    afs_voltype_t voltype,
-				    struct afs_cache_vlocation *entry);
+extern int afs_rxvl_get_entry_by_name(struct afs_server *, const char *,
+				      unsigned, struct afs_cache_vlocation *);
+extern int afs_rxvl_get_entry_by_id(struct afs_server *, afs_volid_t,
+				    afs_voltype_t,
+				    struct afs_cache_vlocation *);
 
-extern int afs_rxvl_get_entry_by_id_async(struct afs_async_op *op,
-					  afs_volid_t volid,
-					  afs_voltype_t voltype);
+extern int afs_rxvl_get_entry_by_id_async(struct afs_async_op *,
+					  afs_volid_t, afs_voltype_t);
 
-extern int afs_rxvl_get_entry_by_id_async2(struct afs_async_op *op,
-					   struct afs_cache_vlocation *entry);
+extern int afs_rxvl_get_entry_by_id_async2(struct afs_async_op *,
+					   struct afs_cache_vlocation *);
 
-#endif /* _LINUX_AFS_VLCLIENT_H */
+#endif /* AFS_VLCLIENT_H */
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 782ee7c600c..e48728c9217 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -1,4 +1,4 @@
-/* vlocation.c: volume location management
+/* volume location management
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -73,7 +73,6 @@ struct cachefs_index_def afs_vlocation_cache_index_def = {
 };
 #endif
 
-/*****************************************************************************/
 /*
  * iterate through the VL servers in a cell until one of them admits knowing
  * about the volume in question
@@ -146,13 +145,11 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vlocation,
 		cell->vl_curr_svix %= cell->vl_naddrs;
 	}
 
- out:
+out:
 	_leave(" = %d", ret);
 	return ret;
+}
 
-} /* end afs_vlocation_access_vl_by_name() */
-
-/*****************************************************************************/
 /*
  * iterate through the VL servers in a cell until one of them admits knowing
  * about the volume in question
@@ -225,13 +222,11 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vlocation,
 		cell->vl_curr_svix %= cell->vl_naddrs;
 	}
 
- out:
+out:
 	_leave(" = %d", ret);
 	return ret;
+}
 
-} /* end afs_vlocation_access_vl_by_id() */
-
-/*****************************************************************************/
 /*
  * lookup volume location
  * - caller must have cell->vol_sem write-locked
@@ -321,7 +316,7 @@ int afs_vlocation_lookup(struct afs_cell *cell,
 
 	goto found_on_vlserver;
 
- found_in_graveyard:
+found_in_graveyard:
 	/* found in the graveyard - resurrect */
 	_debug("found in graveyard");
 	atomic_inc(&vlocation->usage);
@@ -331,16 +326,16 @@ int afs_vlocation_lookup(struct afs_cell *cell,
 	afs_kafstimod_del_timer(&vlocation->timeout);
 	goto active;
 
- found_in_memory:
+found_in_memory:
 	/* found in memory - check to see if it's active */
 	_debug("found in memory");
 	atomic_inc(&vlocation->usage);
 
- active:
+active:
 	active = 1;
 
 #ifdef AFS_CACHING_SUPPORT
- found_in_cache:
+found_in_cache:
 #endif
 	/* try to look up a cached volume in the cell VL databases by ID */
 	_debug("found in cache");
@@ -364,16 +359,13 @@ int afs_vlocation_lookup(struct afs_cell *cell,
 	if (vlocation->vldb.vidmask & AFS_VOL_VTM_RW) {
 		vid = vlocation->vldb.vid[0];
 		voltype = AFSVL_RWVOL;
-	}
-	else if (vlocation->vldb.vidmask & AFS_VOL_VTM_RO) {
+	} else if (vlocation->vldb.vidmask & AFS_VOL_VTM_RO) {
 		vid = vlocation->vldb.vid[1];
 		voltype = AFSVL_ROVOL;
-	}
-	else if (vlocation->vldb.vidmask & AFS_VOL_VTM_BAK) {
+	} else if (vlocation->vldb.vidmask & AFS_VOL_VTM_BAK) {
 		vid = vlocation->vldb.vid[2];
 		voltype = AFSVL_BACKVOL;
-	}
-	else {
+	} else {
 		BUG();
 		vid = 0;
 		voltype = 0;
@@ -400,7 +392,7 @@ int afs_vlocation_lookup(struct afs_cell *cell,
 		goto error;
 	}
 
- found_on_vlserver:
+found_on_vlserver:
 	_debug("Done VL Lookup: %*.*s %02x { %08x(%x) %08x(%x) %08x(%x) }",
 	       namesz, namesz, name,
 	       vldb.vidmask,
@@ -430,12 +422,11 @@ int afs_vlocation_lookup(struct afs_cell *cell,
 	_leave(" = 0 (%p)",vlocation);
 	return 0;
 
- error:
+error:
 	if (vlocation) {
 		if (active) {
 			__afs_put_vlocation(vlocation);
-		}
-		else {
+		} else {
 			list_del(&vlocation->link);
 #ifdef AFS_CACHING_SUPPORT
 			cachefs_relinquish_cookie(vlocation->cache, 0);
@@ -447,9 +438,8 @@ int afs_vlocation_lookup(struct afs_cell *cell,
 
 	_leave(" = %d", ret);
 	return ret;
-} /* end afs_vlocation_lookup() */
+}
 
-/*****************************************************************************/
 /*
  * finish using a volume location record
  * - caller must have cell->vol_sem write-locked
@@ -489,9 +479,8 @@ static void __afs_put_vlocation(struct afs_vlocation *vlocation)
 	spin_unlock(&cell->vl_gylock);
 
 	_leave(" [killed]");
-} /* end __afs_put_vlocation() */
+}
 
-/*****************************************************************************/
 /*
  * finish using a volume location record
  */
@@ -504,9 +493,8 @@ void afs_put_vlocation(struct afs_vlocation *vlocation)
 		__afs_put_vlocation(vlocation);
 		up_write(&cell->vl_sem);
 	}
-} /* end afs_put_vlocation() */
+}
 
-/*****************************************************************************/
 /*
  * timeout vlocation record
  * - removes from the cell's graveyard if the usage count is zero
@@ -543,9 +531,8 @@ void afs_vlocation_do_timeout(struct afs_vlocation *vlocation)
 	kfree(vlocation);
 
 	_leave(" [destroyed]");
-} /* end afs_vlocation_do_timeout() */
+}
 
-/*****************************************************************************/
 /*
  * send an update operation to the currently selected server
  */
@@ -564,16 +551,13 @@ static int afs_vlocation_update_begin(struct afs_vlocation *vlocation)
 	if (vlocation->vldb.vidmask & AFS_VOL_VTM_RW) {
 		vid = vlocation->vldb.vid[0];
 		voltype = AFSVL_RWVOL;
-	}
-	else if (vlocation->vldb.vidmask & AFS_VOL_VTM_RO) {
+	} else if (vlocation->vldb.vidmask & AFS_VOL_VTM_RO) {
 		vid = vlocation->vldb.vid[1];
 		voltype = AFSVL_ROVOL;
-	}
-	else if (vlocation->vldb.vidmask & AFS_VOL_VTM_BAK) {
+	} else if (vlocation->vldb.vidmask & AFS_VOL_VTM_BAK) {
 		vid = vlocation->vldb.vid[2];
 		voltype = AFSVL_BACKVOL;
-	}
-	else {
+	} else {
 		BUG();
 		vid = 0;
 		voltype = 0;
@@ -604,9 +588,8 @@ static int afs_vlocation_update_begin(struct afs_vlocation *vlocation)
 
 	_leave(" = %d", ret);
 	return ret;
-} /* end afs_vlocation_update_begin() */
+}
 
-/*****************************************************************************/
 /*
  * abandon updating a VL record
  * - does not restart the update timer
@@ -634,9 +617,8 @@ static void afs_vlocation_update_abandon(struct afs_vlocation *vlocation,
 	spin_unlock(&afs_vlocation_update_lock);
 
 	_leave("");
-} /* end afs_vlocation_update_abandon() */
+}
 
-/*****************************************************************************/
 /*
  * handle periodic update timeouts and busy retry timeouts
  * - called from kafstimod
@@ -663,8 +645,7 @@ static void afs_vlocation_update_timer(struct afs_timer *timer)
 		if (afs_vlocation_update) {
 			list_add(&vlocation->upd_op.link,
 				 &afs_vlocation_update_pendq);
-		}
-		else {
+		} else {
 			afs_get_vlocation(vlocation);
 			afs_vlocation_update = vlocation;
 			vlocation->upd_state = AFS_VLUPD_INPROGRESS;
@@ -706,16 +687,13 @@ static void afs_vlocation_update_timer(struct afs_timer *timer)
 	_leave("");
 	return;
 
- out_unlock2:
+out_unlock2:
 	spin_unlock(&afs_vlocation_update_lock);
- out_unlock1:
+out_unlock1:
 	spin_unlock(&vlocation->cell->vl_gylock);
 	_leave("");
-	return;
-
-} /* end afs_vlocation_update_timer() */
+}
 
-/*****************************************************************************/
 /*
  * attend to an update operation upon which an event happened
  * - called in kafsasyncd context
@@ -818,7 +796,7 @@ static void afs_vlocation_update_attend(struct afs_async_op *op)
 	}
 
 	/* try contacting the next server */
- try_next:
+try_next:
 	vlocation->upd_busy_cnt = 0;
 
 	/* discard the server record */
@@ -856,15 +834,13 @@ static void afs_vlocation_update_attend(struct afs_async_op *op)
 	}
 
 	/* abandon the update */
- abandon:
+abandon:
 	afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, ret);
 	afs_kafstimod_add_timer(&vlocation->upd_timer, HZ * 10);
 	afs_put_vlocation(vlocation);
 	_leave(" [abandoned]");
+}
 
-} /* end afs_vlocation_update_attend() */
-
-/*****************************************************************************/
 /*
  * deal with an update operation being discarded
  * - called in kafsasyncd context when it's dying due to rmmod
@@ -883,9 +859,8 @@ static void afs_vlocation_update_discard(struct afs_async_op *op)
 	afs_put_vlocation(vlocation);
 
 	_leave("");
-} /* end afs_vlocation_update_discard() */
+}
 
-/*****************************************************************************/
 /*
  * match a VLDB record stored in the cache
  * - may also load target from entry
@@ -908,9 +883,7 @@ static cachefs_match_val_t afs_vlocation_cache_match(void *target,
 			vlocation->valid = 1;
 			_leave(" = SUCCESS [c->m]");
 			return CACHEFS_MATCH_SUCCESS;
-		}
-		/* need to update cache if cached info differs */
-		else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) {
+		} else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) {
 			/* delete if VIDs for this name differ */
 			if (memcmp(&vlocation->vldb.vid,
 				   &vldb->vid,
@@ -921,8 +894,7 @@ static cachefs_match_val_t afs_vlocation_cache_match(void *target,
 
 			_leave(" = UPDATE");
 			return CACHEFS_MATCH_SUCCESS_UPDATE;
-		}
-		else {
+		} else {
 			_leave(" = SUCCESS");
 			return CACHEFS_MATCH_SUCCESS;
 		}
@@ -930,10 +902,9 @@ static cachefs_match_val_t afs_vlocation_cache_match(void *target,
 
 	_leave(" = FAILED");
 	return CACHEFS_MATCH_FAILED;
-} /* end afs_vlocation_cache_match() */
+}
 #endif
 
-/*****************************************************************************/
 /*
  * update a VLDB record stored in the cache
  */
@@ -946,6 +917,5 @@ static void afs_vlocation_cache_update(void *source, void *entry)
 	_enter("");
 
 	*vldb = vlocation->vldb;
-
-} /* end afs_vlocation_cache_update() */
+}
 #endif
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index cf62da5d782..4ab1ed71028 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -1,4 +1,4 @@
-/* vnode.c: AFS vnode management
+/* AFS vnode management
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -43,7 +43,6 @@ struct cachefs_index_def afs_vnode_cache_index_def = {
 };
 #endif
 
-/*****************************************************************************/
 /*
  * handle a callback timing out
  * TODO: retain a ref to vnode struct for an outstanding callback timeout
@@ -78,9 +77,8 @@ static void afs_vnode_cb_timed_out(struct afs_timer *timer)
 	afs_put_server(oldserver);
 
 	_leave("");
-} /* end afs_vnode_cb_timed_out() */
+}
 
-/*****************************************************************************/
 /*
  * finish off updating the recorded status of a file
  * - starts callback expiry timer
@@ -105,7 +103,7 @@ static void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
 
 		spin_lock(&afs_cb_hash_lock);
 		list_move_tail(&vnode->cb_hash_link,
-			      &afs_cb_hash(server, &vnode->fid));
+			       &afs_cb_hash(server, &vnode->fid));
 		spin_unlock(&afs_cb_hash_lock);
 
 		/* swap ref to old callback server with that for new callback
@@ -122,13 +120,11 @@ static void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
 			spin_lock(&server->cb_lock);
 			list_add_tail(&vnode->cb_link, &server->cb_promises);
 			spin_unlock(&server->cb_lock);
-		}
-		else {
+		} else {
 			/* same server */
 			oldserver = NULL;
 		}
-	}
-	else if (ret == -ENOENT) {
+	} else if (ret == -ENOENT) {
 		/* the file was deleted - clear the callback timeout */
 		oldserver = xchg(&vnode->cb_server, NULL);
 		afs_kafstimod_del_timer(&vnode->cb_timeout);
@@ -146,10 +142,8 @@ static void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
 	afs_put_server(oldserver);
 
 	_leave("");
+}
 
-} /* end afs_vnode_finalise_status_update() */
-
-/*****************************************************************************/
 /*
  * fetch file status from the volume
  * - don't issue a fetch if:
@@ -222,7 +216,7 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode)
 		return vnode->flags & AFS_VNODE_DELETED ? -ENOENT : 0;
 	}
 
- get_anyway:
+get_anyway:
 	/* okay... we're going to have to initiate the op */
 	vnode->update_cnt++;
 
@@ -247,9 +241,8 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode)
 
 	_leave(" = %d", ret);
 	return ret;
-} /* end afs_vnode_fetch_status() */
+}
 
-/*****************************************************************************/
 /*
  * fetch file data from the volume
  * - TODO implement caching and server failover
@@ -290,10 +283,8 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode,
 
 	_leave(" = %d", ret);
 	return ret;
+}
 
-} /* end afs_vnode_fetch_data() */
-
-/*****************************************************************************/
 /*
  * break any outstanding callback on a vnode
  * - only relevent to server that issued it
@@ -337,9 +328,8 @@ int afs_vnode_give_up_callback(struct afs_vnode *vnode)
 
 	_leave(" = %d", ret);
 	return ret;
-} /* end afs_vnode_give_up_callback() */
+}
 
-/*****************************************************************************/
 /*
  * match a vnode record stored in the cache
  */
@@ -371,10 +361,9 @@ static cachefs_match_val_t afs_vnode_cache_match(void *target,
 
 	_leave(" = SUCCESS");
 	return CACHEFS_MATCH_SUCCESS;
-} /* end afs_vnode_cache_match() */
+}
 #endif
 
-/*****************************************************************************/
 /*
  * update a vnode record stored in the cache
  */
@@ -389,6 +378,5 @@ static void afs_vnode_cache_update(void *source, void *entry)
 	cvnode->vnode_id	= vnode->fid.vnode;
 	cvnode->vnode_unique	= vnode->fid.unique;
 	cvnode->data_version	= vnode->status.version;
-
-} /* end afs_vnode_cache_update() */
+}
 #endif
diff --git a/fs/afs/vnode.h b/fs/afs/vnode.h
index b86a97102e8..7f6d05b197a 100644
--- a/fs/afs/vnode.h
+++ b/fs/afs/vnode.h
@@ -1,4 +1,4 @@
-/* vnode.h: AFS vnode record
+/* AFS vnode record
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -9,24 +9,22 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef _LINUX_AFS_VNODE_H
-#define _LINUX_AFS_VNODE_H
+#ifndef AFS_VNODE_H
+#define AFS_VNODE_H
 
 #include <linux/fs.h>
 #include "server.h"
 #include "kafstimod.h"
 #include "cache.h"
 
-#ifdef __KERNEL__
-
 struct afs_rxfs_fetch_descriptor;
 
-/*****************************************************************************/
+extern struct afs_timer_ops afs_vnode_cb_timed_out_ops;
+
 /*
  * vnode catalogue entry
  */
-struct afs_cache_vnode
-{
+struct afs_cache_vnode {
 	afs_vnodeid_t		vnode_id;	/* vnode ID */
 	unsigned		vnode_unique;	/* vnode ID uniquifier */
 	afs_dataversion_t	data_version;	/* data version */
@@ -36,12 +34,10 @@ struct afs_cache_vnode
 extern struct cachefs_index_def afs_vnode_cache_index_def;
 #endif
 
-/*****************************************************************************/
 /*
  * AFS inode private data
  */
-struct afs_vnode
-{
+struct afs_vnode {
 	struct inode		vfs_inode;	/* the VFS's inode record */
 
 	struct afs_volume	*volume;	/* volume on which vnode resides */
@@ -72,7 +68,7 @@ struct afs_vnode
 
 static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
 {
-	return container_of(inode,struct afs_vnode,vfs_inode);
+	return container_of(inode, struct afs_vnode, vfs_inode);
 }
 
 static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
@@ -80,15 +76,9 @@ static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
 	return &vnode->vfs_inode;
 }
 
-extern int afs_vnode_fetch_status(struct afs_vnode *vnode);
-
-extern int afs_vnode_fetch_data(struct afs_vnode *vnode,
-				struct afs_rxfs_fetch_descriptor *desc);
-
-extern int afs_vnode_give_up_callback(struct afs_vnode *vnode);
-
-extern struct afs_timer_ops afs_vnode_cb_timed_out_ops;
-
-#endif /* __KERNEL__ */
+extern int afs_vnode_fetch_status(struct afs_vnode *);
+extern int afs_vnode_fetch_data(struct afs_vnode *,
+				struct afs_rxfs_fetch_descriptor *);
+extern int afs_vnode_give_up_callback(struct afs_vnode *);
 
-#endif /* _LINUX_AFS_VNODE_H */
+#endif /* AFS_VNODE_H */
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 768c6dbd323..c82e1bb4f2d 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -1,4 +1,4 @@
-/* volume.c: AFS volume management
+/* AFS volume management
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -43,7 +43,6 @@ struct cachefs_index_def afs_volume_cache_index_def = {
 };
 #endif
 
-/*****************************************************************************/
 /*
  * lookup a volume by name
  * - this can be one of the following:
@@ -97,14 +96,11 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
 		if (strcmp(suffix, ".readonly") == 0) {
 			type = AFSVL_ROVOL;
 			force = 1;
-		}
-		else if (strcmp(suffix, ".backup") == 0) {
+		} else if (strcmp(suffix, ".backup") == 0) {
 			type = AFSVL_BACKVOL;
 			force = 1;
-		}
-		else if (suffix[1] == 0) {
-		}
-		else {
+		} else if (suffix[1] == 0) {
+		} else {
 			suffix = NULL;
 		}
 	}
@@ -116,8 +112,7 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
 		cellname = name;
 		cellnamesz = volname - name;
 		volname++;
-	}
-	else {
+	} else {
 		volname = name;
 		cellname = NULL;
 		cellnamesz = 0;
@@ -139,8 +134,7 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
 			       cellname ?: "");
 			goto error;
 		}
-	}
-	else {
+	} else {
 		afs_get_cell(cell);
 	}
 
@@ -161,14 +155,11 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
 	if (force) {
 		if (!(srvtmask & (1 << type)))
 			goto error;
-	}
-	else if (srvtmask & AFS_VOL_VTM_RO) {
+	} else if (srvtmask & AFS_VOL_VTM_RO) {
 		type = AFSVL_ROVOL;
-	}
-	else if (srvtmask & AFS_VOL_VTM_RW) {
+	} else if (srvtmask & AFS_VOL_VTM_RW) {
 		type = AFSVL_RWVOL;
-	}
-	else {
+	} else {
 		goto error;
 	}
 
@@ -225,23 +216,23 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
 
 	vlocation->vols[type] = volume;
 
- success:
+success:
 	_debug("kAFS selected %s volume %08x",
 	       afs_voltypes[volume->type], volume->vid);
 	*_volume = volume;
 	ret = 0;
 
 	/* clean up */
- error_up:
+error_up:
 	up_write(&cell->vl_sem);
- error:
+error:
 	afs_put_vlocation(vlocation);
 	afs_put_cell(cell);
 
 	_leave(" = %d (%p)", ret, volume);
 	return ret;
 
- error_discard:
+error_discard:
 	up_write(&cell->vl_sem);
 
 	for (loop = volume->nservers - 1; loop >= 0; loop--)
@@ -249,9 +240,8 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
 
 	kfree(volume);
 	goto error;
-} /* end afs_volume_lookup() */
+}
 
-/*****************************************************************************/
 /*
  * destroy a volume record
  */
@@ -296,9 +286,8 @@ void afs_put_volume(struct afs_volume *volume)
 	kfree(volume);
 
 	_leave(" [destroyed]");
-} /* end afs_put_volume() */
+}
 
-/*****************************************************************************/
 /*
  * pick a server to use to try accessing this volume
  * - returns with an elevated usage count on the server chosen
@@ -373,9 +362,8 @@ int afs_volume_pick_fileserver(struct afs_volume *volume,
 	up_read(&volume->server_sem);
 	_leave(" = %d", ret);
 	return ret;
-} /* end afs_volume_pick_fileserver() */
+}
 
-/*****************************************************************************/
 /*
  * release a server after use
  * - releases the ref on the server struct that was acquired by picking
@@ -469,16 +457,14 @@ int afs_volume_release_fileserver(struct afs_volume *volume,
 	return 1;
 
 	/* tell the caller to loop around and try the next server */
- try_next_server_upw:
+try_next_server_upw:
 	up_write(&volume->server_sem);
- try_next_server:
+try_next_server:
 	afs_put_server(server);
 	_leave(" [try next server]");
 	return 0;
+}
 
-} /* end afs_volume_release_fileserver() */
-
-/*****************************************************************************/
 /*
  * match a volume hash record stored in the cache
  */
@@ -498,10 +484,9 @@ static cachefs_match_val_t afs_volume_cache_match(void *target,
 
 	_leave(" = FAILED");
 	return CACHEFS_MATCH_FAILED;
-} /* end afs_volume_cache_match() */
+}
 #endif
 
-/*****************************************************************************/
 /*
  * update a volume hash record stored in the cache
  */
@@ -514,6 +499,5 @@ static void afs_volume_cache_update(void *source, void *entry)
 	_enter("");
 
 	vhash->vtype = volume->type;
-
-} /* end afs_volume_cache_update() */
+}
 #endif
diff --git a/fs/afs/volume.h b/fs/afs/volume.h
index bfdcf19ba3f..a605bea2e3a 100644
--- a/fs/afs/volume.h
+++ b/fs/afs/volume.h
@@ -1,4 +1,4 @@
-/* volume.h: AFS volume management
+/* AFS volume management
  *
  * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -9,8 +9,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef _LINUX_AFS_VOLUME_H
-#define _LINUX_AFS_VOLUME_H
+#ifndef AFS_VOLUME_H
+#define AFS_VOLUME_H
 
 #include "types.h"
 #include "fsclient.h"
@@ -23,15 +23,12 @@ typedef enum {
 	AFS_VLUPD_PENDING,		/* on pending queue */
 	AFS_VLUPD_INPROGRESS,		/* op in progress */
 	AFS_VLUPD_BUSYSLEEP,		/* sleeping because server returned EBUSY */
-	
 } __attribute__((packed)) afs_vlocation_upd_t;
 
-/*****************************************************************************/
 /*
  * entry in the cached volume location catalogue
  */
-struct afs_cache_vlocation
-{
+struct afs_cache_vlocation {
 	uint8_t			name[64];	/* volume name (lowercase, padded with NULs) */
 	uint8_t			nservers;	/* number of entries used in servers[] */
 	uint8_t			vidmask;	/* voltype mask for vid[] */
@@ -49,12 +46,10 @@ struct afs_cache_vlocation
 extern struct cachefs_index_def afs_vlocation_cache_index_def;
 #endif
 
-/*****************************************************************************/
 /*
  * volume -> vnode hash table entry
  */
-struct afs_cache_vhash
-{
+struct afs_cache_vhash {
 	afs_voltype_t		vtype;		/* which volume variation */
 	uint8_t			hash_bucket;	/* which hash bucket this represents */
 } __attribute__((packed));
@@ -63,12 +58,10 @@ struct afs_cache_vhash
 extern struct cachefs_index_def afs_volume_cache_index_def;
 #endif
 
-/*****************************************************************************/
 /*
  * AFS volume location record
  */
-struct afs_vlocation
-{
+struct afs_vlocation {
 	atomic_t		usage;
 	struct list_head	link;		/* link in cell volume location list */
 	struct afs_timer	timeout;	/* decaching timer */
@@ -90,22 +83,18 @@ struct afs_vlocation
 	unsigned short		valid;		/* T if valid */
 };
 
-extern int afs_vlocation_lookup(struct afs_cell *cell,
-				const char *name,
-				unsigned namesz,
-				struct afs_vlocation **_vlocation);
+extern int afs_vlocation_lookup(struct afs_cell *, const char *, unsigned,
+				struct afs_vlocation **);
 
 #define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0)
 
-extern void afs_put_vlocation(struct afs_vlocation *vlocation);
-extern void afs_vlocation_do_timeout(struct afs_vlocation *vlocation);
+extern void afs_put_vlocation(struct afs_vlocation *);
+extern void afs_vlocation_do_timeout(struct afs_vlocation *);
 
-/*****************************************************************************/
 /*
  * AFS volume access record
  */
-struct afs_volume
-{
+struct afs_volume {
 	atomic_t		usage;
 	struct afs_cell		*cell;		/* cell to which belongs (unrefd ptr) */
 	struct afs_vlocation	*vlocation;	/* volume location */
@@ -121,20 +110,17 @@ struct afs_volume
 	struct rw_semaphore	server_sem;	/* lock for accessing current server */
 };
 
-extern int afs_volume_lookup(const char *name,
-			     struct afs_cell *cell,
-			     int rwpath,
-			     struct afs_volume **_volume);
+extern int afs_volume_lookup(const char *, struct afs_cell *, int,
+			     struct afs_volume **);
 
 #define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0)
 
-extern void afs_put_volume(struct afs_volume *volume);
+extern void afs_put_volume(struct afs_volume *);
 
-extern int afs_volume_pick_fileserver(struct afs_volume *volume,
-				      struct afs_server **_server);
+extern int afs_volume_pick_fileserver(struct afs_volume *,
+				      struct afs_server **);
 
-extern int afs_volume_release_fileserver(struct afs_volume *volume,
-					 struct afs_server *server,
-					 int result);
+extern int afs_volume_release_fileserver(struct afs_volume *,
+					 struct afs_server *, int);
 
-#endif /* _LINUX_AFS_VOLUME_H */
+#endif /* AFS_VOLUME_H */
-- 
cgit v1.2.3


From 651350d10f93bed7003c9a66e24cf25e0f8eed3d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 26 Apr 2007 15:50:17 -0700
Subject: [AF_RXRPC]: Add an interface to the AF_RXRPC module for the AFS
 filesystem to use

Add an interface to the AF_RXRPC module so that the AFS filesystem module can
more easily make use of the services available.  AFS still opens a socket but
then uses the action functions in lieu of sendmsg() and registers an intercept
functions to grab messages before they're queued on the socket Rx queue.

This permits AFS (or whatever) to:

 (1) Avoid the overhead of using the recvmsg() call.

 (2) Use different keys directly on individual client calls on one socket
     rather than having to open a whole slew of sockets, one for each key it
     might want to use.

 (3) Avoid calling request_key() at the point of issue of a call or opening of
     a socket.  This is done instead by AFS at the point of open(), unlink() or
     other VFS operation and the key handed through.

 (4) Request the use of something other than GFP_KERNEL to allocate memory.

Furthermore:

 (*) The socket buffer markings used by RxRPC are made available for AFS so
     that it can interpret the cooked RxRPC messages itself.

 (*) rxgen (un)marshalling abort codes are made available.


The following documentation for the kernel interface is added to
Documentation/networking/rxrpc.txt:

=========================
AF_RXRPC KERNEL INTERFACE
=========================

The AF_RXRPC module also provides an interface for use by in-kernel utilities
such as the AFS filesystem.  This permits such a utility to:

 (1) Use different keys directly on individual client calls on one socket
     rather than having to open a whole slew of sockets, one for each key it
     might want to use.

 (2) Avoid having RxRPC call request_key() at the point of issue of a call or
     opening of a socket.  Instead the utility is responsible for requesting a
     key at the appropriate point.  AFS, for instance, would do this during VFS
     operations such as open() or unlink().  The key is then handed through
     when the call is initiated.

 (3) Request the use of something other than GFP_KERNEL to allocate memory.

 (4) Avoid the overhead of using the recvmsg() call.  RxRPC messages can be
     intercepted before they get put into the socket Rx queue and the socket
     buffers manipulated directly.

To use the RxRPC facility, a kernel utility must still open an AF_RXRPC socket,
bind an addess as appropriate and listen if it's to be a server socket, but
then it passes this to the kernel interface functions.

The kernel interface functions are as follows:

 (*) Begin a new client call.

	struct rxrpc_call *
	rxrpc_kernel_begin_call(struct socket *sock,
				struct sockaddr_rxrpc *srx,
				struct key *key,
				unsigned long user_call_ID,
				gfp_t gfp);

     This allocates the infrastructure to make a new RxRPC call and assigns
     call and connection numbers.  The call will be made on the UDP port that
     the socket is bound to.  The call will go to the destination address of a
     connected client socket unless an alternative is supplied (srx is
     non-NULL).

     If a key is supplied then this will be used to secure the call instead of
     the key bound to the socket with the RXRPC_SECURITY_KEY sockopt.  Calls
     secured in this way will still share connections if at all possible.

     The user_call_ID is equivalent to that supplied to sendmsg() in the
     control data buffer.  It is entirely feasible to use this to point to a
     kernel data structure.

     If this function is successful, an opaque reference to the RxRPC call is
     returned.  The caller now holds a reference on this and it must be
     properly ended.

 (*) End a client call.

	void rxrpc_kernel_end_call(struct rxrpc_call *call);

     This is used to end a previously begun call.  The user_call_ID is expunged
     from AF_RXRPC's knowledge and will not be seen again in association with
     the specified call.

 (*) Send data through a call.

	int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
				   size_t len);

     This is used to supply either the request part of a client call or the
     reply part of a server call.  msg.msg_iovlen and msg.msg_iov specify the
     data buffers to be used.  msg_iov may not be NULL and must point
     exclusively to in-kernel virtual addresses.  msg.msg_flags may be given
     MSG_MORE if there will be subsequent data sends for this call.

     The msg must not specify a destination address, control data or any flags
     other than MSG_MORE.  len is the total amount of data to transmit.

 (*) Abort a call.

	void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code);

     This is used to abort a call if it's still in an abortable state.  The
     abort code specified will be placed in the ABORT message sent.

 (*) Intercept received RxRPC messages.

	typedef void (*rxrpc_interceptor_t)(struct sock *sk,
					    unsigned long user_call_ID,
					    struct sk_buff *skb);

	void
	rxrpc_kernel_intercept_rx_messages(struct socket *sock,
					   rxrpc_interceptor_t interceptor);

     This installs an interceptor function on the specified AF_RXRPC socket.
     All messages that would otherwise wind up in the socket's Rx queue are
     then diverted to this function.  Note that care must be taken to process
     the messages in the right order to maintain DATA message sequentiality.

     The interceptor function itself is provided with the address of the socket
     and handling the incoming message, the ID assigned by the kernel utility
     to the call and the socket buffer containing the message.

     The skb->mark field indicates the type of message:

	MARK				MEANING
	===============================	=======================================
	RXRPC_SKB_MARK_DATA		Data message
	RXRPC_SKB_MARK_FINAL_ACK	Final ACK received for an incoming call
	RXRPC_SKB_MARK_BUSY		Client call rejected as server busy
	RXRPC_SKB_MARK_REMOTE_ABORT	Call aborted by peer
	RXRPC_SKB_MARK_NET_ERROR	Network error detected
	RXRPC_SKB_MARK_LOCAL_ERROR	Local error encountered
	RXRPC_SKB_MARK_NEW_CALL		New incoming call awaiting acceptance

     The remote abort message can be probed with rxrpc_kernel_get_abort_code().
     The two error messages can be probed with rxrpc_kernel_get_error_number().
     A new call can be accepted with rxrpc_kernel_accept_call().

     Data messages can have their contents extracted with the usual bunch of
     socket buffer manipulation functions.  A data message can be determined to
     be the last one in a sequence with rxrpc_kernel_is_data_last().  When a
     data message has been used up, rxrpc_kernel_data_delivered() should be
     called on it..

     Non-data messages should be handled to rxrpc_kernel_free_skb() to dispose
     of.  It is possible to get extra refs on all types of message for later
     freeing, but this may pin the state of a call until the message is finally
     freed.

 (*) Accept an incoming call.

	struct rxrpc_call *
	rxrpc_kernel_accept_call(struct socket *sock,
				 unsigned long user_call_ID);

     This is used to accept an incoming call and to assign it a call ID.  This
     function is similar to rxrpc_kernel_begin_call() and calls accepted must
     be ended in the same way.

     If this function is successful, an opaque reference to the RxRPC call is
     returned.  The caller now holds a reference on this and it must be
     properly ended.

 (*) Reject an incoming call.

	int rxrpc_kernel_reject_call(struct socket *sock);

     This is used to reject the first incoming call on the socket's queue with
     a BUSY message.  -ENODATA is returned if there were no incoming calls.
     Other errors may be returned if the call had been aborted (-ECONNABORTED)
     or had timed out (-ETIME).

 (*) Record the delivery of a data message and free it.

	void rxrpc_kernel_data_delivered(struct sk_buff *skb);

     This is used to record a data message as having been delivered and to
     update the ACK state for the call.  The socket buffer will be freed.

 (*) Free a message.

	void rxrpc_kernel_free_skb(struct sk_buff *skb);

     This is used to free a non-DATA socket buffer intercepted from an AF_RXRPC
     socket.

 (*) Determine if a data message is the last one on a call.

	bool rxrpc_kernel_is_data_last(struct sk_buff *skb);

     This is used to determine if a socket buffer holds the last data message
     to be received for a call (true will be returned if it does, false
     if not).

     The data message will be part of the reply on a client call and the
     request on an incoming call.  In the latter case there will be more
     messages, but in the former case there will not.

 (*) Get the abort code from an abort message.

	u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb);

     This is used to extract the abort code from a remote abort message.

 (*) Get the error number from a local or network error message.

	int rxrpc_kernel_get_error_number(struct sk_buff *skb);

     This is used to extract the error number from a message indicating either
     a local error occurred or a network error occurred.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/rxrpc.txt | 196 +++++++++++++++++++++++++++++++++++++
 include/net/af_rxrpc.h             |  44 ++++++++-
 include/rxrpc/packet.h             |  12 +++
 net/rxrpc/af_rxrpc.c               | 141 ++++++++++++++++++++++++--
 net/rxrpc/ar-accept.c              | 119 ++++++++++++++++++++--
 net/rxrpc/ar-ack.c                 |  10 +-
 net/rxrpc/ar-call.c                |  75 ++++++++------
 net/rxrpc/ar-connection.c          |  28 ++++--
 net/rxrpc/ar-connevent.c           |  20 +++-
 net/rxrpc/ar-error.c               |   6 +-
 net/rxrpc/ar-input.c               |  60 +++++++-----
 net/rxrpc/ar-internal.h            |  44 ++++-----
 net/rxrpc/ar-local.c               |   2 +-
 net/rxrpc/ar-output.c              |  84 +++++++++++++++-
 net/rxrpc/ar-peer.c                |   2 +-
 net/rxrpc/ar-recvmsg.c             |  75 +++++++++++++-
 net/rxrpc/ar-skbuff.c              |  16 ++-
 net/rxrpc/ar-transport.c           |   8 +-
 18 files changed, 813 insertions(+), 129 deletions(-)

diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index fb809b738a0..cae231b1c13 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -25,6 +25,8 @@ Contents of this document:
 
  (*) Example server usage.
 
+ (*) AF_RXRPC kernel interface.
+
 
 ========
 OVERVIEW
@@ -661,3 +663,197 @@ A server would be set up to accept operations in the following manner:
 Note that all the communications for a particular service take place through
 the one server socket, using control messages on sendmsg() and recvmsg() to
 determine the call affected.
+
+
+=========================
+AF_RXRPC KERNEL INTERFACE
+=========================
+
+The AF_RXRPC module also provides an interface for use by in-kernel utilities
+such as the AFS filesystem.  This permits such a utility to:
+
+ (1) Use different keys directly on individual client calls on one socket
+     rather than having to open a whole slew of sockets, one for each key it
+     might want to use.
+
+ (2) Avoid having RxRPC call request_key() at the point of issue of a call or
+     opening of a socket.  Instead the utility is responsible for requesting a
+     key at the appropriate point.  AFS, for instance, would do this during VFS
+     operations such as open() or unlink().  The key is then handed through
+     when the call is initiated.
+
+ (3) Request the use of something other than GFP_KERNEL to allocate memory.
+
+ (4) Avoid the overhead of using the recvmsg() call.  RxRPC messages can be
+     intercepted before they get put into the socket Rx queue and the socket
+     buffers manipulated directly.
+
+To use the RxRPC facility, a kernel utility must still open an AF_RXRPC socket,
+bind an addess as appropriate and listen if it's to be a server socket, but
+then it passes this to the kernel interface functions.
+
+The kernel interface functions are as follows:
+
+ (*) Begin a new client call.
+
+	struct rxrpc_call *
+	rxrpc_kernel_begin_call(struct socket *sock,
+				struct sockaddr_rxrpc *srx,
+				struct key *key,
+				unsigned long user_call_ID,
+				gfp_t gfp);
+
+     This allocates the infrastructure to make a new RxRPC call and assigns
+     call and connection numbers.  The call will be made on the UDP port that
+     the socket is bound to.  The call will go to the destination address of a
+     connected client socket unless an alternative is supplied (srx is
+     non-NULL).
+
+     If a key is supplied then this will be used to secure the call instead of
+     the key bound to the socket with the RXRPC_SECURITY_KEY sockopt.  Calls
+     secured in this way will still share connections if at all possible.
+
+     The user_call_ID is equivalent to that supplied to sendmsg() in the
+     control data buffer.  It is entirely feasible to use this to point to a
+     kernel data structure.
+
+     If this function is successful, an opaque reference to the RxRPC call is
+     returned.  The caller now holds a reference on this and it must be
+     properly ended.
+
+ (*) End a client call.
+
+	void rxrpc_kernel_end_call(struct rxrpc_call *call);
+
+     This is used to end a previously begun call.  The user_call_ID is expunged
+     from AF_RXRPC's knowledge and will not be seen again in association with
+     the specified call.
+
+ (*) Send data through a call.
+
+	int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
+				   size_t len);
+
+     This is used to supply either the request part of a client call or the
+     reply part of a server call.  msg.msg_iovlen and msg.msg_iov specify the
+     data buffers to be used.  msg_iov may not be NULL and must point
+     exclusively to in-kernel virtual addresses.  msg.msg_flags may be given
+     MSG_MORE if there will be subsequent data sends for this call.
+
+     The msg must not specify a destination address, control data or any flags
+     other than MSG_MORE.  len is the total amount of data to transmit.
+
+ (*) Abort a call.
+
+	void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code);
+
+     This is used to abort a call if it's still in an abortable state.  The
+     abort code specified will be placed in the ABORT message sent.
+
+ (*) Intercept received RxRPC messages.
+
+	typedef void (*rxrpc_interceptor_t)(struct sock *sk,
+					    unsigned long user_call_ID,
+					    struct sk_buff *skb);
+
+	void
+	rxrpc_kernel_intercept_rx_messages(struct socket *sock,
+					   rxrpc_interceptor_t interceptor);
+
+     This installs an interceptor function on the specified AF_RXRPC socket.
+     All messages that would otherwise wind up in the socket's Rx queue are
+     then diverted to this function.  Note that care must be taken to process
+     the messages in the right order to maintain DATA message sequentiality.
+
+     The interceptor function itself is provided with the address of the socket
+     and handling the incoming message, the ID assigned by the kernel utility
+     to the call and the socket buffer containing the message.
+
+     The skb->mark field indicates the type of message:
+
+	MARK				MEANING
+	===============================	=======================================
+	RXRPC_SKB_MARK_DATA		Data message
+	RXRPC_SKB_MARK_FINAL_ACK	Final ACK received for an incoming call
+	RXRPC_SKB_MARK_BUSY		Client call rejected as server busy
+	RXRPC_SKB_MARK_REMOTE_ABORT	Call aborted by peer
+	RXRPC_SKB_MARK_NET_ERROR	Network error detected
+	RXRPC_SKB_MARK_LOCAL_ERROR	Local error encountered
+	RXRPC_SKB_MARK_NEW_CALL		New incoming call awaiting acceptance
+
+     The remote abort message can be probed with rxrpc_kernel_get_abort_code().
+     The two error messages can be probed with rxrpc_kernel_get_error_number().
+     A new call can be accepted with rxrpc_kernel_accept_call().
+
+     Data messages can have their contents extracted with the usual bunch of
+     socket buffer manipulation functions.  A data message can be determined to
+     be the last one in a sequence with rxrpc_kernel_is_data_last().  When a
+     data message has been used up, rxrpc_kernel_data_delivered() should be
+     called on it..
+
+     Non-data messages should be handled to rxrpc_kernel_free_skb() to dispose
+     of.  It is possible to get extra refs on all types of message for later
+     freeing, but this may pin the state of a call until the message is finally
+     freed.
+
+ (*) Accept an incoming call.
+
+	struct rxrpc_call *
+	rxrpc_kernel_accept_call(struct socket *sock,
+				 unsigned long user_call_ID);
+
+     This is used to accept an incoming call and to assign it a call ID.  This
+     function is similar to rxrpc_kernel_begin_call() and calls accepted must
+     be ended in the same way.
+
+     If this function is successful, an opaque reference to the RxRPC call is
+     returned.  The caller now holds a reference on this and it must be
+     properly ended.
+
+ (*) Reject an incoming call.
+
+	int rxrpc_kernel_reject_call(struct socket *sock);
+
+     This is used to reject the first incoming call on the socket's queue with
+     a BUSY message.  -ENODATA is returned if there were no incoming calls.
+     Other errors may be returned if the call had been aborted (-ECONNABORTED)
+     or had timed out (-ETIME).
+
+ (*) Record the delivery of a data message and free it.
+
+	void rxrpc_kernel_data_delivered(struct sk_buff *skb);
+
+     This is used to record a data message as having been delivered and to
+     update the ACK state for the call.  The socket buffer will be freed.
+
+ (*) Free a message.
+
+	void rxrpc_kernel_free_skb(struct sk_buff *skb);
+
+     This is used to free a non-DATA socket buffer intercepted from an AF_RXRPC
+     socket.
+
+ (*) Determine if a data message is the last one on a call.
+
+	bool rxrpc_kernel_is_data_last(struct sk_buff *skb);
+
+     This is used to determine if a socket buffer holds the last data message
+     to be received for a call (true will be returned if it does, false
+     if not).
+
+     The data message will be part of the reply on a client call and the
+     request on an incoming call.  In the latter case there will be more
+     messages, but in the former case there will not.
+
+ (*) Get the abort code from an abort message.
+
+	u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb);
+
+     This is used to extract the abort code from a remote abort message.
+
+ (*) Get the error number from a local or network error message.
+
+	int rxrpc_kernel_get_error_number(struct sk_buff *skb);
+
+     This is used to extract the error number from a message indicating either
+     a local error occurred or a network error occurred.
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index b01ca2589d6..00c2eaa07c2 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -1,6 +1,6 @@
-/* RxRPC definitions
+/* RxRPC kernel service interface definitions
  *
- * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -12,6 +12,46 @@
 #ifndef _NET_RXRPC_H
 #define _NET_RXRPC_H
 
+#ifdef __KERNEL__
+
 #include <linux/rxrpc.h>
 
+struct rxrpc_call;
+
+/*
+ * the mark applied to socket buffers that may be intercepted
+ */
+enum {
+	RXRPC_SKB_MARK_DATA,		/* data message */
+	RXRPC_SKB_MARK_FINAL_ACK,	/* final ACK received message */
+	RXRPC_SKB_MARK_BUSY,		/* server busy message */
+	RXRPC_SKB_MARK_REMOTE_ABORT,	/* remote abort message */
+	RXRPC_SKB_MARK_NET_ERROR,	/* network error message */
+	RXRPC_SKB_MARK_LOCAL_ERROR,	/* local error message */
+	RXRPC_SKB_MARK_NEW_CALL,	/* local error message */
+};
+
+typedef void (*rxrpc_interceptor_t)(struct sock *, unsigned long,
+				    struct sk_buff *);
+extern void rxrpc_kernel_intercept_rx_messages(struct socket *,
+					       rxrpc_interceptor_t);
+extern struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
+						  struct sockaddr_rxrpc *,
+						  struct key *,
+						  unsigned long,
+						  gfp_t);
+extern int rxrpc_kernel_send_data(struct rxrpc_call *, struct msghdr *,
+				  size_t);
+extern void rxrpc_kernel_abort_call(struct rxrpc_call *, u32);
+extern void rxrpc_kernel_end_call(struct rxrpc_call *);
+extern bool rxrpc_kernel_is_data_last(struct sk_buff *);
+extern u32 rxrpc_kernel_get_abort_code(struct sk_buff *);
+extern int rxrpc_kernel_get_error_number(struct sk_buff *);
+extern void rxrpc_kernel_data_delivered(struct sk_buff *);
+extern void rxrpc_kernel_free_skb(struct sk_buff *);
+extern struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *,
+						   unsigned long);
+extern int rxrpc_kernel_reject_call(struct socket *);
+
+#endif /* __KERNEL__ */
 #endif /* _NET_RXRPC_H */
diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h
index 452a9bb02d4..09b11a1e8d4 100644
--- a/include/rxrpc/packet.h
+++ b/include/rxrpc/packet.h
@@ -185,6 +185,18 @@ struct rxkad_response {
 #define RX_ADDRINUSE		-7	/* UDP port in use */
 #define RX_DEBUGI_BADTYPE	-8	/* bad debugging packet type */
 
+/*
+ * (un)marshalling abort codes (rxgen)
+ */
+#define	RXGEN_CC_MARSHAL    -450
+#define	RXGEN_CC_UNMARSHAL  -451
+#define	RXGEN_SS_MARSHAL    -452
+#define	RXGEN_SS_UNMARSHAL  -453
+#define	RXGEN_DECODE	    -454
+#define	RXGEN_OPCODE	    -455
+#define	RXGEN_SS_XDRFREE    -456
+#define	RXGEN_CC_XDRFREE    -457
+
 /*
  * Rx kerberos security abort codes
  * - unfortunately we have no generalised security abort codes to say things
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index bfa8822e228..2c57df9c131 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -41,6 +41,8 @@ atomic_t rxrpc_debug_id;
 /* count of skbs currently in use */
 atomic_t rxrpc_n_skbs;
 
+struct workqueue_struct *rxrpc_workqueue;
+
 static void rxrpc_sock_destructor(struct sock *);
 
 /*
@@ -214,7 +216,8 @@ static int rxrpc_listen(struct socket *sock, int backlog)
  */
 static struct rxrpc_transport *rxrpc_name_to_transport(struct socket *sock,
 						       struct sockaddr *addr,
-						       int addr_len, int flags)
+						       int addr_len, int flags,
+						       gfp_t gfp)
 {
 	struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *) addr;
 	struct rxrpc_transport *trans;
@@ -232,17 +235,129 @@ static struct rxrpc_transport *rxrpc_name_to_transport(struct socket *sock,
 		return ERR_PTR(-EAFNOSUPPORT);
 
 	/* find a remote transport endpoint from the local one */
-	peer = rxrpc_get_peer(srx, GFP_KERNEL);
+	peer = rxrpc_get_peer(srx, gfp);
 	if (IS_ERR(peer))
 		return ERR_PTR(PTR_ERR(peer));
 
 	/* find a transport */
-	trans = rxrpc_get_transport(rx->local, peer, GFP_KERNEL);
+	trans = rxrpc_get_transport(rx->local, peer, gfp);
 	rxrpc_put_peer(peer);
 	_leave(" = %p", trans);
 	return trans;
 }
 
+/**
+ * rxrpc_kernel_begin_call - Allow a kernel service to begin a call
+ * @sock: The socket on which to make the call
+ * @srx: The address of the peer to contact (defaults to socket setting)
+ * @key: The security context to use (defaults to socket setting)
+ * @user_call_ID: The ID to use
+ *
+ * Allow a kernel service to begin a call on the nominated socket.  This just
+ * sets up all the internal tracking structures and allocates connection and
+ * call IDs as appropriate.  The call to be used is returned.
+ *
+ * The default socket destination address and security may be overridden by
+ * supplying @srx and @key.
+ */
+struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
+					   struct sockaddr_rxrpc *srx,
+					   struct key *key,
+					   unsigned long user_call_ID,
+					   gfp_t gfp)
+{
+	struct rxrpc_conn_bundle *bundle;
+	struct rxrpc_transport *trans;
+	struct rxrpc_call *call;
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	__be16 service_id;
+
+	_enter(",,%x,%lx", key_serial(key), user_call_ID);
+
+	lock_sock(&rx->sk);
+
+	if (srx) {
+		trans = rxrpc_name_to_transport(sock, (struct sockaddr *) srx,
+						sizeof(*srx), 0, gfp);
+		if (IS_ERR(trans)) {
+			call = ERR_PTR(PTR_ERR(trans));
+			trans = NULL;
+			goto out;
+		}
+	} else {
+		trans = rx->trans;
+		if (!trans) {
+			call = ERR_PTR(-ENOTCONN);
+			goto out;
+		}
+		atomic_inc(&trans->usage);
+	}
+
+	service_id = rx->service_id;
+	if (srx)
+		service_id = htons(srx->srx_service);
+
+	if (!key)
+		key = rx->key;
+	if (key && !key->payload.data)
+		key = NULL; /* a no-security key */
+
+	bundle = rxrpc_get_bundle(rx, trans, key, service_id, gfp);
+	if (IS_ERR(bundle)) {
+		call = ERR_PTR(PTR_ERR(bundle));
+		goto out;
+	}
+
+	call = rxrpc_get_client_call(rx, trans, bundle, user_call_ID, true,
+				     gfp);
+	rxrpc_put_bundle(trans, bundle);
+out:
+	rxrpc_put_transport(trans);
+	release_sock(&rx->sk);
+	_leave(" = %p", call);
+	return call;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_begin_call);
+
+/**
+ * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using
+ * @call: The call to end
+ *
+ * Allow a kernel service to end a call it was using.  The call must be
+ * complete before this is called (the call should be aborted if necessary).
+ */
+void rxrpc_kernel_end_call(struct rxrpc_call *call)
+{
+	_enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
+	rxrpc_remove_user_ID(call->socket, call);
+	rxrpc_put_call(call);
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_end_call);
+
+/**
+ * rxrpc_kernel_intercept_rx_messages - Intercept received RxRPC messages
+ * @sock: The socket to intercept received messages on
+ * @interceptor: The function to pass the messages to
+ *
+ * Allow a kernel service to intercept messages heading for the Rx queue on an
+ * RxRPC socket.  They get passed to the specified function instead.
+ * @interceptor should free the socket buffers it is given.  @interceptor is
+ * called with the socket receive queue spinlock held and softirqs disabled -
+ * this ensures that the messages will be delivered in the right order.
+ */
+void rxrpc_kernel_intercept_rx_messages(struct socket *sock,
+					rxrpc_interceptor_t interceptor)
+{
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+
+	_enter("");
+	rx->interceptor = interceptor;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_intercept_rx_messages);
+
 /*
  * connect an RxRPC socket
  * - this just targets it at a specific destination; no actual connection
@@ -294,7 +409,8 @@ static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
 		return -EBUSY; /* server sockets can't connect as well */
 	}
 
-	trans = rxrpc_name_to_transport(sock, addr, addr_len, flags);
+	trans = rxrpc_name_to_transport(sock, addr, addr_len, flags,
+					GFP_KERNEL);
 	if (IS_ERR(trans)) {
 		release_sock(&rx->sk);
 		_leave(" = %ld", PTR_ERR(trans));
@@ -344,7 +460,7 @@ static int rxrpc_sendmsg(struct kiocb *iocb, struct socket *sock,
 	if (m->msg_name) {
 		ret = -EISCONN;
 		trans = rxrpc_name_to_transport(sock, m->msg_name,
-						m->msg_namelen, 0);
+						m->msg_namelen, 0, GFP_KERNEL);
 		if (IS_ERR(trans)) {
 			ret = PTR_ERR(trans);
 			trans = NULL;
@@ -576,7 +692,7 @@ static int rxrpc_release_sock(struct sock *sk)
 
 	/* try to flush out this socket */
 	rxrpc_release_calls_on_socket(rx);
-	flush_scheduled_work();
+	flush_workqueue(rxrpc_workqueue);
 	rxrpc_purge_queue(&sk->sk_receive_queue);
 
 	if (rx->conn) {
@@ -673,15 +789,21 @@ static int __init af_rxrpc_init(void)
 
 	rxrpc_epoch = htonl(xtime.tv_sec);
 
+	ret = -ENOMEM;
 	rxrpc_call_jar = kmem_cache_create(
 		"rxrpc_call_jar", sizeof(struct rxrpc_call), 0,
 		SLAB_HWCACHE_ALIGN, NULL, NULL);
 	if (!rxrpc_call_jar) {
 		printk(KERN_NOTICE "RxRPC: Failed to allocate call jar\n");
-		ret = -ENOMEM;
 		goto error_call_jar;
 	}
 
+	rxrpc_workqueue = create_workqueue("krxrpcd");
+	if (!rxrpc_workqueue) {
+		printk(KERN_NOTICE "RxRPC: Failed to allocate work queue\n");
+		goto error_work_queue;
+	}
+
 	ret = proto_register(&rxrpc_proto, 1);
         if (ret < 0) {
                 printk(KERN_CRIT "RxRPC: Cannot register protocol\n");
@@ -719,6 +841,8 @@ error_key_type:
 error_sock:
 	proto_unregister(&rxrpc_proto);
 error_proto:
+	destroy_workqueue(rxrpc_workqueue);
+error_work_queue:
 	kmem_cache_destroy(rxrpc_call_jar);
 error_call_jar:
 	return ret;
@@ -743,9 +867,10 @@ static void __exit af_rxrpc_exit(void)
 	ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0);
 
 	_debug("flush scheduled work");
-	flush_scheduled_work();
+	flush_workqueue(rxrpc_workqueue);
 	proc_net_remove("rxrpc_conns");
 	proc_net_remove("rxrpc_calls");
+	destroy_workqueue(rxrpc_workqueue);
 	kmem_cache_destroy(rxrpc_call_jar);
 	_leave("");
 }
diff --git a/net/rxrpc/ar-accept.c b/net/rxrpc/ar-accept.c
index e7af780cd6f..92a87fde8bf 100644
--- a/net/rxrpc/ar-accept.c
+++ b/net/rxrpc/ar-accept.c
@@ -139,7 +139,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
 			call->conn->state = RXRPC_CONN_SERVER_CHALLENGING;
 			atomic_inc(&call->conn->usage);
 			set_bit(RXRPC_CONN_CHALLENGE, &call->conn->events);
-			schedule_work(&call->conn->processor);
+			rxrpc_queue_conn(call->conn);
 		} else {
 			_debug("conn ready");
 			call->state = RXRPC_CALL_SERVER_ACCEPTING;
@@ -183,7 +183,7 @@ invalid_service:
 	if (!test_bit(RXRPC_CALL_RELEASE, &call->flags) &&
 	    !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events)) {
 		rxrpc_get_call(call);
-		schedule_work(&call->processor);
+		rxrpc_queue_call(call);
 	}
 	read_unlock_bh(&call->state_lock);
 	rxrpc_put_call(call);
@@ -310,7 +310,8 @@ security_mismatch:
  * handle acceptance of a call by userspace
  * - assign the user call ID to the call at the front of the queue
  */
-int rxrpc_accept_call(struct rxrpc_sock *rx, unsigned long user_call_ID)
+struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
+				     unsigned long user_call_ID)
 {
 	struct rxrpc_call *call;
 	struct rb_node *parent, **pp;
@@ -374,12 +375,76 @@ int rxrpc_accept_call(struct rxrpc_sock *rx, unsigned long user_call_ID)
 		BUG();
 	if (test_and_set_bit(RXRPC_CALL_ACCEPTED, &call->events))
 		BUG();
-	schedule_work(&call->processor);
+	rxrpc_queue_call(call);
 
+	rxrpc_get_call(call);
 	write_unlock_bh(&call->state_lock);
 	write_unlock(&rx->call_lock);
-	_leave(" = 0");
-	return 0;
+	_leave(" = %p{%d}", call, call->debug_id);
+	return call;
+
+	/* if the call is already dying or dead, then we leave the socket's ref
+	 * on it to be released by rxrpc_dead_call_expired() as induced by
+	 * rxrpc_release_call() */
+out_release:
+	_debug("release %p", call);
+	if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
+	    !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
+		rxrpc_queue_call(call);
+out_discard:
+	write_unlock_bh(&call->state_lock);
+	_debug("discard %p", call);
+out:
+	write_unlock(&rx->call_lock);
+	_leave(" = %d", ret);
+	return ERR_PTR(ret);
+}
+
+/*
+ * handle rejectance of a call by userspace
+ * - reject the call at the front of the queue
+ */
+int rxrpc_reject_call(struct rxrpc_sock *rx)
+{
+	struct rxrpc_call *call;
+	int ret;
+
+	_enter("");
+
+	ASSERT(!irqs_disabled());
+
+	write_lock(&rx->call_lock);
+
+	ret = -ENODATA;
+	if (list_empty(&rx->acceptq))
+		goto out;
+
+	/* dequeue the first call and check it's still valid */
+	call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link);
+	list_del_init(&call->accept_link);
+	sk_acceptq_removed(&rx->sk);
+
+	write_lock_bh(&call->state_lock);
+	switch (call->state) {
+	case RXRPC_CALL_SERVER_ACCEPTING:
+		call->state = RXRPC_CALL_SERVER_BUSY;
+		if (test_and_set_bit(RXRPC_CALL_REJECT_BUSY, &call->events))
+			rxrpc_queue_call(call);
+		ret = 0;
+		goto out_release;
+	case RXRPC_CALL_REMOTELY_ABORTED:
+	case RXRPC_CALL_LOCALLY_ABORTED:
+		ret = -ECONNABORTED;
+		goto out_release;
+	case RXRPC_CALL_NETWORK_ERROR:
+		ret = call->conn->error;
+		goto out_release;
+	case RXRPC_CALL_DEAD:
+		ret = -ETIME;
+		goto out_discard;
+	default:
+		BUG();
+	}
 
 	/* if the call is already dying or dead, then we leave the socket's ref
 	 * on it to be released by rxrpc_dead_call_expired() as induced by
@@ -388,7 +453,7 @@ out_release:
 	_debug("release %p", call);
 	if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
 	    !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
-		schedule_work(&call->processor);
+		rxrpc_queue_call(call);
 out_discard:
 	write_unlock_bh(&call->state_lock);
 	_debug("discard %p", call);
@@ -397,3 +462,43 @@ out:
 	_leave(" = %d", ret);
 	return ret;
 }
+
+/**
+ * rxrpc_kernel_accept_call - Allow a kernel service to accept an incoming call
+ * @sock: The socket on which the impending call is waiting
+ * @user_call_ID: The tag to attach to the call
+ *
+ * Allow a kernel service to accept an incoming call, assuming the incoming
+ * call is still valid.
+ */
+struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock,
+					    unsigned long user_call_ID)
+{
+	struct rxrpc_call *call;
+
+	_enter(",%lx", user_call_ID);
+	call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID);
+	_leave(" = %p", call);
+	return call;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_accept_call);
+
+/**
+ * rxrpc_kernel_reject_call - Allow a kernel service to reject an incoming call
+ * @sock: The socket on which the impending call is waiting
+ *
+ * Allow a kernel service to reject an incoming call with a BUSY message,
+ * assuming the incoming call is still valid.
+ */
+int rxrpc_kernel_reject_call(struct socket *sock)
+{
+	int ret;
+
+	_enter("");
+	ret = rxrpc_reject_call(rxrpc_sk(sock->sk));
+	_leave(" = %d", ret);
+	return ret;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_reject_call);
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index 8f7764eca96..fc07a926df5 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -113,7 +113,7 @@ cancel_timer:
 	read_lock_bh(&call->state_lock);
 	if (call->state <= RXRPC_CALL_COMPLETE &&
 	    !test_and_set_bit(RXRPC_CALL_ACK, &call->events))
-		schedule_work(&call->processor);
+		rxrpc_queue_call(call);
 	read_unlock_bh(&call->state_lock);
 }
 
@@ -1166,7 +1166,7 @@ send_message_2:
 		_debug("sendmsg failed: %d", ret);
 		read_lock_bh(&call->state_lock);
 		if (call->state < RXRPC_CALL_DEAD)
-			schedule_work(&call->processor);
+			rxrpc_queue_call(call);
 		read_unlock_bh(&call->state_lock);
 		goto error;
 	}
@@ -1210,7 +1210,7 @@ maybe_reschedule:
 	if (call->events || !skb_queue_empty(&call->rx_queue)) {
 		read_lock_bh(&call->state_lock);
 		if (call->state < RXRPC_CALL_DEAD)
-			schedule_work(&call->processor);
+			rxrpc_queue_call(call);
 		read_unlock_bh(&call->state_lock);
 	}
 
@@ -1224,7 +1224,7 @@ maybe_reschedule:
 		read_lock_bh(&call->state_lock);
 		if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
 		    !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
-			schedule_work(&call->processor);
+			rxrpc_queue_call(call);
 		read_unlock_bh(&call->state_lock);
 	}
 
@@ -1238,7 +1238,7 @@ error:
 	 * work pending bit and the work item being processed again */
 	if (call->events && !work_pending(&call->processor)) {
 		_debug("jumpstart %x", ntohl(call->conn->cid));
-		schedule_work(&call->processor);
+		rxrpc_queue_call(call);
 	}
 
 	_leave("");
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
index ac31cceda2f..4d92d88ff1f 100644
--- a/net/rxrpc/ar-call.c
+++ b/net/rxrpc/ar-call.c
@@ -19,7 +19,7 @@ struct kmem_cache *rxrpc_call_jar;
 LIST_HEAD(rxrpc_calls);
 DEFINE_RWLOCK(rxrpc_call_lock);
 static unsigned rxrpc_call_max_lifetime = 60;
-static unsigned rxrpc_dead_call_timeout = 10;
+static unsigned rxrpc_dead_call_timeout = 2;
 
 static void rxrpc_destroy_call(struct work_struct *work);
 static void rxrpc_call_life_expired(unsigned long _call);
@@ -264,7 +264,7 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
 		switch (call->state) {
 		case RXRPC_CALL_LOCALLY_ABORTED:
 			if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
-				schedule_work(&call->processor);
+				rxrpc_queue_call(call);
 		case RXRPC_CALL_REMOTELY_ABORTED:
 			read_unlock(&call->state_lock);
 			goto aborted_call;
@@ -398,6 +398,7 @@ found_extant_call:
  */
 void rxrpc_release_call(struct rxrpc_call *call)
 {
+	struct rxrpc_connection *conn = call->conn;
 	struct rxrpc_sock *rx = call->socket;
 
 	_enter("{%d,%d,%d,%d}",
@@ -413,8 +414,7 @@ void rxrpc_release_call(struct rxrpc_call *call)
 	/* dissociate from the socket
 	 * - the socket's ref on the call is passed to the death timer
 	 */
-	_debug("RELEASE CALL %p (%d CONN %p)",
-	       call, call->debug_id, call->conn);
+	_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
 
 	write_lock_bh(&rx->call_lock);
 	if (!list_empty(&call->accept_link)) {
@@ -430,24 +430,42 @@ void rxrpc_release_call(struct rxrpc_call *call)
 	}
 	write_unlock_bh(&rx->call_lock);
 
-	if (call->conn->out_clientflag)
-		spin_lock(&call->conn->trans->client_lock);
-	write_lock_bh(&call->conn->lock);
-
 	/* free up the channel for reuse */
-	if (call->conn->out_clientflag) {
-		call->conn->avail_calls++;
-		if (call->conn->avail_calls == RXRPC_MAXCALLS)
-			list_move_tail(&call->conn->bundle_link,
-				       &call->conn->bundle->unused_conns);
-		else if (call->conn->avail_calls == 1)
-			list_move_tail(&call->conn->bundle_link,
-				       &call->conn->bundle->avail_conns);
+	spin_lock(&conn->trans->client_lock);
+	write_lock_bh(&conn->lock);
+	write_lock(&call->state_lock);
+
+	if (conn->channels[call->channel] == call)
+		conn->channels[call->channel] = NULL;
+
+	if (conn->out_clientflag && conn->bundle) {
+		conn->avail_calls++;
+		switch (conn->avail_calls) {
+		case 1:
+			list_move_tail(&conn->bundle_link,
+				       &conn->bundle->avail_conns);
+		case 2 ... RXRPC_MAXCALLS - 1:
+			ASSERT(conn->channels[0] == NULL ||
+			       conn->channels[1] == NULL ||
+			       conn->channels[2] == NULL ||
+			       conn->channels[3] == NULL);
+			break;
+		case RXRPC_MAXCALLS:
+			list_move_tail(&conn->bundle_link,
+				       &conn->bundle->unused_conns);
+			ASSERT(conn->channels[0] == NULL &&
+			       conn->channels[1] == NULL &&
+			       conn->channels[2] == NULL &&
+			       conn->channels[3] == NULL);
+			break;
+		default:
+			printk(KERN_ERR "RxRPC: conn->avail_calls=%d\n",
+			       conn->avail_calls);
+			BUG();
+		}
 	}
 
-	write_lock(&call->state_lock);
-	if (call->conn->channels[call->channel] == call)
-		call->conn->channels[call->channel] = NULL;
+	spin_unlock(&conn->trans->client_lock);
 
 	if (call->state < RXRPC_CALL_COMPLETE &&
 	    call->state != RXRPC_CALL_CLIENT_FINAL_ACK) {
@@ -455,13 +473,12 @@ void rxrpc_release_call(struct rxrpc_call *call)
 		call->state = RXRPC_CALL_LOCALLY_ABORTED;
 		call->abort_code = RX_CALL_DEAD;
 		set_bit(RXRPC_CALL_ABORT, &call->events);
-		schedule_work(&call->processor);
+		rxrpc_queue_call(call);
 	}
 	write_unlock(&call->state_lock);
-	write_unlock_bh(&call->conn->lock);
-	if (call->conn->out_clientflag)
-		spin_unlock(&call->conn->trans->client_lock);
+	write_unlock_bh(&conn->lock);
 
+	/* clean up the Rx queue */
 	if (!skb_queue_empty(&call->rx_queue) ||
 	    !skb_queue_empty(&call->rx_oos_queue)) {
 		struct rxrpc_skb_priv *sp;
@@ -538,7 +555,7 @@ static void rxrpc_mark_call_released(struct rxrpc_call *call)
 		if (!test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
 			sched = true;
 		if (sched)
-			schedule_work(&call->processor);
+			rxrpc_queue_call(call);
 	}
 	write_unlock(&call->state_lock);
 }
@@ -588,7 +605,7 @@ void __rxrpc_put_call(struct rxrpc_call *call)
 	if (atomic_dec_and_test(&call->usage)) {
 		_debug("call %d dead", call->debug_id);
 		ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
-		schedule_work(&call->destroyer);
+		rxrpc_queue_work(&call->destroyer);
 	}
 	_leave("");
 }
@@ -613,7 +630,7 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call)
 	ASSERTCMP(call->events, ==, 0);
 	if (work_pending(&call->processor)) {
 		_debug("defer destroy");
-		schedule_work(&call->destroyer);
+		rxrpc_queue_work(&call->destroyer);
 		return;
 	}
 
@@ -742,7 +759,7 @@ static void rxrpc_call_life_expired(unsigned long _call)
 	read_lock_bh(&call->state_lock);
 	if (call->state < RXRPC_CALL_COMPLETE) {
 		set_bit(RXRPC_CALL_LIFE_TIMER, &call->events);
-		schedule_work(&call->processor);
+		rxrpc_queue_call(call);
 	}
 	read_unlock_bh(&call->state_lock);
 }
@@ -763,7 +780,7 @@ static void rxrpc_resend_time_expired(unsigned long _call)
 	clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
 	if (call->state < RXRPC_CALL_COMPLETE &&
 	    !test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
-		schedule_work(&call->processor);
+		rxrpc_queue_call(call);
 	read_unlock_bh(&call->state_lock);
 }
 
@@ -782,6 +799,6 @@ static void rxrpc_ack_time_expired(unsigned long _call)
 	read_lock_bh(&call->state_lock);
 	if (call->state < RXRPC_CALL_COMPLETE &&
 	    !test_and_set_bit(RXRPC_CALL_ACK, &call->events))
-		schedule_work(&call->processor);
+		rxrpc_queue_call(call);
 	read_unlock_bh(&call->state_lock);
 }
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 01eb33c3057..43cb3e051ec 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -356,7 +356,7 @@ static int rxrpc_connect_exclusive(struct rxrpc_sock *rx,
 		conn->out_clientflag = RXRPC_CLIENT_INITIATED;
 		conn->cid = 0;
 		conn->state = RXRPC_CONN_CLIENT;
-		conn->avail_calls = RXRPC_MAXCALLS;
+		conn->avail_calls = RXRPC_MAXCALLS - 1;
 		conn->security_level = rx->min_sec_level;
 		conn->key = key_get(rx->key);
 
@@ -447,6 +447,11 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
 			if (--conn->avail_calls == 0)
 				list_move(&conn->bundle_link,
 					  &bundle->busy_conns);
+			ASSERTCMP(conn->avail_calls, <, RXRPC_MAXCALLS);
+			ASSERT(conn->channels[0] == NULL ||
+			       conn->channels[1] == NULL ||
+			       conn->channels[2] == NULL ||
+			       conn->channels[3] == NULL);
 			atomic_inc(&conn->usage);
 			break;
 		}
@@ -456,6 +461,12 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
 			conn = list_entry(bundle->unused_conns.next,
 					  struct rxrpc_connection,
 					  bundle_link);
+			ASSERTCMP(conn->avail_calls, ==, RXRPC_MAXCALLS);
+			conn->avail_calls = RXRPC_MAXCALLS - 1;
+			ASSERT(conn->channels[0] == NULL &&
+			       conn->channels[1] == NULL &&
+			       conn->channels[2] == NULL &&
+			       conn->channels[3] == NULL);
 			atomic_inc(&conn->usage);
 			list_move(&conn->bundle_link, &bundle->avail_conns);
 			break;
@@ -512,7 +523,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
 		candidate->state = RXRPC_CONN_CLIENT;
 		candidate->avail_calls = RXRPC_MAXCALLS;
 		candidate->security_level = rx->min_sec_level;
-		candidate->key = key_get(rx->key);
+		candidate->key = key_get(bundle->key);
 
 		ret = rxrpc_init_client_conn_security(candidate);
 		if (ret < 0) {
@@ -555,6 +566,10 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
 	for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
 		if (!conn->channels[chan])
 			goto found_channel;
+	ASSERT(conn->channels[0] == NULL ||
+	       conn->channels[1] == NULL ||
+	       conn->channels[2] == NULL ||
+	       conn->channels[3] == NULL);
 	BUG();
 
 found_channel:
@@ -567,6 +582,7 @@ found_channel:
 	_net("CONNECT client on conn %d chan %d as call %x",
 	     conn->debug_id, chan, ntohl(call->call_id));
 
+	ASSERTCMP(conn->avail_calls, <, RXRPC_MAXCALLS);
 	spin_unlock(&trans->client_lock);
 
 	rxrpc_add_call_ID_to_conn(conn, call);
@@ -778,7 +794,7 @@ void rxrpc_put_connection(struct rxrpc_connection *conn)
 	conn->put_time = xtime.tv_sec;
 	if (atomic_dec_and_test(&conn->usage)) {
 		_debug("zombie");
-		schedule_delayed_work(&rxrpc_connection_reap, 0);
+		rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
 	}
 
 	_leave("");
@@ -862,8 +878,8 @@ void rxrpc_connection_reaper(struct work_struct *work)
 	if (earliest != ULONG_MAX) {
 		_debug("reschedule reaper %ld", (long) earliest - now);
 		ASSERTCMP(earliest, >, now);
-		schedule_delayed_work(&rxrpc_connection_reap,
-				      (earliest - now) * HZ);
+		rxrpc_queue_delayed_work(&rxrpc_connection_reap,
+					 (earliest - now) * HZ);
 	}
 
 	/* then destroy all those pulled out */
@@ -889,7 +905,7 @@ void __exit rxrpc_destroy_all_connections(void)
 
 	rxrpc_connection_timeout = 0;
 	cancel_delayed_work(&rxrpc_connection_reap);
-	schedule_delayed_work(&rxrpc_connection_reap, 0);
+	rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
 
 	_leave("");
 }
diff --git a/net/rxrpc/ar-connevent.c b/net/rxrpc/ar-connevent.c
index 4b02815c1de..1ada43d5116 100644
--- a/net/rxrpc/ar-connevent.c
+++ b/net/rxrpc/ar-connevent.c
@@ -45,7 +45,7 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
 				set_bit(RXRPC_CALL_CONN_ABORT, &call->events);
 			else
 				set_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
-			schedule_work(&call->processor);
+			rxrpc_queue_call(call);
 		}
 		write_unlock(&call->state_lock);
 	}
@@ -133,7 +133,7 @@ void rxrpc_call_is_secure(struct rxrpc_call *call)
 		read_lock(&call->state_lock);
 		if (call->state < RXRPC_CALL_COMPLETE &&
 		    !test_and_set_bit(RXRPC_CALL_SECURED, &call->events))
-			schedule_work(&call->processor);
+			rxrpc_queue_call(call);
 		read_unlock(&call->state_lock);
 	}
 }
@@ -307,6 +307,22 @@ protocol_error:
 	goto out;
 }
 
+/*
+ * put a packet up for transport-level abort
+ */
+void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
+{
+	CHECK_SLAB_OKAY(&local->usage);
+
+	if (!atomic_inc_not_zero(&local->usage)) {
+		printk("resurrected on reject\n");
+		BUG();
+	}
+
+	skb_queue_tail(&local->reject_queue, skb);
+	rxrpc_queue_work(&local->rejecter);
+}
+
 /*
  * reject packets through the local endpoint
  */
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
index f5539e2f7b5..2c27df1ffa1 100644
--- a/net/rxrpc/ar-error.c
+++ b/net/rxrpc/ar-error.c
@@ -111,7 +111,7 @@ void rxrpc_UDP_error_report(struct sock *sk)
 
 	/* pass the transport ref to error_handler to release */
 	skb_queue_tail(&trans->error_queue, skb);
-	schedule_work(&trans->error_handler);
+	rxrpc_queue_work(&trans->error_handler);
 
 	/* reset and regenerate socket error */
 	spin_lock_bh(&sk->sk_error_queue.lock);
@@ -235,7 +235,7 @@ void rxrpc_UDP_error_handler(struct work_struct *work)
 			    call->state < RXRPC_CALL_NETWORK_ERROR) {
 				call->state = RXRPC_CALL_NETWORK_ERROR;
 				set_bit(RXRPC_CALL_RCVD_ERROR, &call->events);
-				schedule_work(&call->processor);
+				rxrpc_queue_call(call);
 			}
 			write_unlock(&call->state_lock);
 			list_del_init(&call->error_link);
@@ -245,7 +245,7 @@ void rxrpc_UDP_error_handler(struct work_struct *work)
 	}
 
 	if (!skb_queue_empty(&trans->error_queue))
-		schedule_work(&trans->error_handler);
+		rxrpc_queue_work(&trans->error_handler);
 
 	rxrpc_free_skb(skb);
 	rxrpc_put_transport(trans);
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 323c3454561..ceb5d619a1d 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -42,6 +42,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
 			bool force, bool terminal)
 {
 	struct rxrpc_skb_priv *sp;
+	struct rxrpc_sock *rx = call->socket;
 	struct sock *sk;
 	int skb_len, ret;
 
@@ -64,7 +65,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
 		return 0;
 	}
 
-	sk = &call->socket->sk;
+	sk = &rx->sk;
 
 	if (!force) {
 		/* cast skb->rcvbuf to unsigned...  It's pointless, but
@@ -89,25 +90,30 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
 		skb->sk = sk;
 		atomic_add(skb->truesize, &sk->sk_rmem_alloc);
 
-		/* Cache the SKB length before we tack it onto the receive
-		 * queue.  Once it is added it no longer belongs to us and
-		 * may be freed by other threads of control pulling packets
-		 * from the queue.
-		 */
-		skb_len = skb->len;
-
-		_net("post skb %p", skb);
-		__skb_queue_tail(&sk->sk_receive_queue, skb);
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
-
-		if (!sock_flag(sk, SOCK_DEAD))
-			sk->sk_data_ready(sk, skb_len);
-
 		if (terminal) {
 			_debug("<<<< TERMINAL MESSAGE >>>>");
 			set_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags);
 		}
 
+		/* allow interception by a kernel service */
+		if (rx->interceptor) {
+			rx->interceptor(sk, call->user_call_ID, skb);
+			spin_unlock_bh(&sk->sk_receive_queue.lock);
+		} else {
+
+			/* Cache the SKB length before we tack it onto the
+			 * receive queue.  Once it is added it no longer
+			 * belongs to us and may be freed by other threads of
+			 * control pulling packets from the queue */
+			skb_len = skb->len;
+
+			_net("post skb %p", skb);
+			__skb_queue_tail(&sk->sk_receive_queue, skb);
+			spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+			if (!sock_flag(sk, SOCK_DEAD))
+				sk->sk_data_ready(sk, skb_len);
+		}
 		skb = NULL;
 	} else {
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -232,7 +238,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
 		read_lock(&call->state_lock);
 		if (call->state < RXRPC_CALL_COMPLETE &&
 		    !test_and_set_bit(RXRPC_CALL_DRAIN_RX_OOS, &call->events))
-			schedule_work(&call->processor);
+			rxrpc_queue_call(call);
 		read_unlock(&call->state_lock);
 	}
 
@@ -267,7 +273,7 @@ enqueue_packet:
 	atomic_inc(&call->ackr_not_idle);
 	read_lock(&call->state_lock);
 	if (call->state < RXRPC_CALL_DEAD)
-		schedule_work(&call->processor);
+		rxrpc_queue_call(call);
 	read_unlock(&call->state_lock);
 	_leave(" = 0 [queued]");
 	return 0;
@@ -360,7 +366,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
 			call->state = RXRPC_CALL_REMOTELY_ABORTED;
 			call->abort_code = abort_code;
 			set_bit(RXRPC_CALL_RCVD_ABORT, &call->events);
-			schedule_work(&call->processor);
+			rxrpc_queue_call(call);
 		}
 		goto free_packet_unlock;
 
@@ -375,7 +381,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
 		case RXRPC_CALL_CLIENT_SEND_REQUEST:
 			call->state = RXRPC_CALL_SERVER_BUSY;
 			set_bit(RXRPC_CALL_RCVD_BUSY, &call->events);
-			schedule_work(&call->processor);
+			rxrpc_queue_call(call);
 		case RXRPC_CALL_SERVER_BUSY:
 			goto free_packet_unlock;
 		default:
@@ -419,7 +425,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
 		read_lock_bh(&call->state_lock);
 		if (call->state < RXRPC_CALL_DEAD) {
 			skb_queue_tail(&call->rx_queue, skb);
-			schedule_work(&call->processor);
+			rxrpc_queue_call(call);
 			skb = NULL;
 		}
 		read_unlock_bh(&call->state_lock);
@@ -434,7 +440,7 @@ protocol_error_locked:
 		call->state = RXRPC_CALL_LOCALLY_ABORTED;
 		call->abort_code = RX_PROTOCOL_ERROR;
 		set_bit(RXRPC_CALL_ABORT, &call->events);
-		schedule_work(&call->processor);
+		rxrpc_queue_call(call);
 	}
 free_packet_unlock:
 	write_unlock_bh(&call->state_lock);
@@ -506,7 +512,7 @@ protocol_error:
 		call->state = RXRPC_CALL_LOCALLY_ABORTED;
 		call->abort_code = RX_PROTOCOL_ERROR;
 		set_bit(RXRPC_CALL_ABORT, &call->events);
-		schedule_work(&call->processor);
+		rxrpc_queue_call(call);
 	}
 	write_unlock_bh(&call->state_lock);
 	_leave("");
@@ -542,7 +548,7 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
 	switch (call->state) {
 	case RXRPC_CALL_LOCALLY_ABORTED:
 		if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
-			schedule_work(&call->processor);
+			rxrpc_queue_call(call);
 	case RXRPC_CALL_REMOTELY_ABORTED:
 	case RXRPC_CALL_NETWORK_ERROR:
 	case RXRPC_CALL_DEAD:
@@ -591,7 +597,7 @@ dead_call:
 	    sp->hdr.seq == __constant_cpu_to_be32(1)) {
 		_debug("incoming call");
 		skb_queue_tail(&conn->trans->local->accept_queue, skb);
-		schedule_work(&conn->trans->local->acceptor);
+		rxrpc_queue_work(&conn->trans->local->acceptor);
 		goto done;
 	}
 
@@ -630,7 +636,7 @@ found_completed_call:
 	_debug("final ack again");
 	rxrpc_get_call(call);
 	set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
-	schedule_work(&call->processor);
+	rxrpc_queue_call(call);
 
 free_unlock:
 	read_unlock(&call->state_lock);
@@ -651,7 +657,7 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
 
 	atomic_inc(&conn->usage);
 	skb_queue_tail(&conn->rx_queue, skb);
-	schedule_work(&conn->processor);
+	rxrpc_queue_conn(conn);
 }
 
 /*
@@ -767,7 +773,7 @@ cant_route_call:
 		if (sp->hdr.seq == __constant_cpu_to_be32(1)) {
 			_debug("first packet");
 			skb_queue_tail(&local->accept_queue, skb);
-			schedule_work(&local->acceptor);
+			rxrpc_queue_work(&local->acceptor);
 			rxrpc_put_local(local);
 			_leave(" [incoming]");
 			return;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 7bfbf471c81..cb1eb492ee4 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -19,8 +19,6 @@
 #define CHECK_SLAB_OKAY(X) do {} while(0)
 #endif
 
-extern atomic_t rxrpc_n_skbs;
-
 #define FCRYPT_BSIZE 8
 struct rxrpc_crypt {
 	union {
@@ -29,8 +27,12 @@ struct rxrpc_crypt {
 	};
 } __attribute__((aligned(8)));
 
-extern __be32 rxrpc_epoch;		/* local epoch for detecting local-end reset */
-extern atomic_t rxrpc_debug_id;		/* current debugging ID */
+#define rxrpc_queue_work(WS)	queue_work(rxrpc_workqueue, (WS))
+#define rxrpc_queue_delayed_work(WS,D)	\
+	queue_delayed_work(rxrpc_workqueue, (WS), (D))
+
+#define rxrpc_queue_call(CALL)	rxrpc_queue_work(&(CALL)->processor)
+#define rxrpc_queue_conn(CONN)	rxrpc_queue_work(&(CONN)->processor)
 
 /*
  * sk_state for RxRPC sockets
@@ -50,6 +52,7 @@ enum {
 struct rxrpc_sock {
 	/* WARNING: sk has to be the first member */
 	struct sock		sk;
+	rxrpc_interceptor_t	interceptor;	/* kernel service Rx interceptor function */
 	struct rxrpc_local	*local;		/* local endpoint */
 	struct rxrpc_transport	*trans;		/* transport handler */
 	struct rxrpc_conn_bundle *bundle;	/* virtual connection bundle */
@@ -91,16 +94,6 @@ struct rxrpc_skb_priv {
 
 #define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb)
 
-enum {
-	RXRPC_SKB_MARK_DATA,		/* data message */
-	RXRPC_SKB_MARK_FINAL_ACK,	/* final ACK received message */
-	RXRPC_SKB_MARK_BUSY,		/* server busy message */
-	RXRPC_SKB_MARK_REMOTE_ABORT,	/* remote abort message */
-	RXRPC_SKB_MARK_NET_ERROR,	/* network error message */
-	RXRPC_SKB_MARK_LOCAL_ERROR,	/* local error message */
-	RXRPC_SKB_MARK_NEW_CALL,	/* local error message */
-};
-
 enum rxrpc_command {
 	RXRPC_CMD_SEND_DATA,		/* send data message */
 	RXRPC_CMD_SEND_ABORT,		/* request abort generation */
@@ -439,25 +432,20 @@ static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code)
 }
 
 /*
- * put a packet up for transport-level abort
+ * af_rxrpc.c
  */
-static inline
-void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
-{
-	CHECK_SLAB_OKAY(&local->usage);
-	if (!atomic_inc_not_zero(&local->usage)) {
-		printk("resurrected on reject\n");
-		BUG();
-	}
-	skb_queue_tail(&local->reject_queue, skb);
-	schedule_work(&local->rejecter);
-}
+extern atomic_t rxrpc_n_skbs;
+extern __be32 rxrpc_epoch;
+extern atomic_t rxrpc_debug_id;
+extern struct workqueue_struct *rxrpc_workqueue;
 
 /*
  * ar-accept.c
  */
 extern void rxrpc_accept_incoming_calls(struct work_struct *);
-extern int rxrpc_accept_call(struct rxrpc_sock *, unsigned long);
+extern struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *,
+					    unsigned long);
+extern int rxrpc_reject_call(struct rxrpc_sock *);
 
 /*
  * ar-ack.c
@@ -514,6 +502,7 @@ rxrpc_incoming_connection(struct rxrpc_transport *, struct rxrpc_header *,
  * ar-connevent.c
  */
 extern void rxrpc_process_connection(struct work_struct *);
+extern void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *);
 extern void rxrpc_reject_packets(struct work_struct *);
 
 /*
@@ -583,6 +572,7 @@ extern struct file_operations rxrpc_connection_seq_fops;
 /*
  * ar-recvmsg.c
  */
+extern void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *);
 extern int rxrpc_recvmsg(struct kiocb *, struct socket *, struct msghdr *,
 			 size_t, int);
 
diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c
index a20a2c0fe10..fe03f71f17d 100644
--- a/net/rxrpc/ar-local.c
+++ b/net/rxrpc/ar-local.c
@@ -228,7 +228,7 @@ void rxrpc_put_local(struct rxrpc_local *local)
 	write_lock_bh(&rxrpc_local_lock);
 	if (unlikely(atomic_dec_and_test(&local->usage))) {
 		_debug("destroy local");
-		schedule_work(&local->destroyer);
+		rxrpc_queue_work(&local->destroyer);
 	}
 	write_unlock_bh(&rxrpc_local_lock);
 	_leave("");
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index 67aa9510f09..5cdde4a48ed 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -113,7 +113,7 @@ static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
 		clear_bit(RXRPC_CALL_RESEND_TIMER, &call->events);
 		clear_bit(RXRPC_CALL_ACK, &call->events);
 		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
-		schedule_work(&call->processor);
+		rxrpc_queue_call(call);
 	}
 
 	write_unlock_bh(&call->state_lock);
@@ -194,6 +194,77 @@ int rxrpc_client_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
 	return ret;
 }
 
+/**
+ * rxrpc_kernel_send_data - Allow a kernel service to send data on a call
+ * @call: The call to send data through
+ * @msg: The data to send
+ * @len: The amount of data to send
+ *
+ * Allow a kernel service to send data on a call.  The call must be in an state
+ * appropriate to sending data.  No control data should be supplied in @msg,
+ * nor should an address be supplied.  MSG_MORE should be flagged if there's
+ * more data to come, otherwise this data will end the transmission phase.
+ */
+int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
+			   size_t len)
+{
+	int ret;
+
+	_enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
+
+	ASSERTCMP(msg->msg_name, ==, NULL);
+	ASSERTCMP(msg->msg_control, ==, NULL);
+
+	lock_sock(&call->socket->sk);
+
+	_debug("CALL %d USR %lx ST %d on CONN %p",
+	       call->debug_id, call->user_call_ID, call->state, call->conn);
+
+	if (call->state >= RXRPC_CALL_COMPLETE) {
+		ret = -ESHUTDOWN; /* it's too late for this call */
+	} else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+		   call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+		   call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+		ret = -EPROTO; /* request phase complete for this client call */
+	} else {
+		mm_segment_t oldfs = get_fs();
+		set_fs(KERNEL_DS);
+		ret = rxrpc_send_data(NULL, call->socket, call, msg, len);
+		set_fs(oldfs);
+	}
+
+	release_sock(&call->socket->sk);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_send_data);
+
+/*
+ * rxrpc_kernel_abort_call - Allow a kernel service to abort a call
+ * @call: The call to be aborted
+ * @abort_code: The abort code to stick into the ABORT packet
+ *
+ * Allow a kernel service to abort a call, if it's still in an abortable state.
+ */
+void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code)
+{
+	_enter("{%d},%d", call->debug_id, abort_code);
+
+	lock_sock(&call->socket->sk);
+
+	_debug("CALL %d USR %lx ST %d on CONN %p",
+	       call->debug_id, call->user_call_ID, call->state, call->conn);
+
+	if (call->state < RXRPC_CALL_COMPLETE)
+		rxrpc_send_abort(call, abort_code);
+
+	release_sock(&call->socket->sk);
+	_leave("");
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_abort_call);
+
 /*
  * send a message through a server socket
  * - caller holds the socket locked
@@ -214,8 +285,13 @@ int rxrpc_server_sendmsg(struct kiocb *iocb, struct rxrpc_sock *rx,
 	if (ret < 0)
 		return ret;
 
-	if (cmd == RXRPC_CMD_ACCEPT)
-		return rxrpc_accept_call(rx, user_call_ID);
+	if (cmd == RXRPC_CMD_ACCEPT) {
+		call = rxrpc_accept_call(rx, user_call_ID);
+		if (IS_ERR(call))
+			return PTR_ERR(call);
+		rxrpc_put_call(call);
+		return 0;
+	}
 
 	call = rxrpc_find_server_call(rx, user_call_ID);
 	if (!call)
@@ -363,7 +439,7 @@ static inline void rxrpc_instant_resend(struct rxrpc_call *call)
 		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
 		if (call->state < RXRPC_CALL_COMPLETE &&
 		    !test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
-			schedule_work(&call->processor);
+			rxrpc_queue_call(call);
 	}
 	read_unlock_bh(&call->state_lock);
 }
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index 69ac355546a..d399de4a7fe 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -219,7 +219,7 @@ void rxrpc_put_peer(struct rxrpc_peer *peer)
 		return;
 	}
 
-	schedule_work(&peer->destroyer);
+	rxrpc_queue_work(&peer->destroyer);
 	_leave("");
 }
 
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index e947d5c1590..f19121d4795 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -19,7 +19,7 @@
  * removal a call's user ID from the socket tree to make the user ID available
  * again and so that it won't be seen again in association with that call
  */
-static void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call)
+void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call)
 {
 	_debug("RELEASE CALL %d", call->debug_id);
 
@@ -33,7 +33,7 @@ static void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call)
 	read_lock_bh(&call->state_lock);
 	if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
 	    !test_and_set_bit(RXRPC_CALL_RELEASE, &call->events))
-		schedule_work(&call->processor);
+		rxrpc_queue_call(call);
 	read_unlock_bh(&call->state_lock);
 }
 
@@ -364,3 +364,74 @@ wait_error:
 	return copied;
 
 }
+
+/**
+ * rxrpc_kernel_data_delivered - Record delivery of data message
+ * @skb: Message holding data
+ *
+ * Record the delivery of a data message.  This permits RxRPC to keep its
+ * tracking correct.  The socket buffer will be deleted.
+ */
+void rxrpc_kernel_data_delivered(struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	struct rxrpc_call *call = sp->call;
+
+	ASSERTCMP(ntohl(sp->hdr.seq), >=, call->rx_data_recv);
+	ASSERTCMP(ntohl(sp->hdr.seq), <=, call->rx_data_recv + 1);
+	call->rx_data_recv = ntohl(sp->hdr.seq);
+
+	ASSERTCMP(ntohl(sp->hdr.seq), >, call->rx_data_eaten);
+	rxrpc_free_skb(skb);
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_data_delivered);
+
+/**
+ * rxrpc_kernel_is_data_last - Determine if data message is last one
+ * @skb: Message holding data
+ *
+ * Determine if data message is last one for the parent call.
+ */
+bool rxrpc_kernel_is_data_last(struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+	ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_DATA);
+
+	return sp->hdr.flags & RXRPC_LAST_PACKET;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_is_data_last);
+
+/**
+ * rxrpc_kernel_get_abort_code - Get the abort code from an RxRPC abort message
+ * @skb: Message indicating an abort
+ *
+ * Get the abort code from an RxRPC abort message.
+ */
+u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+	ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_REMOTE_ABORT);
+
+	return sp->call->abort_code;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_get_abort_code);
+
+/**
+ * rxrpc_kernel_get_error - Get the error number from an RxRPC error message
+ * @skb: Message indicating an error
+ *
+ * Get the error number from an RxRPC error message.
+ */
+int rxrpc_kernel_get_error_number(struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+	return sp->error;
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_get_error_number);
diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/ar-skbuff.c
index d73f6fc7601..de755e04d29 100644
--- a/net/rxrpc/ar-skbuff.c
+++ b/net/rxrpc/ar-skbuff.c
@@ -36,7 +36,7 @@ static void rxrpc_request_final_ACK(struct rxrpc_call *call)
 		rxrpc_get_call(call);
 		set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
 		if (try_to_del_timer_sync(&call->ack_timer) >= 0)
-			schedule_work(&call->processor);
+			rxrpc_queue_call(call);
 		break;
 
 	case RXRPC_CALL_SERVER_RECV_REQUEST:
@@ -116,3 +116,17 @@ void rxrpc_packet_destructor(struct sk_buff *skb)
 		sock_rfree(skb);
 	_leave("");
 }
+
+/**
+ * rxrpc_kernel_free_skb - Free an RxRPC socket buffer
+ * @skb: The socket buffer to be freed
+ *
+ * Let RxRPC free its own socket buffer, permitting it to maintain debug
+ * accounting.
+ */
+void rxrpc_kernel_free_skb(struct sk_buff *skb)
+{
+	rxrpc_free_skb(skb);
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_free_skb);
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
index 9b4e5cb545d..d43d78f1930 100644
--- a/net/rxrpc/ar-transport.c
+++ b/net/rxrpc/ar-transport.c
@@ -189,7 +189,7 @@ void rxrpc_put_transport(struct rxrpc_transport *trans)
 		/* let the reaper determine the timeout to avoid a race with
 		 * overextending the timeout if the reaper is running at the
 		 * same time */
-		schedule_delayed_work(&rxrpc_transport_reap, 0);
+		rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0);
 	_leave("");
 }
 
@@ -243,8 +243,8 @@ static void rxrpc_transport_reaper(struct work_struct *work)
 	if (earliest != ULONG_MAX) {
 		_debug("reschedule reaper %ld", (long) earliest - now);
 		ASSERTCMP(earliest, >, now);
-		schedule_delayed_work(&rxrpc_transport_reap,
-				      (earliest - now) * HZ);
+		rxrpc_queue_delayed_work(&rxrpc_transport_reap,
+					 (earliest - now) * HZ);
 	}
 
 	/* then destroy all those pulled out */
@@ -270,7 +270,7 @@ void __exit rxrpc_destroy_all_transports(void)
 
 	rxrpc_transport_timeout = 0;
 	cancel_delayed_work(&rxrpc_transport_reap);
-	schedule_delayed_work(&rxrpc_transport_reap, 0);
+	rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0);
 
 	_leave("");
 }
-- 
cgit v1.2.3


From 08e0e7c82eeadec6f4871a386b86bf0f0fbcb4eb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 26 Apr 2007 15:55:03 -0700
Subject: [AF_RXRPC]: Make the in-kernel AFS filesystem use AF_RXRPC.

Make the in-kernel AFS filesystem use AF_RXRPC instead of the old RxRPC code.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/Kconfig          |   13 +-
 fs/afs/Makefile     |    3 +-
 fs/afs/afs.h        |  105 +++++
 fs/afs/afs_cm.h     |   28 ++
 fs/afs/afs_fs.h     |   41 ++
 fs/afs/afs_vl.h     |   84 ++++
 fs/afs/cache.c      |  256 ++++++++++++
 fs/afs/callback.c   |  469 ++++++++++++++++-----
 fs/afs/cell.c       |  344 ++++-----------
 fs/afs/cell.h       |   70 ----
 fs/afs/cmservice.c  |  781 +++++++++++-----------------------
 fs/afs/cmservice.h  |   28 --
 fs/afs/dir.c        |  286 ++++++++-----
 fs/afs/errors.h     |   36 --
 fs/afs/file.c       |   39 +-
 fs/afs/fsclient.c   | 1042 ++++++++++++++--------------------------------
 fs/afs/fsclient.h   |   54 ---
 fs/afs/inode.c      |  107 ++---
 fs/afs/internal.h   |  599 ++++++++++++++++++++++++--
 fs/afs/kafsasyncd.c |  247 -----------
 fs/afs/kafsasyncd.h |   50 ---
 fs/afs/kafstimod.c  |  194 ---------
 fs/afs/kafstimod.h  |   45 --
 fs/afs/main.c       |  135 +-----
 fs/afs/misc.c       |   11 +-
 fs/afs/mntpt.c      |  106 +++--
 fs/afs/mount.h      |   23 -
 fs/afs/proc.c       |   73 ++--
 fs/afs/rxrpc.c      |  666 +++++++++++++++++++++++++++++
 fs/afs/server.c     |  624 ++++++++++------------------
 fs/afs/server.h     |   97 -----
 fs/afs/super.c      |  106 +++--
 fs/afs/super.h      |   39 --
 fs/afs/transport.h  |   21 -
 fs/afs/types.h      |  109 -----
 fs/afs/vlclient.c   |  709 ++++++-------------------------
 fs/afs/vlclient.h   |   85 ----
 fs/afs/vlocation.c  | 1153 +++++++++++++++++++++------------------------------
 fs/afs/vnode.c      |  388 +++++++++--------
 fs/afs/vnode.h      |   84 ----
 fs/afs/volume.c     |  141 +++----
 fs/afs/volume.h     |  126 ------
 42 files changed, 4194 insertions(+), 5423 deletions(-)
 create mode 100644 fs/afs/afs.h
 create mode 100644 fs/afs/afs_cm.h
 create mode 100644 fs/afs/afs_fs.h
 create mode 100644 fs/afs/afs_vl.h
 create mode 100644 fs/afs/cache.c
 delete mode 100644 fs/afs/cell.h
 delete mode 100644 fs/afs/cmservice.h
 delete mode 100644 fs/afs/errors.h
 delete mode 100644 fs/afs/fsclient.h
 delete mode 100644 fs/afs/kafsasyncd.c
 delete mode 100644 fs/afs/kafsasyncd.h
 delete mode 100644 fs/afs/kafstimod.c
 delete mode 100644 fs/afs/kafstimod.h
 delete mode 100644 fs/afs/mount.h
 create mode 100644 fs/afs/rxrpc.c
 delete mode 100644 fs/afs/server.h
 delete mode 100644 fs/afs/super.h
 delete mode 100644 fs/afs/transport.h
 delete mode 100644 fs/afs/types.h
 delete mode 100644 fs/afs/vlclient.h
 delete mode 100644 fs/afs/vnode.h
 delete mode 100644 fs/afs/volume.h

diff --git a/fs/Kconfig b/fs/Kconfig
index 3c4886b849f..075c9997ddc 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -2019,7 +2019,7 @@ config CODA_FS_OLD_API
 config AFS_FS
 	tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
 	depends on INET && EXPERIMENTAL
-	select RXRPC
+	select AF_RXRPC
 	help
 	  If you say Y here, you will get an experimental Andrew File System
 	  driver. It currently only supports unsecured read-only AFS access.
@@ -2028,6 +2028,17 @@ config AFS_FS
 
 	  If unsure, say N.
 
+config AFS_DEBUG
+	bool "AFS dynamic debugging"
+	depends on AFS_FS
+	help
+	  Say Y here to make runtime controllable debugging messages appear.
+
+	  See <file:Documentation/filesystems/afs.txt> for more information.
+
+	  If unsure, say N.
+
+
 config RXRPC
 	tristate
 
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 8e719737967..66bdc219ccd 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -10,12 +10,11 @@ kafs-objs := \
 	file.o \
 	fsclient.o \
 	inode.o \
-	kafsasyncd.o \
-	kafstimod.o \
 	main.o \
 	misc.o \
 	mntpt.o \
 	proc.o \
+	rxrpc.o \
 	server.o \
 	super.o \
 	vlclient.o \
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
new file mode 100644
index 00000000000..b9d2d2ceaf4
--- /dev/null
+++ b/fs/afs/afs.h
@@ -0,0 +1,105 @@
+/* AFS common types
+ *
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef AFS_H
+#define AFS_H
+
+#include <linux/in.h>
+
+typedef unsigned			afs_volid_t;
+typedef unsigned			afs_vnodeid_t;
+typedef unsigned long long		afs_dataversion_t;
+
+typedef enum {
+	AFSVL_RWVOL,			/* read/write volume */
+	AFSVL_ROVOL,			/* read-only volume */
+	AFSVL_BACKVOL,			/* backup volume */
+} __attribute__((packed)) afs_voltype_t;
+
+typedef enum {
+	AFS_FTYPE_INVALID	= 0,
+	AFS_FTYPE_FILE		= 1,
+	AFS_FTYPE_DIR		= 2,
+	AFS_FTYPE_SYMLINK	= 3,
+} afs_file_type_t;
+
+/*
+ * AFS file identifier
+ */
+struct afs_fid {
+	afs_volid_t	vid;		/* volume ID */
+	afs_vnodeid_t	vnode;		/* file index within volume */
+	unsigned	unique;		/* unique ID number (file index version) */
+};
+
+/*
+ * AFS callback notification
+ */
+typedef enum {
+	AFSCM_CB_UNTYPED	= 0,	/* no type set on CB break */
+	AFSCM_CB_EXCLUSIVE	= 1,	/* CB exclusive to CM [not implemented] */
+	AFSCM_CB_SHARED		= 2,	/* CB shared by other CM's */
+	AFSCM_CB_DROPPED	= 3,	/* CB promise cancelled by file server */
+} afs_callback_type_t;
+
+struct afs_callback {
+	struct afs_fid		fid;		/* file identifier */
+	unsigned		version;	/* callback version */
+	unsigned		expiry;		/* time at which expires */
+	afs_callback_type_t	type;		/* type of callback */
+};
+
+#define AFSCBMAX 50	/* maximum callbacks transferred per bulk op */
+
+/*
+ * AFS volume information
+ */
+struct afs_volume_info {
+	afs_volid_t		vid;		/* volume ID */
+	afs_voltype_t		type;		/* type of this volume */
+	afs_volid_t		type_vids[5];	/* volume ID's for possible types for this vol */
+
+	/* list of fileservers serving this volume */
+	size_t			nservers;	/* number of entries used in servers[] */
+	struct {
+		struct in_addr	addr;		/* fileserver address */
+	} servers[8];
+};
+
+/*
+ * AFS file status information
+ */
+struct afs_file_status {
+	unsigned		if_version;	/* interface version */
+#define AFS_FSTATUS_VERSION	1
+
+	afs_file_type_t		type;		/* file type */
+	unsigned		nlink;		/* link count */
+	size_t			size;		/* file size */
+	afs_dataversion_t	data_version;	/* current data version */
+	unsigned		author;		/* author ID */
+	unsigned		owner;		/* owner ID */
+	unsigned		caller_access;	/* access rights for authenticated caller */
+	unsigned		anon_access;	/* access rights for unauthenticated caller */
+	umode_t			mode;		/* UNIX mode */
+	struct afs_fid		parent;		/* parent file ID */
+	time_t			mtime_client;	/* last time client changed data */
+	time_t			mtime_server;	/* last time server changed data */
+};
+
+/*
+ * AFS volume synchronisation information
+ */
+struct afs_volsync {
+	time_t			creation;	/* volume creation time */
+};
+
+#endif /* AFS_H */
diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h
new file mode 100644
index 00000000000..7c8e3d43c8e
--- /dev/null
+++ b/fs/afs/afs_cm.h
@@ -0,0 +1,28 @@
+/* AFS Cache Manager definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef AFS_CM_H
+#define AFS_CM_H
+
+#define AFS_CM_PORT		7001	/* AFS file server port */
+#define CM_SERVICE		1	/* AFS File Service ID */
+
+enum AFS_CM_Operations {
+	CBCallBack		= 204,	/* break callback promises */
+	CBInitCallBackState	= 205,	/* initialise callback state */
+	CBProbe			= 206,	/* probe client */
+	CBGetLock		= 207,	/* get contents of CM lock table */
+	CBGetCE			= 208,	/* get cache file description */
+	CBGetXStatsVersion	= 209,	/* get version of extended statistics */
+	CBGetXStats		= 210,	/* get contents of extended statistics data */
+};
+
+#endif /* AFS_FS_H */
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
new file mode 100644
index 00000000000..fd385954f21
--- /dev/null
+++ b/fs/afs/afs_fs.h
@@ -0,0 +1,41 @@
+/* AFS File Service definitions
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef AFS_FS_H
+#define AFS_FS_H
+
+#define AFS_FS_PORT		7000	/* AFS file server port */
+#define FS_SERVICE		1	/* AFS File Service ID */
+
+enum AFS_FS_Operations {
+	FSFETCHSTATUS		= 132,	/* AFS Fetch file status */
+	FSFETCHDATA		= 130,	/* AFS Fetch file data */
+	FSGIVEUPCALLBACKS	= 147,	/* AFS Discard callback promises */
+	FSGETVOLUMEINFO		= 148,	/* AFS Get root volume information */
+	FSGETROOTVOLUME		= 151,	/* AFS Get root volume name */
+	FSLOOKUP		= 161	/* AFS lookup file in directory */
+};
+
+enum AFS_FS_Errors {
+	VSALVAGE	= 101,	/* volume needs salvaging */
+	VNOVNODE	= 102,	/* no such file/dir (vnode) */
+	VNOVOL		= 103,	/* no such volume or volume unavailable */
+	VVOLEXISTS	= 104,	/* volume name already exists */
+	VNOSERVICE	= 105,	/* volume not currently in service */
+	VOFFLINE	= 106,	/* volume is currently offline (more info available [VVL-spec]) */
+	VONLINE		= 107,	/* volume is already online */
+	VDISKFULL	= 108,	/* disk partition is full */
+	VOVERQUOTA	= 109,	/* volume's maximum quota exceeded */
+	VBUSY		= 110,	/* volume is temporarily unavailable */
+	VMOVED		= 111,	/* volume moved to new server - ask this FS where */
+};
+
+#endif /* AFS_FS_H */
diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h
new file mode 100644
index 00000000000..8bbefe009ed
--- /dev/null
+++ b/fs/afs/afs_vl.h
@@ -0,0 +1,84 @@
+/* AFS Volume Location Service client interface
+ *
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef AFS_VL_H
+#define AFS_VL_H
+
+#include "afs.h"
+
+#define AFS_VL_PORT		7003	/* volume location service port */
+#define VL_SERVICE		52	/* RxRPC service ID for the Volume Location service */
+
+enum AFSVL_Operations {
+	VLGETENTRYBYID		= 503,	/* AFS Get Cache Entry By ID operation ID */
+	VLGETENTRYBYNAME	= 504,	/* AFS Get Cache Entry By Name operation ID */
+	VLPROBE			= 514,	/* AFS Probe Volume Location Service operation ID */
+};
+
+enum AFSVL_Errors {
+	AFSVL_IDEXIST 		= 363520,	/* Volume Id entry exists in vl database */
+	AFSVL_IO 		= 363521,	/* I/O related error */
+	AFSVL_NAMEEXIST 	= 363522,	/* Volume name entry exists in vl database */
+	AFSVL_CREATEFAIL 	= 363523,	/* Internal creation failure */
+	AFSVL_NOENT 		= 363524,	/* No such entry */
+	AFSVL_EMPTY 		= 363525,	/* Vl database is empty */
+	AFSVL_ENTDELETED 	= 363526,	/* Entry is deleted (soft delete) */
+	AFSVL_BADNAME 		= 363527,	/* Volume name is illegal */
+	AFSVL_BADINDEX 		= 363528,	/* Index is out of range */
+	AFSVL_BADVOLTYPE 	= 363529,	/* Bad volume type */
+	AFSVL_BADSERVER 	= 363530,	/* Illegal server number (out of range) */
+	AFSVL_BADPARTITION 	= 363531,	/* Bad partition number */
+	AFSVL_REPSFULL 		= 363532,	/* Run out of space for Replication sites */
+	AFSVL_NOREPSERVER 	= 363533,	/* No such Replication server site exists */
+	AFSVL_DUPREPSERVER 	= 363534,	/* Replication site already exists */
+	AFSVL_RWNOTFOUND 	= 363535,	/* Parent R/W entry not found */
+	AFSVL_BADREFCOUNT 	= 363536,	/* Illegal Reference Count number */
+	AFSVL_SIZEEXCEEDED 	= 363537,	/* Vl size for attributes exceeded */
+	AFSVL_BADENTRY 		= 363538,	/* Bad incoming vl entry */
+	AFSVL_BADVOLIDBUMP 	= 363539,	/* Illegal max volid increment */
+	AFSVL_IDALREADYHASHED 	= 363540,	/* RO/BACK id already hashed */
+	AFSVL_ENTRYLOCKED 	= 363541,	/* Vl entry is already locked */
+	AFSVL_BADVOLOPER 	= 363542,	/* Bad volume operation code */
+	AFSVL_BADRELLOCKTYPE 	= 363543,	/* Bad release lock type */
+	AFSVL_RERELEASE 	= 363544,	/* Status report: last release was aborted */
+	AFSVL_BADSERVERFLAG 	= 363545,	/* Invalid replication site server °ag */
+	AFSVL_PERM 		= 363546,	/* No permission access */
+	AFSVL_NOMEM 		= 363547,	/* malloc/realloc failed to alloc enough memory */
+};
+
+/*
+ * maps to "struct vldbentry" in vvl-spec.pdf
+ */
+struct afs_vldbentry {
+	char		name[65];		/* name of volume (with NUL char) */
+	afs_voltype_t	type;			/* volume type */
+	unsigned	num_servers;		/* num servers that hold instances of this vol */
+	unsigned	clone_id;		/* cloning ID */
+
+	unsigned	flags;
+#define AFS_VLF_RWEXISTS	0x1000		/* R/W volume exists */
+#define AFS_VLF_ROEXISTS	0x2000		/* R/O volume exists */
+#define AFS_VLF_BACKEXISTS	0x4000		/* backup volume exists */
+
+	afs_volid_t	volume_ids[3];		/* volume IDs */
+
+	struct {
+		struct in_addr	addr;		/* server address */
+		unsigned	partition;	/* partition ID on this server */
+		unsigned	flags;		/* server specific flags */
+#define AFS_VLSF_NEWREPSITE	0x0001	/* unused */
+#define AFS_VLSF_ROVOL		0x0002	/* this server holds a R/O instance of the volume */
+#define AFS_VLSF_RWVOL		0x0004	/* this server holds a R/W instance of the volume */
+#define AFS_VLSF_BACKVOL	0x0008	/* this server holds a backup instance of the volume */
+	} servers[8];
+};
+
+#endif /* AFS_VL_H */
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
new file mode 100644
index 00000000000..de0d7de69ed
--- /dev/null
+++ b/fs/afs/cache.c
@@ -0,0 +1,256 @@
+/* AFS caching stuff
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifdef AFS_CACHING_SUPPORT
+static cachefs_match_val_t afs_cell_cache_match(void *target,
+						const void *entry);
+static void afs_cell_cache_update(void *source, void *entry);
+
+struct cachefs_index_def afs_cache_cell_index_def = {
+	.name			= "cell_ix",
+	.data_size		= sizeof(struct afs_cache_cell),
+	.keys[0]		= { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
+	.match			= afs_cell_cache_match,
+	.update			= afs_cell_cache_update,
+};
+#endif
+
+/*
+ * match a cell record obtained from the cache
+ */
+#ifdef AFS_CACHING_SUPPORT
+static cachefs_match_val_t afs_cell_cache_match(void *target,
+						const void *entry)
+{
+	const struct afs_cache_cell *ccell = entry;
+	struct afs_cell *cell = target;
+
+	_enter("{%s},{%s}", ccell->name, cell->name);
+
+	if (strncmp(ccell->name, cell->name, sizeof(ccell->name)) == 0) {
+		_leave(" = SUCCESS");
+		return CACHEFS_MATCH_SUCCESS;
+	}
+
+	_leave(" = FAILED");
+	return CACHEFS_MATCH_FAILED;
+}
+#endif
+
+/*
+ * update a cell record in the cache
+ */
+#ifdef AFS_CACHING_SUPPORT
+static void afs_cell_cache_update(void *source, void *entry)
+{
+	struct afs_cache_cell *ccell = entry;
+	struct afs_cell *cell = source;
+
+	_enter("%p,%p", source, entry);
+
+	strncpy(ccell->name, cell->name, sizeof(ccell->name));
+
+	memcpy(ccell->vl_servers,
+	       cell->vl_addrs,
+	       min(sizeof(ccell->vl_servers), sizeof(cell->vl_addrs)));
+
+}
+#endif
+
+#ifdef AFS_CACHING_SUPPORT
+static cachefs_match_val_t afs_vlocation_cache_match(void *target,
+						     const void *entry);
+static void afs_vlocation_cache_update(void *source, void *entry);
+
+struct cachefs_index_def afs_vlocation_cache_index_def = {
+	.name		= "vldb",
+	.data_size	= sizeof(struct afs_cache_vlocation),
+	.keys[0]	= { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
+	.match		= afs_vlocation_cache_match,
+	.update		= afs_vlocation_cache_update,
+};
+#endif
+
+/*
+ * match a VLDB record stored in the cache
+ * - may also load target from entry
+ */
+#ifdef AFS_CACHING_SUPPORT
+static cachefs_match_val_t afs_vlocation_cache_match(void *target,
+						     const void *entry)
+{
+	const struct afs_cache_vlocation *vldb = entry;
+	struct afs_vlocation *vlocation = target;
+
+	_enter("{%s},{%s}", vlocation->vldb.name, vldb->name);
+
+	if (strncmp(vlocation->vldb.name, vldb->name, sizeof(vldb->name)) == 0
+	    ) {
+		if (!vlocation->valid ||
+		    vlocation->vldb.rtime == vldb->rtime
+		    ) {
+			vlocation->vldb = *vldb;
+			vlocation->valid = 1;
+			_leave(" = SUCCESS [c->m]");
+			return CACHEFS_MATCH_SUCCESS;
+		} else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) {
+			/* delete if VIDs for this name differ */
+			if (memcmp(&vlocation->vldb.vid,
+				   &vldb->vid,
+				   sizeof(vldb->vid)) != 0) {
+				_leave(" = DELETE");
+				return CACHEFS_MATCH_SUCCESS_DELETE;
+			}
+
+			_leave(" = UPDATE");
+			return CACHEFS_MATCH_SUCCESS_UPDATE;
+		} else {
+			_leave(" = SUCCESS");
+			return CACHEFS_MATCH_SUCCESS;
+		}
+	}
+
+	_leave(" = FAILED");
+	return CACHEFS_MATCH_FAILED;
+}
+#endif
+
+/*
+ * update a VLDB record stored in the cache
+ */
+#ifdef AFS_CACHING_SUPPORT
+static void afs_vlocation_cache_update(void *source, void *entry)
+{
+	struct afs_cache_vlocation *vldb = entry;
+	struct afs_vlocation *vlocation = source;
+
+	_enter("");
+
+	*vldb = vlocation->vldb;
+}
+#endif
+
+#ifdef AFS_CACHING_SUPPORT
+static cachefs_match_val_t afs_volume_cache_match(void *target,
+						  const void *entry);
+static void afs_volume_cache_update(void *source, void *entry);
+
+struct cachefs_index_def afs_volume_cache_index_def = {
+	.name		= "volume",
+	.data_size	= sizeof(struct afs_cache_vhash),
+	.keys[0]	= { CACHEFS_INDEX_KEYS_BIN, 1 },
+	.keys[1]	= { CACHEFS_INDEX_KEYS_BIN, 1 },
+	.match		= afs_volume_cache_match,
+	.update		= afs_volume_cache_update,
+};
+#endif
+
+/*
+ * match a volume hash record stored in the cache
+ */
+#ifdef AFS_CACHING_SUPPORT
+static cachefs_match_val_t afs_volume_cache_match(void *target,
+						  const void *entry)
+{
+	const struct afs_cache_vhash *vhash = entry;
+	struct afs_volume *volume = target;
+
+	_enter("{%u},{%u}", volume->type, vhash->vtype);
+
+	if (volume->type == vhash->vtype) {
+		_leave(" = SUCCESS");
+		return CACHEFS_MATCH_SUCCESS;
+	}
+
+	_leave(" = FAILED");
+	return CACHEFS_MATCH_FAILED;
+}
+#endif
+
+/*
+ * update a volume hash record stored in the cache
+ */
+#ifdef AFS_CACHING_SUPPORT
+static void afs_volume_cache_update(void *source, void *entry)
+{
+	struct afs_cache_vhash *vhash = entry;
+	struct afs_volume *volume = source;
+
+	_enter("");
+
+	vhash->vtype = volume->type;
+}
+#endif
+
+#ifdef AFS_CACHING_SUPPORT
+static cachefs_match_val_t afs_vnode_cache_match(void *target,
+						 const void *entry);
+static void afs_vnode_cache_update(void *source, void *entry);
+
+struct cachefs_index_def afs_vnode_cache_index_def = {
+	.name		= "vnode",
+	.data_size	= sizeof(struct afs_cache_vnode),
+	.keys[0]	= { CACHEFS_INDEX_KEYS_BIN, 4 },
+	.match		= afs_vnode_cache_match,
+	.update		= afs_vnode_cache_update,
+};
+#endif
+
+/*
+ * match a vnode record stored in the cache
+ */
+#ifdef AFS_CACHING_SUPPORT
+static cachefs_match_val_t afs_vnode_cache_match(void *target,
+						 const void *entry)
+{
+	const struct afs_cache_vnode *cvnode = entry;
+	struct afs_vnode *vnode = target;
+
+	_enter("{%x,%x,%Lx},{%x,%x,%Lx}",
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       vnode->status.version,
+	       cvnode->vnode_id,
+	       cvnode->vnode_unique,
+	       cvnode->data_version);
+
+	if (vnode->fid.vnode != cvnode->vnode_id) {
+		_leave(" = FAILED");
+		return CACHEFS_MATCH_FAILED;
+	}
+
+	if (vnode->fid.unique != cvnode->vnode_unique ||
+	    vnode->status.version != cvnode->data_version) {
+		_leave(" = DELETE");
+		return CACHEFS_MATCH_SUCCESS_DELETE;
+	}
+
+	_leave(" = SUCCESS");
+	return CACHEFS_MATCH_SUCCESS;
+}
+#endif
+
+/*
+ * update a vnode record stored in the cache
+ */
+#ifdef AFS_CACHING_SUPPORT
+static void afs_vnode_cache_update(void *source, void *entry)
+{
+	struct afs_cache_vnode *cvnode = entry;
+	struct afs_vnode *vnode = source;
+
+	_enter("");
+
+	cvnode->vnode_id	= vnode->fid.vnode;
+	cvnode->vnode_unique	= vnode->fid.unique;
+	cvnode->data_version	= vnode->status.version;
+}
+#endif
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 26a48fea42f..61121554714 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2002, 2007 Red Hat, Inc. All rights reserved.
  *
  * This software may be freely redistributed under the terms of the
  * GNU General Public License.
@@ -16,83 +16,182 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include "server.h"
-#include "vnode.h"
+#include <linux/circ_buf.h>
 #include "internal.h"
-#include "cmservice.h"
+
+unsigned afs_vnode_update_timeout = 10;
+
+#define afs_breakring_space(server) \
+	CIRC_SPACE((server)->cb_break_head, (server)->cb_break_tail,	\
+		   ARRAY_SIZE((server)->cb_break))
+
+//static void afs_callback_updater(struct work_struct *);
+
+static struct workqueue_struct *afs_callback_update_worker;
 
 /*
  * allow the fileserver to request callback state (re-)initialisation
  */
-int SRXAFSCM_InitCallBackState(struct afs_server *server)
+void afs_init_callback_state(struct afs_server *server)
 {
-	struct list_head callbacks;
+	struct afs_vnode *vnode;
 
-	_enter("%p", server);
+	_enter("{%p}", server);
 
-	INIT_LIST_HEAD(&callbacks);
-
-	/* transfer the callback list from the server to a temp holding area */
 	spin_lock(&server->cb_lock);
 
-	list_add(&callbacks, &server->cb_promises);
-	list_del_init(&server->cb_promises);
+	/* kill all the promises on record from this server */
+	while (!RB_EMPTY_ROOT(&server->cb_promises)) {
+		vnode = rb_entry(server->cb_promises.rb_node,
+				 struct afs_vnode, cb_promise);
+		printk("\nUNPROMISE on %p\n", vnode);
+		rb_erase(&vnode->cb_promise, &server->cb_promises);
+		vnode->cb_promised = false;
+	}
 
-	/* munch our way through the list, grabbing the inode, dropping all the
-	 * locks and regetting them in the right order
-	 */
-	while (!list_empty(&callbacks)) {
-		struct afs_vnode *vnode;
-		struct inode *inode;
+	spin_unlock(&server->cb_lock);
+	_leave("");
+}
 
-		vnode = list_entry(callbacks.next, struct afs_vnode, cb_link);
-		list_del_init(&vnode->cb_link);
+/*
+ * handle the data invalidation side of a callback being broken
+ */
+void afs_broken_callback_work(struct work_struct *work)
+{
+	struct afs_vnode *vnode =
+		container_of(work, struct afs_vnode, cb_broken_work);
 
-		/* try and grab the inode - may fail */
-		inode = igrab(AFS_VNODE_TO_I(vnode));
-		if (inode) {
-			int release = 0;
+	_enter("");
 
-			spin_unlock(&server->cb_lock);
-			spin_lock(&vnode->lock);
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+		return;
 
-			if (vnode->cb_server == server) {
-				vnode->cb_server = NULL;
-				afs_kafstimod_del_timer(&vnode->cb_timeout);
-				spin_lock(&afs_cb_hash_lock);
-				list_del_init(&vnode->cb_hash_link);
-				spin_unlock(&afs_cb_hash_lock);
-				release = 1;
-			}
+	/* we're only interested in dealing with a broken callback on *this*
+	 * vnode and only if no-one else has dealt with it yet */
+	if (!mutex_trylock(&vnode->cb_broken_lock))
+		return; /* someone else is dealing with it */
 
-			spin_unlock(&vnode->lock);
+	if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
+		if (afs_vnode_fetch_status(vnode) < 0)
+			goto out;
 
-			iput(inode);
-			afs_put_server(server);
+		if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+			goto out;
 
-			spin_lock(&server->cb_lock);
+		/* if the vnode's data version number changed then its contents
+		 * are different */
+		if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
+			_debug("zap data");
+			invalidate_remote_inode(&vnode->vfs_inode);
 		}
 	}
 
-	spin_unlock(&server->cb_lock);
+out:
+	mutex_unlock(&vnode->cb_broken_lock);
 
-	_leave(" = 0");
-	return 0;
+	/* avoid the potential race whereby the mutex_trylock() in this
+	 * function happens again between the clear_bit() and the
+	 * mutex_unlock() */
+	if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
+		_debug("requeue");
+		queue_work(afs_callback_update_worker, &vnode->cb_broken_work);
+	}
+	_leave("");
+}
+
+/*
+ * actually break a callback
+ */
+static void afs_break_callback(struct afs_server *server,
+			       struct afs_vnode *vnode)
+{
+	_enter("");
+
+	set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
+
+	if (vnode->cb_promised) {
+		spin_lock(&vnode->lock);
+
+		_debug("break callback");
+
+		spin_lock(&server->cb_lock);
+		if (vnode->cb_promised) {
+			rb_erase(&vnode->cb_promise, &server->cb_promises);
+			vnode->cb_promised = false;
+		}
+		spin_unlock(&server->cb_lock);
+
+		queue_work(afs_callback_update_worker, &vnode->cb_broken_work);
+		spin_unlock(&vnode->lock);
+	}
+}
+
+/*
+ * allow the fileserver to explicitly break one callback
+ * - happens when
+ *   - the backing file is changed
+ *   - a lock is released
+ */
+static void afs_break_one_callback(struct afs_server *server,
+				   struct afs_fid *fid)
+{
+	struct afs_vnode *vnode;
+	struct rb_node *p;
+
+	_debug("find");
+	spin_lock(&server->fs_lock);
+	p = server->fs_vnodes.rb_node;
+	while (p) {
+		vnode = rb_entry(p, struct afs_vnode, server_rb);
+		if (fid->vid < vnode->fid.vid)
+			p = p->rb_left;
+		else if (fid->vid > vnode->fid.vid)
+			p = p->rb_right;
+		else if (fid->vnode < vnode->fid.vnode)
+			p = p->rb_left;
+		else if (fid->vnode > vnode->fid.vnode)
+			p = p->rb_right;
+		else if (fid->unique < vnode->fid.unique)
+			p = p->rb_left;
+		else if (fid->unique > vnode->fid.unique)
+			p = p->rb_right;
+		else
+			goto found;
+	}
+
+	/* not found so we just ignore it (it may have moved to another
+	 * server) */
+not_available:
+	_debug("not avail");
+	spin_unlock(&server->fs_lock);
+	_leave("");
+	return;
+
+found:
+	_debug("found");
+	ASSERTCMP(server, ==, vnode->server);
+
+	if (!igrab(AFS_VNODE_TO_I(vnode)))
+		goto not_available;
+	spin_unlock(&server->fs_lock);
+
+	afs_break_callback(server, vnode);
+	iput(&vnode->vfs_inode);
+	_leave("");
 }
 
 /*
  * allow the fileserver to break callback promises
  */
-int SRXAFSCM_CallBack(struct afs_server *server, size_t count,
-		      struct afs_callback callbacks[])
+void afs_break_callbacks(struct afs_server *server, size_t count,
+			 struct afs_callback callbacks[])
 {
-	_enter("%p,%u,", server, count);
+	_enter("%p,%zu,", server, count);
 
-	for (; count > 0; callbacks++, count--) {
-		struct afs_vnode *vnode = NULL;
-		struct inode *inode = NULL;
-		int valid = 0;
+	ASSERT(server != NULL);
+	ASSERTCMP(count, <=, AFSCBMAX);
 
+	for (; count > 0; callbacks++, count--) {
 		_debug("- Fid { vl=%08x n=%u u=%u }  CB { v=%u x=%u t=%u }",
 		       callbacks->fid.vid,
 		       callbacks->fid.vnode,
@@ -101,66 +200,244 @@ int SRXAFSCM_CallBack(struct afs_server *server, size_t count,
 		       callbacks->expiry,
 		       callbacks->type
 		       );
+		afs_break_one_callback(server, &callbacks->fid);
+	}
+
+	_leave("");
+	return;
+}
 
-		/* find the inode for this fid */
-		spin_lock(&afs_cb_hash_lock);
+/*
+ * record the callback for breaking
+ * - the caller must hold server->cb_lock
+ */
+static void afs_do_give_up_callback(struct afs_server *server,
+				    struct afs_vnode *vnode)
+{
+	struct afs_callback *cb;
 
-		list_for_each_entry(vnode,
-				    &afs_cb_hash(server, &callbacks->fid),
-				    cb_hash_link) {
-			if (memcmp(&vnode->fid, &callbacks->fid,
-				   sizeof(struct afs_fid)) != 0)
-				continue;
+	_enter("%p,%p", server, vnode);
 
-			/* right vnode, but is it same server? */
-			if (vnode->cb_server != server)
-				break; /* no */
+	cb = &server->cb_break[server->cb_break_head];
+	cb->fid		= vnode->fid;
+	cb->version	= vnode->cb_version;
+	cb->expiry	= vnode->cb_expiry;
+	cb->type	= vnode->cb_type;
+	smp_wmb();
+	server->cb_break_head =
+		(server->cb_break_head + 1) &
+		(ARRAY_SIZE(server->cb_break) - 1);
 
-			/* try and nail the inode down */
-			inode = igrab(AFS_VNODE_TO_I(vnode));
-			break;
+	/* defer the breaking of callbacks to try and collect as many as
+	 * possible to ship in one operation */
+	switch (atomic_inc_return(&server->cb_break_n)) {
+	case 1 ... AFSCBMAX - 1:
+		queue_delayed_work(afs_callback_update_worker,
+				   &server->cb_break_work, HZ * 2);
+		break;
+	case AFSCBMAX:
+		afs_flush_callback_breaks(server);
+		break;
+	default:
+		break;
+	}
+
+	ASSERT(server->cb_promises.rb_node != NULL);
+	rb_erase(&vnode->cb_promise, &server->cb_promises);
+	vnode->cb_promised = false;
+	_leave("");
+}
+
+/*
+ * give up the callback registered for a vnode on the file server when the
+ * inode is being cleared
+ */
+void afs_give_up_callback(struct afs_vnode *vnode)
+{
+	struct afs_server *server = vnode->server;
+
+	DECLARE_WAITQUEUE(myself, current);
+
+	_enter("%d", vnode->cb_promised);
+
+	_debug("GIVE UP INODE %p", &vnode->vfs_inode);
+
+	if (!vnode->cb_promised) {
+		_leave(" [not promised]");
+		return;
+	}
+
+	ASSERT(server != NULL);
+
+	spin_lock(&server->cb_lock);
+	if (vnode->cb_promised && afs_breakring_space(server) == 0) {
+		add_wait_queue(&server->cb_break_waitq, &myself);
+		for (;;) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			if (!vnode->cb_promised ||
+			    afs_breakring_space(server) != 0)
+				break;
+			spin_unlock(&server->cb_lock);
+			schedule();
+			spin_lock(&server->cb_lock);
 		}
+		remove_wait_queue(&server->cb_break_waitq, &myself);
+		__set_current_state(TASK_RUNNING);
+	}
+
+	/* of course, it's always possible for the server to break this vnode's
+	 * callback first... */
+	if (vnode->cb_promised)
+		afs_do_give_up_callback(server, vnode);
+
+	spin_unlock(&server->cb_lock);
+	_leave("");
+}
+
+/*
+ * dispatch a deferred give up callbacks operation
+ */
+void afs_dispatch_give_up_callbacks(struct work_struct *work)
+{
+	struct afs_server *server =
+		container_of(work, struct afs_server, cb_break_work.work);
+
+	_enter("");
+
+	/* tell the fileserver to discard the callback promises it has
+	 * - in the event of ENOMEM or some other error, we just forget that we
+	 *   had callbacks entirely, and the server will call us later to break
+	 *   them
+	 */
+	afs_fs_give_up_callbacks(server, &afs_async_call);
+}
+
+/*
+ * flush the outstanding callback breaks on a server
+ */
+void afs_flush_callback_breaks(struct afs_server *server)
+{
+	cancel_delayed_work(&server->cb_break_work);
+	queue_delayed_work(afs_callback_update_worker,
+			   &server->cb_break_work, 0);
+}
+
+#if 0
+/*
+ * update a bunch of callbacks
+ */
+static void afs_callback_updater(struct work_struct *work)
+{
+	struct afs_server *server;
+	struct afs_vnode *vnode, *xvnode;
+	time_t now;
+	long timeout;
+	int ret;
+
+	server = container_of(work, struct afs_server, updater);
+
+	_enter("");
 
-		spin_unlock(&afs_cb_hash_lock);
-
-		if (inode) {
-			/* we've found the record for this vnode */
-			spin_lock(&vnode->lock);
-			if (vnode->cb_server == server) {
-				/* the callback _is_ on the calling server */
-				vnode->cb_server = NULL;
-				valid = 1;
-
-				afs_kafstimod_del_timer(&vnode->cb_timeout);
-				vnode->flags |= AFS_VNODE_CHANGED;
-
-				spin_lock(&server->cb_lock);
-				list_del_init(&vnode->cb_link);
-				spin_unlock(&server->cb_lock);
-
-				spin_lock(&afs_cb_hash_lock);
-				list_del_init(&vnode->cb_hash_link);
-				spin_unlock(&afs_cb_hash_lock);
-			}
-			spin_unlock(&vnode->lock);
-
-			if (valid) {
-				invalidate_remote_inode(inode);
-				afs_put_server(server);
-			}
-			iput(inode);
+	now = get_seconds();
+
+	/* find the first vnode to update */
+	spin_lock(&server->cb_lock);
+	for (;;) {
+		if (RB_EMPTY_ROOT(&server->cb_promises)) {
+			spin_unlock(&server->cb_lock);
+			_leave(" [nothing]");
+			return;
 		}
+
+		vnode = rb_entry(rb_first(&server->cb_promises),
+				 struct afs_vnode, cb_promise);
+		if (atomic_read(&vnode->usage) > 0)
+			break;
+		rb_erase(&vnode->cb_promise, &server->cb_promises);
+		vnode->cb_promised = false;
+	}
+
+	timeout = vnode->update_at - now;
+	if (timeout > 0) {
+		queue_delayed_work(afs_vnode_update_worker,
+				   &afs_vnode_update, timeout * HZ);
+		spin_unlock(&server->cb_lock);
+		_leave(" [nothing]");
+		return;
+	}
+
+	list_del_init(&vnode->update);
+	atomic_inc(&vnode->usage);
+	spin_unlock(&server->cb_lock);
+
+	/* we can now perform the update */
+	_debug("update %s", vnode->vldb.name);
+	vnode->state = AFS_VL_UPDATING;
+	vnode->upd_rej_cnt = 0;
+	vnode->upd_busy_cnt = 0;
+
+	ret = afs_vnode_update_record(vl, &vldb);
+	switch (ret) {
+	case 0:
+		afs_vnode_apply_update(vl, &vldb);
+		vnode->state = AFS_VL_UPDATING;
+		break;
+	case -ENOMEDIUM:
+		vnode->state = AFS_VL_VOLUME_DELETED;
+		break;
+	default:
+		vnode->state = AFS_VL_UNCERTAIN;
+		break;
+	}
+
+	/* and then reschedule */
+	_debug("reschedule");
+	vnode->update_at = get_seconds() + afs_vnode_update_timeout;
+
+	spin_lock(&server->cb_lock);
+
+	if (!list_empty(&server->cb_promises)) {
+		/* next update in 10 minutes, but wait at least 1 second more
+		 * than the newest record already queued so that we don't spam
+		 * the VL server suddenly with lots of requests
+		 */
+		xvnode = list_entry(server->cb_promises.prev,
+				    struct afs_vnode, update);
+		if (vnode->update_at <= xvnode->update_at)
+			vnode->update_at = xvnode->update_at + 1;
+		xvnode = list_entry(server->cb_promises.next,
+				    struct afs_vnode, update);
+		timeout = xvnode->update_at - now;
+		if (timeout < 0)
+			timeout = 0;
+	} else {
+		timeout = afs_vnode_update_timeout;
 	}
 
-	_leave(" = 0");
-	return 0;
+	list_add_tail(&vnode->update, &server->cb_promises);
+
+	_debug("timeout %ld", timeout);
+	queue_delayed_work(afs_vnode_update_worker,
+			   &afs_vnode_update, timeout * HZ);
+	spin_unlock(&server->cb_lock);
+	afs_put_vnode(vl);
+}
+#endif
+
+/*
+ * initialise the callback update process
+ */
+int __init afs_callback_update_init(void)
+{
+	afs_callback_update_worker =
+		create_singlethread_workqueue("kafs_callbackd");
+	return afs_callback_update_worker ? 0 : -ENOMEM;
 }
 
 /*
- * allow the fileserver to see if the cache manager is still alive
+ * shut down the callback update process
  */
-int SRXAFSCM_Probe(struct afs_server *server)
+void __exit afs_callback_update_kill(void)
 {
-	_debug("SRXAFSCM_Probe(%p)\n", server);
-	return 0;
+	destroy_workqueue(afs_callback_update_worker);
 }
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 28ed84ec8ff..733c60246ab 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -11,15 +11,6 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include "volume.h"
-#include "cell.h"
-#include "server.h"
-#include "transport.h"
-#include "vlclient.h"
-#include "kafstimod.h"
-#include "super.h"
 #include "internal.h"
 
 DECLARE_RWSEM(afs_proc_cells_sem);
@@ -28,34 +19,21 @@ LIST_HEAD(afs_proc_cells);
 static struct list_head afs_cells = LIST_HEAD_INIT(afs_cells);
 static DEFINE_RWLOCK(afs_cells_lock);
 static DECLARE_RWSEM(afs_cells_sem); /* add/remove serialisation */
+static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq);
 static struct afs_cell *afs_cell_root;
 
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_cell_cache_match(void *target,
-						const void *entry);
-static void afs_cell_cache_update(void *source, void *entry);
-
-struct cachefs_index_def afs_cache_cell_index_def = {
-	.name			= "cell_ix",
-	.data_size		= sizeof(struct afs_cache_cell),
-	.keys[0]		= { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
-	.match			= afs_cell_cache_match,
-	.update			= afs_cell_cache_update,
-};
-#endif
-
 /*
  * create a cell record
  * - "name" is the name of the cell
  * - "vllist" is a colon separated list of IP addresses in "a.b.c.d" format
  */
-int afs_cell_create(const char *name, char *vllist, struct afs_cell **_cell)
+struct afs_cell *afs_cell_create(const char *name, char *vllist)
 {
 	struct afs_cell *cell;
 	char *next;
 	int ret;
 
-	_enter("%s", name);
+	_enter("%s,%s", name, vllist);
 
 	BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */
 
@@ -63,27 +41,24 @@ int afs_cell_create(const char *name, char *vllist, struct afs_cell **_cell)
 	cell = kmalloc(sizeof(struct afs_cell) + strlen(name) + 1, GFP_KERNEL);
 	if (!cell) {
 		_leave(" = -ENOMEM");
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	down_write(&afs_cells_sem);
 
 	memset(cell, 0, sizeof(struct afs_cell));
-	atomic_set(&cell->usage, 0);
+	atomic_set(&cell->usage, 1);
 
 	INIT_LIST_HEAD(&cell->link);
 
-	rwlock_init(&cell->sv_lock);
-	INIT_LIST_HEAD(&cell->sv_list);
-	INIT_LIST_HEAD(&cell->sv_graveyard);
-	spin_lock_init(&cell->sv_gylock);
+	rwlock_init(&cell->servers_lock);
+	INIT_LIST_HEAD(&cell->servers);
 
 	init_rwsem(&cell->vl_sem);
 	INIT_LIST_HEAD(&cell->vl_list);
-	INIT_LIST_HEAD(&cell->vl_graveyard);
-	spin_lock_init(&cell->vl_gylock);
+	spin_lock_init(&cell->vl_lock);
 
-	strcpy(cell->name,name);
+	strcpy(cell->name, name);
 
 	/* fill in the VL server list from the rest of the string */
 	ret = -EINVAL;
@@ -106,9 +81,9 @@ int afs_cell_create(const char *name, char *vllist, struct afs_cell **_cell)
 		if (cell->vl_naddrs >= AFS_CELL_MAX_ADDRS)
 			break;
 
-	} while(vllist = next, vllist);
+	} while ((vllist = next));
 
-	/* add a proc dir for this cell */
+	/* add a proc directory for this cell */
 	ret = afs_proc_cell_setup(cell);
 	if (ret < 0)
 		goto error;
@@ -129,30 +104,29 @@ int afs_cell_create(const char *name, char *vllist, struct afs_cell **_cell)
 	down_write(&afs_proc_cells_sem);
 	list_add_tail(&cell->proc_link, &afs_proc_cells);
 	up_write(&afs_proc_cells_sem);
-
-	*_cell = cell;
 	up_write(&afs_cells_sem);
 
-	_leave(" = 0 (%p)", cell);
-	return 0;
+	_leave(" = %p", cell);
+	return cell;
 
 badaddr:
-	printk(KERN_ERR "kAFS: bad VL server IP address: '%s'\n", vllist);
+	printk(KERN_ERR "kAFS: bad VL server IP address\n");
 error:
 	up_write(&afs_cells_sem);
 	kfree(cell);
 	_leave(" = %d", ret);
-	return ret;
+	return ERR_PTR(ret);
 }
 
 /*
- * initialise the cell database from module parameters
+ * set the root cell information
+ * - can be called with a module parameter string
+ * - can be called from a write to /proc/fs/afs/rootcell
  */
 int afs_cell_init(char *rootcell)
 {
 	struct afs_cell *old_root, *new_root;
 	char *cp;
-	int ret;
 
 	_enter("");
 
@@ -160,79 +134,60 @@ int afs_cell_init(char *rootcell)
 		/* module is loaded with no parameters, or built statically.
 		 * - in the future we might initialize cell DB here.
 		 */
-		_leave(" = 0 (but no root)");
+		_leave(" = 0 [no root]");
 		return 0;
 	}
 
 	cp = strchr(rootcell, ':');
 	if (!cp) {
 		printk(KERN_ERR "kAFS: no VL server IP addresses specified\n");
-		_leave(" = %d (no colon)", -EINVAL);
+		_leave(" = -EINVAL");
 		return -EINVAL;
 	}
 
 	/* allocate a cell record for the root cell */
 	*cp++ = 0;
-	ret = afs_cell_create(rootcell, cp, &new_root);
-	if (ret < 0) {
-		_leave(" = %d", ret);
-		return ret;
+	new_root = afs_cell_create(rootcell, cp);
+	if (IS_ERR(new_root)) {
+		_leave(" = %ld", PTR_ERR(new_root));
+		return PTR_ERR(new_root);
 	}
 
-	/* as afs_put_cell() takes locks by itself, we have to do
-	 * a little gymnastics to be race-free.
-	 */
-	afs_get_cell(new_root);
-
+	/* install the new cell */
 	write_lock(&afs_cells_lock);
-	while (afs_cell_root) {
-		old_root = afs_cell_root;
-		afs_cell_root = NULL;
-		write_unlock(&afs_cells_lock);
-		afs_put_cell(old_root);
-		write_lock(&afs_cells_lock);
-	}
+	old_root = afs_cell_root;
 	afs_cell_root = new_root;
 	write_unlock(&afs_cells_lock);
+	afs_put_cell(old_root);
 
-	_leave(" = %d", ret);
-	return ret;
+	_leave(" = 0");
+	return 0;
 }
 
 /*
  * lookup a cell record
  */
-int afs_cell_lookup(const char *name, unsigned namesz, struct afs_cell **_cell)
+struct afs_cell *afs_cell_lookup(const char *name, unsigned namesz)
 {
 	struct afs_cell *cell;
-	int ret;
 
 	_enter("\"%*.*s\",", namesz, namesz, name ? name : "");
 
-	*_cell = NULL;
+	down_read(&afs_cells_sem);
+	read_lock(&afs_cells_lock);
 
 	if (name) {
 		/* if the cell was named, look for it in the cell record list */
-		ret = -ENOENT;
-		cell = NULL;
-		read_lock(&afs_cells_lock);
-
 		list_for_each_entry(cell, &afs_cells, link) {
 			if (strncmp(cell->name, name, namesz) == 0) {
 				afs_get_cell(cell);
 				goto found;
 			}
 		}
-		cell = NULL;
+		cell = ERR_PTR(-ENOENT);
 	found:
-
-		read_unlock(&afs_cells_lock);
-
-		if (cell)
-			ret = 0;
+		;
 	} else {
-		read_lock(&afs_cells_lock);
-
 		cell = afs_cell_root;
 		if (!cell) {
 			/* this should not happen unless user tries to mount
@@ -241,37 +196,32 @@ int afs_cell_lookup(const char *name, unsigned namesz, struct afs_cell **_cell)
 			 * ENOENT might be "more appropriate" but they happen
 			 * for other reasons.
 			 */
-			ret = -EDESTADDRREQ;
+			cell = ERR_PTR(-EDESTADDRREQ);
 		} else {
 			afs_get_cell(cell);
-			ret = 0;
 		}
 
-		read_unlock(&afs_cells_lock);
 	}
 
-	*_cell = cell;
-	_leave(" = %d (%p)", ret, cell);
-	return ret;
+	read_unlock(&afs_cells_lock);
+	up_read(&afs_cells_sem);
+	_leave(" = %p", cell);
+	return cell;
 }
 
 /*
  * try and get a cell record
  */
-struct afs_cell *afs_get_cell_maybe(struct afs_cell **_cell)
+struct afs_cell *afs_get_cell_maybe(struct afs_cell *cell)
 {
-	struct afs_cell *cell;
-
 	write_lock(&afs_cells_lock);
 
-	cell = *_cell;
 	if (cell && !list_empty(&cell->link))
 		afs_get_cell(cell);
 	else
 		cell = NULL;
 
 	write_unlock(&afs_cells_lock);
-
 	return cell;
 }
 
@@ -285,8 +235,7 @@ void afs_put_cell(struct afs_cell *cell)
 
 	_enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name);
 
-	/* sanity check */
-	BUG_ON(atomic_read(&cell->usage) <= 0);
+	ASSERTCMP(atomic_read(&cell->usage), >, 0);
 
 	/* to prevent a race, the decrement and the dequeue must be effectively
 	 * atomic */
@@ -298,35 +247,49 @@ void afs_put_cell(struct afs_cell *cell)
 		return;
 	}
 
+	ASSERT(list_empty(&cell->servers));
+	ASSERT(list_empty(&cell->vl_list));
+
 	write_unlock(&afs_cells_lock);
 
-	BUG_ON(!list_empty(&cell->sv_list));
-	BUG_ON(!list_empty(&cell->sv_graveyard));
-	BUG_ON(!list_empty(&cell->vl_list));
-	BUG_ON(!list_empty(&cell->vl_graveyard));
+	wake_up(&afs_cells_freeable_wq);
 
 	_leave(" [unused]");
 }
 
 /*
  * destroy a cell record
+ * - must be called with the afs_cells_sem write-locked
+ * - cell->link should have been broken by the caller
  */
 static void afs_cell_destroy(struct afs_cell *cell)
 {
 	_enter("%p{%d,%s}", cell, atomic_read(&cell->usage), cell->name);
 
-	/* to prevent a race, the decrement and the dequeue must be effectively
-	 * atomic */
-	write_lock(&afs_cells_lock);
+	ASSERTCMP(atomic_read(&cell->usage), >=, 0);
+	ASSERT(list_empty(&cell->link));
 
-	/* sanity check */
-	BUG_ON(atomic_read(&cell->usage) != 0);
+	/* wait for everyone to stop using the cell */
+	if (atomic_read(&cell->usage) > 0) {
+		DECLARE_WAITQUEUE(myself, current);
 
-	list_del_init(&cell->link);
+		_debug("wait for cell %s", cell->name);
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		add_wait_queue(&afs_cells_freeable_wq, &myself);
 
-	write_unlock(&afs_cells_lock);
+		while (atomic_read(&cell->usage) > 0) {
+			schedule();
+			set_current_state(TASK_UNINTERRUPTIBLE);
+		}
 
-	down_write(&afs_cells_sem);
+		remove_wait_queue(&afs_cells_freeable_wq, &myself);
+		set_current_state(TASK_RUNNING);
+	}
+
+	_debug("cell dead");
+	ASSERTCMP(atomic_read(&cell->usage), ==, 0);
+	ASSERT(list_empty(&cell->servers));
+	ASSERT(list_empty(&cell->vl_list));
 
 	afs_proc_cell_remove(cell);
 
@@ -338,101 +301,25 @@ static void afs_cell_destroy(struct afs_cell *cell)
 	cachefs_relinquish_cookie(cell->cache, 0);
 #endif
 
-	up_write(&afs_cells_sem);
-
-	BUG_ON(!list_empty(&cell->sv_list));
-	BUG_ON(!list_empty(&cell->sv_graveyard));
-	BUG_ON(!list_empty(&cell->vl_list));
-	BUG_ON(!list_empty(&cell->vl_graveyard));
-
-	/* finish cleaning up the cell */
 	kfree(cell);
 
 	_leave(" [destroyed]");
 }
 
-/*
- * lookup the server record corresponding to an Rx RPC peer
- */
-int afs_server_find_by_peer(const struct rxrpc_peer *peer,
-			    struct afs_server **_server)
-{
-	struct afs_server *server;
-	struct afs_cell *cell;
-
-	_enter("%p{a=%08x},", peer, ntohl(peer->addr.s_addr));
-
-	/* search the cell list */
-	read_lock(&afs_cells_lock);
-
-	list_for_each_entry(cell, &afs_cells, link) {
-
-		_debug("? cell %s",cell->name);
-
-		write_lock(&cell->sv_lock);
-
-		/* check the active list */
-		list_for_each_entry(server, &cell->sv_list, link) {
-			_debug("?? server %08x", ntohl(server->addr.s_addr));
-
-			if (memcmp(&server->addr, &peer->addr,
-				   sizeof(struct in_addr)) == 0)
-				goto found_server;
-		}
-
-		/* check the inactive list */
-		spin_lock(&cell->sv_gylock);
-		list_for_each_entry(server, &cell->sv_graveyard, link) {
-			_debug("?? dead server %08x",
-			       ntohl(server->addr.s_addr));
-
-			if (memcmp(&server->addr, &peer->addr,
-				   sizeof(struct in_addr)) == 0)
-				goto found_dead_server;
-		}
-		spin_unlock(&cell->sv_gylock);
-
-		write_unlock(&cell->sv_lock);
-	}
-	read_unlock(&afs_cells_lock);
-
-	_leave(" = -ENOENT");
-	return -ENOENT;
-
-	/* we found it in the graveyard - resurrect it */
-found_dead_server:
-	list_move_tail(&server->link, &cell->sv_list);
-	afs_get_server(server);
-	afs_kafstimod_del_timer(&server->timeout);
-	spin_unlock(&cell->sv_gylock);
-	goto success;
-
-	/* we found it - increment its ref count and return it */
-found_server:
-	afs_get_server(server);
-
-success:
-	write_unlock(&cell->sv_lock);
-	read_unlock(&afs_cells_lock);
-
-	*_server = server;
-	_leave(" = 0 (s=%p c=%p)", server, cell);
-	return 0;
-}
-
 /*
  * purge in-memory cell database on module unload or afs_init() failure
  * - the timeout daemon is stopped before calling this
  */
 void afs_cell_purge(void)
 {
-	struct afs_vlocation *vlocation;
 	struct afs_cell *cell;
 
 	_enter("");
 
 	afs_put_cell(afs_cell_root);
 
+	down_write(&afs_cells_sem);
+
 	while (!list_empty(&afs_cells)) {
 		cell = NULL;
 
@@ -451,102 +338,11 @@ void afs_cell_purge(void)
 			_debug("PURGING CELL %s (%d)",
 			       cell->name, atomic_read(&cell->usage));
 
-			BUG_ON(!list_empty(&cell->sv_list));
-			BUG_ON(!list_empty(&cell->vl_list));
-
-			/* purge the cell's VL graveyard list */
-			_debug(" - clearing VL graveyard");
-
-			spin_lock(&cell->vl_gylock);
-
-			while (!list_empty(&cell->vl_graveyard)) {
-				vlocation = list_entry(cell->vl_graveyard.next,
-						       struct afs_vlocation,
-						       link);
-				list_del_init(&vlocation->link);
-
-				afs_kafstimod_del_timer(&vlocation->timeout);
-
-				spin_unlock(&cell->vl_gylock);
-
-				afs_vlocation_do_timeout(vlocation);
-				/* TODO: race if move to use krxtimod instead
-				 * of kafstimod */
-
-				spin_lock(&cell->vl_gylock);
-			}
-
-			spin_unlock(&cell->vl_gylock);
-
-			/* purge the cell's server graveyard list */
-			_debug(" - clearing server graveyard");
-
-			spin_lock(&cell->sv_gylock);
-
-			while (!list_empty(&cell->sv_graveyard)) {
-				struct afs_server *server;
-
-				server = list_entry(cell->sv_graveyard.next,
-						    struct afs_server, link);
-				list_del_init(&server->link);
-
-				afs_kafstimod_del_timer(&server->timeout);
-
-				spin_unlock(&cell->sv_gylock);
-
-				afs_server_do_timeout(server);
-
-				spin_lock(&cell->sv_gylock);
-			}
-
-			spin_unlock(&cell->sv_gylock);
-
 			/* now the cell should be left with no references */
 			afs_cell_destroy(cell);
 		}
 	}
 
+	up_write(&afs_cells_sem);
 	_leave("");
 }
-
-/*
- * match a cell record obtained from the cache
- */
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_cell_cache_match(void *target,
-						const void *entry)
-{
-	const struct afs_cache_cell *ccell = entry;
-	struct afs_cell *cell = target;
-
-	_enter("{%s},{%s}", ccell->name, cell->name);
-
-	if (strncmp(ccell->name, cell->name, sizeof(ccell->name)) == 0) {
-		_leave(" = SUCCESS");
-		return CACHEFS_MATCH_SUCCESS;
-	}
-
-	_leave(" = FAILED");
-	return CACHEFS_MATCH_FAILED;
-}
-#endif
-
-/*
- * update a cell record in the cache
- */
-#ifdef AFS_CACHING_SUPPORT
-static void afs_cell_cache_update(void *source, void *entry)
-{
-	struct afs_cache_cell *ccell = entry;
-	struct afs_cell *cell = source;
-
-	_enter("%p,%p", source, entry);
-
-	strncpy(ccell->name, cell->name, sizeof(ccell->name));
-
-	memcpy(ccell->vl_servers,
-	       cell->vl_addrs,
-	       min(sizeof(ccell->vl_servers), sizeof(cell->vl_addrs)));
-
-}
-#endif
diff --git a/fs/afs/cell.h b/fs/afs/cell.h
deleted file mode 100644
index c135b00c6c7..00000000000
--- a/fs/afs/cell.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/* AFS cell record
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef AFS_CELL_H
-#define AFS_CELL_H
-
-#include "types.h"
-#include "cache.h"
-
-#define AFS_CELL_MAX_ADDRS 15
-
-extern volatile int afs_cells_being_purged; /* T when cells are being purged by rmmod */
-
-/*
- * entry in the cached cell catalogue
- */
-struct afs_cache_cell {
-	char			name[64];	/* cell name (padded with NULs) */
-	struct in_addr		vl_servers[15];	/* cached cell VL servers */
-};
-
-/*
- * AFS cell record
- */
-struct afs_cell {
-	atomic_t		usage;
-	struct list_head	link;		/* main cell list link */
-	struct list_head	proc_link;	/* /proc cell list link */
-	struct proc_dir_entry	*proc_dir;	/* /proc dir for this cell */
-#ifdef AFS_CACHING_SUPPORT
-	struct cachefs_cookie	*cache;		/* caching cookie */
-#endif
-
-	/* server record management */
-	rwlock_t		sv_lock;	/* active server list lock */
-	struct list_head	sv_list;	/* active server list */
-	struct list_head	sv_graveyard;	/* inactive server list */
-	spinlock_t		sv_gylock;	/* inactive server list lock */
-
-	/* volume location record management */
-	struct rw_semaphore	vl_sem;		/* volume management serialisation semaphore */
-	struct list_head	vl_list;	/* cell's active VL record list */
-	struct list_head	vl_graveyard;	/* cell's inactive VL record list */
-	spinlock_t		vl_gylock;	/* graveyard lock */
-	unsigned short		vl_naddrs;	/* number of VL servers in addr list */
-	unsigned short		vl_curr_svix;	/* current server index */
-	struct in_addr		vl_addrs[AFS_CELL_MAX_ADDRS];	/* cell VL server addresses */
-
-	char			name[0];	/* cell name - must go last */
-};
-
-extern int afs_cell_init(char *);
-extern int afs_cell_create(const char *, char *, struct afs_cell **);
-extern int afs_cell_lookup(const char *, unsigned, struct afs_cell **);
-
-#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
-
-extern struct afs_cell *afs_get_cell_maybe(struct afs_cell **);
-extern void afs_put_cell(struct afs_cell *);
-extern void afs_cell_purge(void);
-
-#endif /* AFS_CELL_H */
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 3f4585765cb..c7141175391 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -12,623 +12,316 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sched.h>
-#include <linux/completion.h>
-#include "server.h"
-#include "cell.h"
-#include "transport.h"
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include "cmservice.h"
+#include <linux/ip.h>
 #include "internal.h"
+#include "afs_cm.h"
 
-static unsigned afscm_usage;		/* AFS cache manager usage count */
-static struct rw_semaphore afscm_sem;	/* AFS cache manager start/stop semaphore */
-
-static int afscm_new_call(struct rxrpc_call *call);
-static void afscm_attention(struct rxrpc_call *call);
-static void afscm_error(struct rxrpc_call *call);
-static void afscm_aemap(struct rxrpc_call *call);
-
-static void _SRXAFSCM_CallBack(struct rxrpc_call *call);
-static void _SRXAFSCM_InitCallBackState(struct rxrpc_call *call);
-static void _SRXAFSCM_Probe(struct rxrpc_call *call);
-
-typedef void (*_SRXAFSCM_xxxx_t)(struct rxrpc_call *call);
-
-static const struct rxrpc_operation AFSCM_ops[] = {
-	{
-		.id	= 204,
-		.asize	= RXRPC_APP_MARK_EOF,
-		.name	= "CallBack",
-		.user	= _SRXAFSCM_CallBack,
-	},
-	{
-		.id	= 205,
-		.asize	= RXRPC_APP_MARK_EOF,
-		.name	= "InitCallBackState",
-		.user	= _SRXAFSCM_InitCallBackState,
-	},
-	{
-		.id	= 206,
-		.asize	= RXRPC_APP_MARK_EOF,
-		.name	= "Probe",
-		.user	= _SRXAFSCM_Probe,
-	},
-#if 0
-	{
-		.id	= 207,
-		.asize	= RXRPC_APP_MARK_EOF,
-		.name	= "GetLock",
-		.user	= _SRXAFSCM_GetLock,
-	},
-	{
-		.id	= 208,
-		.asize	= RXRPC_APP_MARK_EOF,
-		.name	= "GetCE",
-		.user	= _SRXAFSCM_GetCE,
-	},
-	{
-		.id	= 209,
-		.asize	= RXRPC_APP_MARK_EOF,
-		.name	= "GetXStatsVersion",
-		.user	= _SRXAFSCM_GetXStatsVersion,
-	},
-	{
-		.id	= 210,
-		.asize	= RXRPC_APP_MARK_EOF,
-		.name	= "GetXStats",
-		.user	= _SRXAFSCM_GetXStats,
-	}
-#endif
-};
+struct workqueue_struct *afs_cm_workqueue;
 
-static struct rxrpc_service AFSCM_service = {
-	.name		= "AFS/CM",
-	.owner		= THIS_MODULE,
-	.link		= LIST_HEAD_INIT(AFSCM_service.link),
-	.new_call	= afscm_new_call,
-	.service_id	= 1,
-	.attn_func	= afscm_attention,
-	.error_func	= afscm_error,
-	.aemap_func	= afscm_aemap,
-	.ops_begin	= &AFSCM_ops[0],
-	.ops_end	= &AFSCM_ops[ARRAY_SIZE(AFSCM_ops)],
-};
-
-static DECLARE_COMPLETION(kafscmd_alive);
-static DECLARE_COMPLETION(kafscmd_dead);
-static DECLARE_WAIT_QUEUE_HEAD(kafscmd_sleepq);
-static LIST_HEAD(kafscmd_attention_list);
-static LIST_HEAD(afscm_calls);
-static DEFINE_SPINLOCK(afscm_calls_lock);
-static DEFINE_SPINLOCK(kafscmd_attention_lock);
-static int kafscmd_die;
+static int afs_deliver_cb_init_call_back_state(struct afs_call *,
+					       struct sk_buff *, bool);
+static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool);
+static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool);
+static void afs_cm_destructor(struct afs_call *);
 
 /*
- * AFS Cache Manager kernel thread
+ * CB.CallBack operation type
  */
-static int kafscmd(void *arg)
-{
-	DECLARE_WAITQUEUE(myself, current);
-
-	struct rxrpc_call *call;
-	_SRXAFSCM_xxxx_t func;
-	int die;
-
-	printk(KERN_INFO "kAFS: Started kafscmd %d\n", current->pid);
-
-	daemonize("kafscmd");
-
-	complete(&kafscmd_alive);
-
-	/* loop around looking for things to attend to */
-	do {
-		if (list_empty(&kafscmd_attention_list)) {
-			set_current_state(TASK_INTERRUPTIBLE);
-			add_wait_queue(&kafscmd_sleepq, &myself);
-
-			for (;;) {
-				set_current_state(TASK_INTERRUPTIBLE);
-				if (!list_empty(&kafscmd_attention_list) ||
-				    signal_pending(current) ||
-				    kafscmd_die)
-					break;
-
-				schedule();
-			}
-
-			remove_wait_queue(&kafscmd_sleepq, &myself);
-			set_current_state(TASK_RUNNING);
-		}
-
-		die = kafscmd_die;
-
-		/* dequeue the next call requiring attention */
-		call = NULL;
-		spin_lock(&kafscmd_attention_lock);
-
-		if (!list_empty(&kafscmd_attention_list)) {
-			call = list_entry(kafscmd_attention_list.next,
-					  struct rxrpc_call,
-					  app_attn_link);
-			list_del_init(&call->app_attn_link);
-			die = 0;
-		}
-
-		spin_unlock(&kafscmd_attention_lock);
-
-		if (call) {
-			/* act upon it */
-			_debug("@@@ Begin Attend Call %p", call);
-
-			func = call->app_user;
-			if (func)
-				func(call);
-
-			rxrpc_put_call(call);
-
-			_debug("@@@ End Attend Call %p", call);
-		}
-
-	} while(!die);
-
-	/* and that's all */
-	complete_and_exit(&kafscmd_dead, 0);
-}
+static const struct afs_call_type afs_SRXCBCallBack = {
+	.deliver	= afs_deliver_cb_callback,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_cm_destructor,
+};
 
 /*
- * handle a call coming in to the cache manager
- * - if I want to keep the call, I must increment its usage count
- * - the return value will be negated and passed back in an abort packet if
- *   non-zero
- * - serialised by virtue of there only being one krxiod
+ * CB.InitCallBackState operation type
  */
-static int afscm_new_call(struct rxrpc_call *call)
-{
-	_enter("%p{cid=%u u=%d}",
-	       call, ntohl(call->call_id), atomic_read(&call->usage));
-
-	rxrpc_get_call(call);
-
-	/* add to my current call list */
-	spin_lock(&afscm_calls_lock);
-	list_add(&call->app_link,&afscm_calls);
-	spin_unlock(&afscm_calls_lock);
-
-	_leave(" = 0");
-	return 0;
-}
+static const struct afs_call_type afs_SRXCBInitCallBackState = {
+	.deliver	= afs_deliver_cb_init_call_back_state,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_cm_destructor,
+};
 
 /*
- * queue on the kafscmd queue for attention
+ * CB.Probe operation type
  */
-static void afscm_attention(struct rxrpc_call *call)
-{
-	_enter("%p{cid=%u u=%d}",
-	       call, ntohl(call->call_id), atomic_read(&call->usage));
-
-	spin_lock(&kafscmd_attention_lock);
-
-	if (list_empty(&call->app_attn_link)) {
-		list_add_tail(&call->app_attn_link, &kafscmd_attention_list);
-		rxrpc_get_call(call);
-	}
-
-	spin_unlock(&kafscmd_attention_lock);
-
-	wake_up(&kafscmd_sleepq);
-
-	_leave(" {u=%d}", atomic_read(&call->usage));
-}
+static const struct afs_call_type afs_SRXCBProbe = {
+	.deliver	= afs_deliver_cb_probe,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_cm_destructor,
+};
 
 /*
- * handle my call being aborted
- * - clean up, dequeue and put my ref to the call
+ * route an incoming cache manager call
+ * - return T if supported, F if not
  */
-static void afscm_error(struct rxrpc_call *call)
+bool afs_cm_incoming_call(struct afs_call *call)
 {
-	int removed;
-
-	_enter("%p{est=%s ac=%u er=%d}",
-	       call,
-	       rxrpc_call_error_states[call->app_err_state],
-	       call->app_abort_code,
-	       call->app_errno);
-
-	spin_lock(&kafscmd_attention_lock);
-
-	if (list_empty(&call->app_attn_link)) {
-		list_add_tail(&call->app_attn_link, &kafscmd_attention_list);
-		rxrpc_get_call(call);
-	}
-
-	spin_unlock(&kafscmd_attention_lock);
-
-	removed = 0;
-	spin_lock(&afscm_calls_lock);
-	if (!list_empty(&call->app_link)) {
-		list_del_init(&call->app_link);
-		removed = 1;
+	u32 operation_id = ntohl(call->operation_ID);
+
+	_enter("{CB.OP %u}", operation_id);
+
+	switch (operation_id) {
+	case CBCallBack:
+		call->type = &afs_SRXCBCallBack;
+		return true;
+	case CBInitCallBackState:
+		call->type = &afs_SRXCBInitCallBackState;
+		return true;
+	case CBProbe:
+		call->type = &afs_SRXCBProbe;
+		return true;
+	default:
+		return false;
 	}
-	spin_unlock(&afscm_calls_lock);
-
-	if (removed)
-		rxrpc_put_call(call);
-
-	wake_up(&kafscmd_sleepq);
-
-	_leave("");
 }
 
 /*
- * map afs abort codes to/from Linux error codes
- * - called with call->lock held
+ * clean up a cache manager call
  */
-static void afscm_aemap(struct rxrpc_call *call)
+static void afs_cm_destructor(struct afs_call *call)
 {
-	switch (call->app_err_state) {
-	case RXRPC_ESTATE_LOCAL_ABORT:
-		call->app_abort_code = -call->app_errno;
-		break;
-	case RXRPC_ESTATE_PEER_ABORT:
-		call->app_errno = -ECONNABORTED;
-		break;
-	default:
-		break;
-	}
+	_enter("");
+
+	afs_put_server(call->server);
+	call->server = NULL;
+	kfree(call->buffer);
+	call->buffer = NULL;
 }
 
 /*
- * start the cache manager service if not already started
+ * allow the fileserver to see if the cache manager is still alive
  */
-int afscm_start(void)
+static void SRXAFSCB_CallBack(struct work_struct *work)
 {
-	int ret;
-
-	down_write(&afscm_sem);
-	if (!afscm_usage) {
-		ret = kernel_thread(kafscmd, NULL, 0);
-		if (ret < 0)
-			goto out;
-
-		wait_for_completion(&kafscmd_alive);
-
-		ret = rxrpc_add_service(afs_transport, &AFSCM_service);
-		if (ret < 0)
-			goto kill;
+	struct afs_call *call = container_of(work, struct afs_call, work);
 
-		afs_kafstimod_add_timer(&afs_mntpt_expiry_timer,
-					afs_mntpt_expiry_timeout * HZ);
-	}
-
-	afscm_usage++;
-	up_write(&afscm_sem);
-
-	return 0;
+	_enter("");
 
-kill:
-	kafscmd_die = 1;
-	wake_up(&kafscmd_sleepq);
-	wait_for_completion(&kafscmd_dead);
+	/* be sure to send the reply *before* attempting to spam the AFS server
+	 * with FSFetchStatus requests on the vnodes with broken callbacks lest
+	 * the AFS server get into a vicious cycle of trying to break further
+	 * callbacks because it hadn't received completion of the CBCallBack op
+	 * yet */
+	afs_send_empty_reply(call);
 
-out:
-	up_write(&afscm_sem);
-	return ret;
+	afs_break_callbacks(call->server, call->count, call->request);
+	_leave("");
 }
 
 /*
- * stop the cache manager service
+ * deliver request data to a CB.CallBack call
  */
-void afscm_stop(void)
+static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
+				   bool last)
 {
-	struct rxrpc_call *call;
-
-	down_write(&afscm_sem);
-
-	BUG_ON(afscm_usage == 0);
-	afscm_usage--;
-
-	if (afscm_usage == 0) {
-		/* don't want more incoming calls */
-		rxrpc_del_service(afs_transport, &AFSCM_service);
-
-		/* abort any calls I've still got open (the afscm_error() will
-		 * dequeue them) */
-		spin_lock(&afscm_calls_lock);
-		while (!list_empty(&afscm_calls)) {
-			call = list_entry(afscm_calls.next,
-					  struct rxrpc_call,
-					  app_link);
-
-			list_del_init(&call->app_link);
-			rxrpc_get_call(call);
-			spin_unlock(&afscm_calls_lock);
-
-			rxrpc_call_abort(call, -ESRCH); /* abort, dequeue and
-							 * put */
-
-			_debug("nuking active call %08x.%d",
-			       ntohl(call->conn->conn_id),
-			       ntohl(call->call_id));
-			rxrpc_put_call(call);
-			rxrpc_put_call(call);
-
-			spin_lock(&afscm_calls_lock);
+	struct afs_callback *cb;
+	struct afs_server *server;
+	struct in_addr addr;
+	__be32 *bp;
+	u32 tmp;
+	int ret, loop;
+
+	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+
+	switch (call->unmarshall) {
+	case 0:
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* extract the FID array and its count in two steps */
+	case 1:
+		_debug("extract FID count");
+		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
 		}
-		spin_unlock(&afscm_calls_lock);
-
-		/* get rid of my daemon */
-		kafscmd_die = 1;
-		wake_up(&kafscmd_sleepq);
-		wait_for_completion(&kafscmd_dead);
-
-		/* dispose of any calls waiting for attention */
-		spin_lock(&kafscmd_attention_lock);
-		while (!list_empty(&kafscmd_attention_list)) {
-			call = list_entry(kafscmd_attention_list.next,
-					  struct rxrpc_call,
-					  app_attn_link);
-
-			list_del_init(&call->app_attn_link);
-			spin_unlock(&kafscmd_attention_lock);
 
-			rxrpc_put_call(call);
-
-			spin_lock(&kafscmd_attention_lock);
+		call->count = ntohl(call->tmp);
+		_debug("FID count: %u", call->count);
+		if (call->count > AFSCBMAX)
+			return -EBADMSG;
+
+		call->buffer = kmalloc(call->count * 3 * 4, GFP_KERNEL);
+		if (!call->buffer)
+			return -ENOMEM;
+		call->offset = 0;
+		call->unmarshall++;
+
+	case 2:
+		_debug("extract FID array");
+		ret = afs_extract_data(call, skb, last, call->buffer,
+				       call->count * 3 * 4);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
 		}
-		spin_unlock(&kafscmd_attention_lock);
-
-		afs_kafstimod_del_timer(&afs_mntpt_expiry_timer);
-	}
-
-	up_write(&afscm_sem);
-}
 
-/*
- * handle the fileserver breaking a set of callbacks
- */
-static void _SRXAFSCM_CallBack(struct rxrpc_call *call)
-{
-	struct afs_server *server;
-	size_t count, qty, tmp;
-	int ret = 0, removed;
-
-	_enter("%p{acs=%s}", call, rxrpc_call_states[call->app_call_state]);
-
-	server = afs_server_get_from_peer(call->conn->peer);
-
-	switch (call->app_call_state) {
-		/* we've received the last packet
-		 * - drain all the data from the call and send the reply
-		 */
-	case RXRPC_CSTATE_SRVR_GOT_ARGS:
-		ret = -EBADMSG;
-		qty = call->app_ready_qty;
-		if (qty < 8 || qty > 50 * (6 * 4) + 8)
-			break;
-
-		{
-			struct afs_callback *cb, *pcb;
-			int loop;
-			__be32 *fp, *bp;
-
-			fp = rxrpc_call_alloc_scratch(call, qty);
-
-			/* drag the entire argument block out to the scratch
-			 * space */
-			ret = rxrpc_call_read_data(call, fp, qty, 0);
-			if (ret < 0)
-				break;
-
-			/* and unmarshall the parameter block */
-			ret = -EBADMSG;
-			count = ntohl(*fp++);
-			if (count>AFSCBMAX ||
-			    (count * (3 * 4) + 8 != qty &&
-			     count * (6 * 4) + 8 != qty))
-				break;
-
-			bp = fp + count*3;
-			tmp = ntohl(*bp++);
-			if (tmp > 0 && tmp != count)
-				break;
-			if (tmp == 0)
-				bp = NULL;
-
-			pcb = cb = rxrpc_call_alloc_scratch_s(
-				call, struct afs_callback);
-
-			for (loop = count - 1; loop >= 0; loop--) {
-				pcb->fid.vid	= ntohl(*fp++);
-				pcb->fid.vnode	= ntohl(*fp++);
-				pcb->fid.unique	= ntohl(*fp++);
-				if (bp) {
-					pcb->version	= ntohl(*bp++);
-					pcb->expiry	= ntohl(*bp++);
-					pcb->type	= ntohl(*bp++);
-				} else {
-					pcb->version	= 0;
-					pcb->expiry	= 0;
-					pcb->type	= AFSCM_CB_UNTYPED;
-				}
-				pcb++;
-			}
-
-			/* invoke the actual service routine */
-			ret = SRXAFSCM_CallBack(server, count, cb);
-			if (ret < 0)
-				break;
+		_debug("unmarshall FID array");
+		call->request = kcalloc(call->count,
+					sizeof(struct afs_callback),
+					GFP_KERNEL);
+		if (!call->request)
+			return -ENOMEM;
+
+		cb = call->request;
+		bp = call->buffer;
+		for (loop = call->count; loop > 0; loop--, cb++) {
+			cb->fid.vid	= ntohl(*bp++);
+			cb->fid.vnode	= ntohl(*bp++);
+			cb->fid.unique	= ntohl(*bp++);
+			cb->type	= AFSCM_CB_UNTYPED;
 		}
 
-		/* send the reply */
-		ret = rxrpc_call_write_data(call, 0, NULL, RXRPC_LAST_PACKET,
-					    GFP_KERNEL, 0, &count);
-		if (ret < 0)
-			break;
-		break;
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* extract the callback array and its count in two steps */
+	case 3:
+		_debug("extract CB count");
+		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
+		}
 
-		/* operation complete */
-	case RXRPC_CSTATE_COMPLETE:
-		call->app_user = NULL;
-		removed = 0;
-		spin_lock(&afscm_calls_lock);
-		if (!list_empty(&call->app_link)) {
-			list_del_init(&call->app_link);
-			removed = 1;
+		tmp = ntohl(call->tmp);
+		_debug("CB count: %u", tmp);
+		if (tmp != call->count && tmp != 0)
+			return -EBADMSG;
+		call->offset = 0;
+		call->unmarshall++;
+		if (tmp == 0)
+			goto empty_cb_array;
+
+	case 4:
+		_debug("extract CB array");
+		ret = afs_extract_data(call, skb, last, call->request,
+				       call->count * 3 * 4);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
 		}
-		spin_unlock(&afscm_calls_lock);
 
-		if (removed)
-			rxrpc_put_call(call);
-		break;
+		_debug("unmarshall CB array");
+		cb = call->request;
+		bp = call->buffer;
+		for (loop = call->count; loop > 0; loop--, cb++) {
+			cb->version	= ntohl(*bp++);
+			cb->expiry	= ntohl(*bp++);
+			cb->type	= ntohl(*bp++);
+		}
 
-		/* operation terminated on error */
-	case RXRPC_CSTATE_ERROR:
-		call->app_user = NULL;
-		break;
+	empty_cb_array:
+		call->offset = 0;
+		call->unmarshall++;
 
-	default:
+	case 5:
+		_debug("trailer");
+		if (skb->len != 0)
+			return -EBADMSG;
 		break;
 	}
 
-	if (ret < 0)
-		rxrpc_call_abort(call, ret);
+	if (!last)
+		return 0;
 
-	afs_put_server(server);
+	call->state = AFS_CALL_REPLYING;
 
-	_leave(" = %d", ret);
+	/* we'll need the file server record as that tells us which set of
+	 * vnodes to operate upon */
+	memcpy(&addr, &ip_hdr(skb)->saddr, 4);
+	server = afs_find_server(&addr);
+	if (!server)
+		return -ENOTCONN;
+	call->server = server;
+
+	INIT_WORK(&call->work, SRXAFSCB_CallBack);
+	schedule_work(&call->work);
+	return 0;
 }
 
 /*
- * handle the fileserver asking us to initialise our callback state
+ * allow the fileserver to request callback state (re-)initialisation
  */
-static void _SRXAFSCM_InitCallBackState(struct rxrpc_call *call)
+static void SRXAFSCB_InitCallBackState(struct work_struct *work)
 {
-	struct afs_server *server;
-	size_t count;
-	int ret = 0, removed;
-
-	_enter("%p{acs=%s}", call, rxrpc_call_states[call->app_call_state]);
-
-	server = afs_server_get_from_peer(call->conn->peer);
-
-	switch (call->app_call_state) {
-		/* we've received the last packet - drain all the data from the
-		 * call */
-	case RXRPC_CSTATE_SRVR_GOT_ARGS:
-		/* shouldn't be any args */
-		ret = -EBADMSG;
-		break;
+	struct afs_call *call = container_of(work, struct afs_call, work);
 
-		/* send the reply when asked for it */
-	case RXRPC_CSTATE_SRVR_SND_REPLY:
-		/* invoke the actual service routine */
-		ret = SRXAFSCM_InitCallBackState(server);
-		if (ret < 0)
-			break;
-
-		ret = rxrpc_call_write_data(call, 0, NULL, RXRPC_LAST_PACKET,
-					    GFP_KERNEL, 0, &count);
-		if (ret < 0)
-			break;
-		break;
+	_enter("{%p}", call->server);
 
-		/* operation complete */
-	case RXRPC_CSTATE_COMPLETE:
-		call->app_user = NULL;
-		removed = 0;
-		spin_lock(&afscm_calls_lock);
-		if (!list_empty(&call->app_link)) {
-			list_del_init(&call->app_link);
-			removed = 1;
-		}
-		spin_unlock(&afscm_calls_lock);
-
-		if (removed)
-			rxrpc_put_call(call);
-		break;
-
-		/* operation terminated on error */
-	case RXRPC_CSTATE_ERROR:
-		call->app_user = NULL;
-		break;
-
-	default:
-		break;
-	}
-
-	if (ret < 0)
-		rxrpc_call_abort(call, ret);
-
-	afs_put_server(server);
-
-	_leave(" = %d", ret);
+	afs_init_callback_state(call->server);
+	afs_send_empty_reply(call);
+	_leave("");
 }
 
 /*
- * handle a probe from a fileserver
+ * deliver request data to a CB.InitCallBackState call
  */
-static void _SRXAFSCM_Probe(struct rxrpc_call *call)
+static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
+					       struct sk_buff *skb,
+					       bool last)
 {
 	struct afs_server *server;
-	size_t count;
-	int ret = 0, removed;
+	struct in_addr addr;
 
-	_enter("%p{acs=%s}", call, rxrpc_call_states[call->app_call_state]);
+	_enter(",{%u},%d", skb->len, last);
 
-	server = afs_server_get_from_peer(call->conn->peer);
+	if (skb->len > 0)
+		return -EBADMSG;
+	if (!last)
+		return 0;
 
-	switch (call->app_call_state) {
-		/* we've received the last packet - drain all the data from the
-		 * call */
-	case RXRPC_CSTATE_SRVR_GOT_ARGS:
-		/* shouldn't be any args */
-		ret = -EBADMSG;
-		break;
+	/* no unmarshalling required */
+	call->state = AFS_CALL_REPLYING;
 
-		/* send the reply when asked for it */
-	case RXRPC_CSTATE_SRVR_SND_REPLY:
-		/* invoke the actual service routine */
-		ret = SRXAFSCM_Probe(server);
-		if (ret < 0)
-			break;
-
-		ret = rxrpc_call_write_data(call, 0, NULL, RXRPC_LAST_PACKET,
-					    GFP_KERNEL, 0, &count);
-		if (ret < 0)
-			break;
-		break;
+	/* we'll need the file server record as that tells us which set of
+	 * vnodes to operate upon */
+	memcpy(&addr, &ip_hdr(skb)->saddr, 4);
+	server = afs_find_server(&addr);
+	if (!server)
+		return -ENOTCONN;
+	call->server = server;
 
-		/* operation complete */
-	case RXRPC_CSTATE_COMPLETE:
-		call->app_user = NULL;
-		removed = 0;
-		spin_lock(&afscm_calls_lock);
-		if (!list_empty(&call->app_link)) {
-			list_del_init(&call->app_link);
-			removed = 1;
-		}
-		spin_unlock(&afscm_calls_lock);
+	INIT_WORK(&call->work, SRXAFSCB_InitCallBackState);
+	schedule_work(&call->work);
+	return 0;
+}
 
-		if (removed)
-			rxrpc_put_call(call);
-		break;
+/*
+ * allow the fileserver to see if the cache manager is still alive
+ */
+static void SRXAFSCB_Probe(struct work_struct *work)
+{
+	struct afs_call *call = container_of(work, struct afs_call, work);
 
-		/* operation terminated on error */
-	case RXRPC_CSTATE_ERROR:
-		call->app_user = NULL;
-		break;
+	_enter("");
+	afs_send_empty_reply(call);
+	_leave("");
+}
 
-	default:
-		break;
-	}
+/*
+ * deliver request data to a CB.Probe call
+ */
+static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
+				bool last)
+{
+	_enter(",{%u},%d", skb->len, last);
 
-	if (ret < 0)
-		rxrpc_call_abort(call, ret);
+	if (skb->len > 0)
+		return -EBADMSG;
+	if (!last)
+		return 0;
 
-	afs_put_server(server);
+	/* no unmarshalling required */
+	call->state = AFS_CALL_REPLYING;
 
-	_leave(" = %d", ret);
+	INIT_WORK(&call->work, SRXAFSCB_Probe);
+	schedule_work(&call->work);
+	return 0;
 }
diff --git a/fs/afs/cmservice.h b/fs/afs/cmservice.h
deleted file mode 100644
index 66e10c15bd1..00000000000
--- a/fs/afs/cmservice.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* AFS Cache Manager Service declarations
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef AFS_CMSERVICE_H
-#define AFS_CMSERVICE_H
-
-#include <rxrpc/transport.h>
-#include "types.h"
-
-/* cache manager start/stop */
-extern int afscm_start(void);
-extern void afscm_stop(void);
-
-/* cache manager server functions */
-extern int SRXAFSCM_InitCallBackState(struct afs_server *);
-extern int SRXAFSCM_CallBack(struct afs_server *, size_t,
-			     struct afs_callback[]);
-extern int SRXAFSCM_Probe(struct afs_server *);
-
-#endif /* AFS_CMSERVICE_H */
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 2f6d9237646..d7697f6f3b7 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -15,11 +15,6 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include <linux/smp_lock.h>
-#include "vnode.h"
-#include "volume.h"
-#include <rxrpc/call.h>
-#include "super.h"
 #include "internal.h"
 
 static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry,
@@ -127,9 +122,10 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
 	if (qty == 0)
 		goto error;
 
-	if (page->index==0 && qty!=ntohs(dbuf->blocks[0].pagehdr.npages)) {
+	if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) {
 		printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n",
-		       __FUNCTION__,dir->i_ino,qty,ntohs(dbuf->blocks[0].pagehdr.npages));
+		       __FUNCTION__, dir->i_ino, qty,
+		       ntohs(dbuf->blocks[0].pagehdr.npages));
 		goto error;
 	}
 #endif
@@ -194,6 +190,7 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index)
 
 fail:
 	afs_dir_put_page(page);
+	_leave(" = -EIO");
 	return ERR_PTR(-EIO);
 }
 
@@ -207,7 +204,7 @@ static int afs_dir_open(struct inode *inode, struct file *file)
 	BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048);
 	BUILD_BUG_ON(sizeof(union afs_dirent) != 32);
 
-	if (AFS_FS_I(inode)->flags & AFS_VNODE_DELETED)
+	if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(inode)->flags))
 		return -ENOENT;
 
 	_leave(" = 0");
@@ -242,7 +239,7 @@ static int afs_dir_iterate_block(unsigned *fpos,
 		/* skip entries marked unused in the bitmap */
 		if (!(block->pagehdr.bitmap[offset / 8] &
 		      (1 << (offset % 8)))) {
-			_debug("ENT[%Zu.%u]: unused\n",
+			_debug("ENT[%Zu.%u]: unused",
 			       blkoff / sizeof(union afs_dir_block), offset);
 			if (offset >= curr)
 				*fpos = blkoff +
@@ -256,7 +253,7 @@ static int afs_dir_iterate_block(unsigned *fpos,
 			       sizeof(*block) -
 			       offset * sizeof(union afs_dirent));
 
-		_debug("ENT[%Zu.%u]: %s %Zu \"%s\"\n",
+		_debug("ENT[%Zu.%u]: %s %Zu \"%s\"",
 		       blkoff / sizeof(union afs_dir_block), offset,
 		       (offset < curr ? "skip" : "fill"),
 		       nlen, dire->u.name);
@@ -266,7 +263,7 @@ static int afs_dir_iterate_block(unsigned *fpos,
 			if (next >= AFS_DIRENT_PER_BLOCK) {
 				_debug("ENT[%Zu.%u]:"
 				       " %u travelled beyond end dir block"
-				       " (len %u/%Zu)\n",
+				       " (len %u/%Zu)",
 				       blkoff / sizeof(union afs_dir_block),
 				       offset, next, tmp, nlen);
 				return -EIO;
@@ -274,13 +271,13 @@ static int afs_dir_iterate_block(unsigned *fpos,
 			if (!(block->pagehdr.bitmap[next / 8] &
 			      (1 << (next % 8)))) {
 				_debug("ENT[%Zu.%u]:"
-				       " %u unmarked extension (len %u/%Zu)\n",
+				       " %u unmarked extension (len %u/%Zu)",
 				       blkoff / sizeof(union afs_dir_block),
 				       offset, next, tmp, nlen);
 				return -EIO;
 			}
 
-			_debug("ENT[%Zu.%u]: ext %u/%Zu\n",
+			_debug("ENT[%Zu.%u]: ext %u/%Zu",
 			       blkoff / sizeof(union afs_dir_block),
 			       next, tmp, nlen);
 			next++;
@@ -311,12 +308,12 @@ static int afs_dir_iterate_block(unsigned *fpos,
 }
 
 /*
- * read an AFS directory
+ * iterate through the data blob that lists the contents of an AFS directory
  */
 static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie,
 			   filldir_t filldir)
 {
-	union afs_dir_block	*dblock;
+	union afs_dir_block *dblock;
 	struct afs_dir_page *dbuf;
 	struct page *page;
 	unsigned blkoff, limit;
@@ -324,7 +321,7 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie,
 
 	_enter("{%lu},%u,,", dir->i_ino, *fpos);
 
-	if (AFS_FS_I(dir)->flags & AFS_VNODE_DELETED) {
+	if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) {
 		_leave(" = -ESTALE");
 		return -ESTALE;
 	}
@@ -381,10 +378,12 @@ static int afs_dir_readdir(struct file *file, void *cookie, filldir_t filldir)
 	unsigned fpos;
 	int ret;
 
-	_enter("{%Ld,{%lu}}", file->f_pos, file->f_path.dentry->d_inode->i_ino);
+	_enter("{%Ld,{%lu}}",
+	       file->f_pos, file->f_path.dentry->d_inode->i_ino);
 
 	fpos = file->f_pos;
-	ret = afs_dir_iterate(file->f_path.dentry->d_inode, &fpos, cookie, filldir);
+	ret = afs_dir_iterate(file->f_path.dentry->d_inode, &fpos,
+			      cookie, filldir);
 	file->f_pos = fpos;
 
 	_leave(" = %d", ret);
@@ -401,9 +400,13 @@ static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen,
 {
 	struct afs_dir_lookup_cookie *cookie = _cookie;
 
-	_enter("{%s,%Zu},%s,%u,,%lu,%u",
+	_enter("{%s,%Zu},%s,%u,,%llu,%u",
 	       cookie->name, cookie->nlen, name, nlen, ino, dtype);
 
+	/* insanity checks first */
+	BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048);
+	BUILD_BUG_ON(sizeof(union afs_dirent) != 32);
+
 	if (cookie->nlen != nlen || memcmp(cookie->name, name, nlen) != 0) {
 		_leave(" = 0 [no]");
 		return 0;
@@ -418,34 +421,17 @@ static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen,
 }
 
 /*
- * look up an entry in a directory
+ * do a lookup in a directory
  */
-static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry,
-				     struct nameidata *nd)
+static int afs_do_lookup(struct inode *dir, struct dentry *dentry,
+			 struct afs_fid *fid)
 {
 	struct afs_dir_lookup_cookie cookie;
 	struct afs_super_info *as;
-	struct afs_vnode *vnode;
-	struct inode *inode;
 	unsigned fpos;
 	int ret;
 
-	_enter("{%lu},%p{%s}", dir->i_ino, dentry, dentry->d_name.name);
-
-	/* insanity checks first */
-	BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048);
-	BUILD_BUG_ON(sizeof(union afs_dirent) != 32);
-
-	if (dentry->d_name.len > 255) {
-		_leave(" = -ENAMETOOLONG");
-		return ERR_PTR(-ENAMETOOLONG);
-	}
-
-	vnode = AFS_FS_I(dir);
-	if (vnode->flags & AFS_VNODE_DELETED) {
-		_leave(" = -ESTALE");
-		return ERR_PTR(-ESTALE);
-	}
+	_enter("{%lu},%p{%s},", dir->i_ino, dentry, dentry->d_name.name);
 
 	as = dir->i_sb->s_fs_info;
 
@@ -458,54 +444,130 @@ static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry,
 	fpos = 0;
 	ret = afs_dir_iterate(dir, &fpos, &cookie, afs_dir_lookup_filldir);
 	if (ret < 0) {
-		_leave(" = %d", ret);
-		return ERR_PTR(ret);
+		_leave(" = %d [iter]", ret);
+		return ret;
 	}
 
 	ret = -ENOENT;
 	if (!cookie.found) {
-		_leave(" = %d", ret);
-		return ERR_PTR(ret);
+		_leave(" = -ENOENT [not found]");
+		return -ENOENT;
 	}
 
-	/* instantiate the dentry */
-	ret = afs_iget(dir->i_sb, &cookie.fid, &inode);
+	*fid = cookie.fid;
+	_leave(" = 0 { vn=%u u=%u }", fid->vnode, fid->unique);
+	return 0;
+}
+
+/*
+ * look up an entry in a directory
+ */
+static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry,
+				     struct nameidata *nd)
+{
+	struct afs_vnode *vnode;
+	struct afs_fid fid;
+	struct inode *inode;
+	int ret;
+
+	_enter("{%lu},%p{%s}", dir->i_ino, dentry, dentry->d_name.name);
+
+	if (dentry->d_name.len > 255) {
+		_leave(" = -ENAMETOOLONG");
+		return ERR_PTR(-ENAMETOOLONG);
+	}
+
+	vnode = AFS_FS_I(dir);
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+		_leave(" = -ESTALE");
+		return ERR_PTR(-ESTALE);
+	}
+
+	ret = afs_do_lookup(dir, dentry, &fid);
 	if (ret < 0) {
-		_leave(" = %d", ret);
+		_leave(" = %d [do]", ret);
 		return ERR_PTR(ret);
 	}
 
+	/* instantiate the dentry */
+	inode = afs_iget(dir->i_sb, &fid);
+	if (IS_ERR(inode)) {
+		_leave(" = %ld", PTR_ERR(inode));
+		return ERR_PTR(PTR_ERR(inode));
+	}
+
 	dentry->d_op = &afs_fs_dentry_operations;
-	dentry->d_fsdata = (void *) (unsigned long) vnode->status.version;
 
 	d_add(dentry, inode);
 	_leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%lu }",
-	       cookie.fid.vnode,
-	       cookie.fid.unique,
+	       fid.vnode,
+	       fid.unique,
 	       dentry->d_inode->i_ino,
 	       dentry->d_inode->i_version);
 
 	return NULL;
 }
 
+/*
+ * propagate changed and modified flags on a directory to all the children of
+ * that directory as they may indicate that the ACL on the dir has changed,
+ * potentially rendering the child inaccessible or that a file has been deleted
+ * or renamed
+ */
+static void afs_propagate_dir_changes(struct dentry *dir)
+{
+	struct dentry *child;
+	bool c, m;
+
+	c = test_bit(AFS_VNODE_CHANGED, &AFS_FS_I(dir->d_inode)->flags);
+	m = test_bit(AFS_VNODE_MODIFIED, &AFS_FS_I(dir->d_inode)->flags);
+
+	_enter("{%d,%d}", c, m);
+
+	spin_lock(&dir->d_lock);
+
+	list_for_each_entry(child, &dir->d_subdirs, d_u.d_child) {
+		if (child->d_inode) {
+			struct afs_vnode *vnode;
+
+			_debug("tag %s", child->d_name.name);
+			vnode = AFS_FS_I(child->d_inode);
+			if (c)
+				set_bit(AFS_VNODE_DIR_CHANGED, &vnode->flags);
+			if (m)
+				set_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags);
+		}
+	}
+
+	spin_unlock(&dir->d_lock);
+}
+
 /*
  * check that a dentry lookup hit has found a valid entry
  * - NOTE! the hit can be a negative hit too, so we can't assume we have an
  *   inode
- * (derived from nfs_lookup_revalidate)
+ * - there are several things we need to check
+ *   - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
+ *     symlink)
+ *   - parent dir metadata changed (security changes)
+ *   - dentry data changed (write, truncate)
+ *   - dentry metadata changed (security changes)
  */
 static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct afs_dir_lookup_cookie cookie;
+	struct afs_vnode *vnode;
+	struct afs_fid fid;
 	struct dentry *parent;
 	struct inode *inode, *dir;
-	unsigned fpos;
 	int ret;
 
-	_enter("{sb=%p n=%s},", dentry->d_sb, dentry->d_name.name);
+	vnode = AFS_FS_I(dentry->d_inode);
+
+	_enter("{sb=%p n=%s fl=%lx},",
+	       dentry->d_sb, dentry->d_name.name, vnode->flags);
 
 	/* lock down the parent dentry so we can peer at it */
-	parent = dget_parent(dentry->d_parent);
+	parent = dget_parent(dentry);
 
 	dir = parent->d_inode;
 	inode = dentry->d_inode;
@@ -517,81 +579,92 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 	/* handle a bad inode */
 	if (is_bad_inode(inode)) {
 		printk("kAFS: afs_d_revalidate: %s/%s has bad inode\n",
-		       dentry->d_parent->d_name.name, dentry->d_name.name);
+		       parent->d_name.name, dentry->d_name.name);
 		goto out_bad;
 	}
 
-	/* force a full look up if the parent directory changed since last the
-	 * server was consulted
-	 * - otherwise this inode must still exist, even if the inode details
-	 *   themselves have changed
-	 */
-	if (AFS_FS_I(dir)->flags & AFS_VNODE_CHANGED)
-		afs_vnode_fetch_status(AFS_FS_I(dir));
-
-	if (AFS_FS_I(dir)->flags & AFS_VNODE_DELETED) {
+	/* check that this dirent still exists if the directory's contents were
+	 * modified */
+	if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) {
 		_debug("%s: parent dir deleted", dentry->d_name.name);
 		goto out_bad;
 	}
 
-	if (AFS_FS_I(inode)->flags & AFS_VNODE_DELETED) {
-		_debug("%s: file already deleted", dentry->d_name.name);
-		goto out_bad;
-	}
-
-	if ((unsigned long) dentry->d_fsdata !=
-	    (unsigned long) AFS_FS_I(dir)->status.version) {
-		_debug("%s: parent changed %lu -> %u",
-		       dentry->d_name.name,
-		       (unsigned long) dentry->d_fsdata,
-		       (unsigned) AFS_FS_I(dir)->status.version);
+	if (test_and_clear_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags)) {
+		/* rm/rmdir/rename may have occurred */
+		_debug("dir modified");
 
 		/* search the directory for this vnode */
-		cookie.name	= dentry->d_name.name;
-		cookie.nlen	= dentry->d_name.len;
-		cookie.fid.vid	= AFS_FS_I(inode)->volume->vid;
-		cookie.found	= 0;
-
-		fpos = 0;
-		ret = afs_dir_iterate(dir, &fpos, &cookie,
-				      afs_dir_lookup_filldir);
+		ret = afs_do_lookup(dir, dentry, &fid);
+		if (ret == -ENOENT) {
+			_debug("%s: dirent not found", dentry->d_name.name);
+			goto not_found;
+		}
 		if (ret < 0) {
 			_debug("failed to iterate dir %s: %d",
 			       parent->d_name.name, ret);
 			goto out_bad;
 		}
 
-		if (!cookie.found) {
-			_debug("%s: dirent not found", dentry->d_name.name);
-			goto not_found;
-		}
-
 		/* if the vnode ID has changed, then the dirent points to a
 		 * different file */
-		if (cookie.fid.vnode != AFS_FS_I(inode)->fid.vnode) {
-			_debug("%s: dirent changed", dentry->d_name.name);
+		if (fid.vnode != vnode->fid.vnode) {
+			_debug("%s: dirent changed [%u != %u]",
+			       dentry->d_name.name, fid.vnode,
+			       vnode->fid.vnode);
 			goto not_found;
 		}
 
 		/* if the vnode ID uniqifier has changed, then the file has
 		 * been deleted */
-		if (cookie.fid.unique != AFS_FS_I(inode)->fid.unique) {
+		if (fid.unique != vnode->fid.unique) {
 			_debug("%s: file deleted (uq %u -> %u I:%lu)",
-			       dentry->d_name.name,
-			       cookie.fid.unique,
-			       AFS_FS_I(inode)->fid.unique,
-			       inode->i_version);
-			spin_lock(&AFS_FS_I(inode)->lock);
-			AFS_FS_I(inode)->flags |= AFS_VNODE_DELETED;
-			spin_unlock(&AFS_FS_I(inode)->lock);
+			       dentry->d_name.name, fid.unique,
+			       vnode->fid.unique, inode->i_version);
+			spin_lock(&vnode->lock);
+			set_bit(AFS_VNODE_DELETED, &vnode->flags);
+			spin_unlock(&vnode->lock);
 			invalidate_remote_inode(inode);
 			goto out_bad;
 		}
+	}
+
+	/* if the directory's metadata were changed then the security may be
+	 * different and we may no longer have access */
+	mutex_lock(&vnode->cb_broken_lock);
 
-		dentry->d_fsdata =
-			(void *) (unsigned long) AFS_FS_I(dir)->status.version;
+	if (test_and_clear_bit(AFS_VNODE_DIR_CHANGED, &vnode->flags) ||
+	    test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
+		_debug("%s: changed", dentry->d_name.name);
+		set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
+		if (afs_vnode_fetch_status(vnode) < 0) {
+			mutex_unlock(&vnode->cb_broken_lock);
+			goto out_bad;
+		}
 	}
 
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+		_debug("%s: file already deleted", dentry->d_name.name);
+		mutex_unlock(&vnode->cb_broken_lock);
+		goto out_bad;
+	}
+
+	/* if the vnode's data version number changed then its contents are
+	 * different */
+	if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
+		_debug("zap data");
+		invalidate_remote_inode(inode);
+	}
+
+	if (S_ISDIR(inode->i_mode) &&
+	    (test_bit(AFS_VNODE_CHANGED, &vnode->flags) ||
+	     test_bit(AFS_VNODE_MODIFIED, &vnode->flags)))
+		afs_propagate_dir_changes(dentry);
+
+	clear_bit(AFS_VNODE_CHANGED, &vnode->flags);
+	clear_bit(AFS_VNODE_MODIFIED, &vnode->flags);
+	mutex_unlock(&vnode->cb_broken_lock);
+
 out_valid:
 	dput(parent);
 	_leave(" = 1 [valid]");
@@ -610,12 +683,10 @@ out_bad:
 			goto out_valid;
 	}
 
-	shrink_dcache_parent(dentry);
-
 	_debug("dropping dentry %s/%s",
-	       dentry->d_parent->d_name.name, dentry->d_name.name);
+	       parent->d_name.name, dentry->d_name.name);
+	shrink_dcache_parent(dentry);
 	d_drop(dentry);
-
 	dput(parent);
 
 	_leave(" = 0 [bad]");
@@ -635,10 +706,9 @@ static int afs_d_delete(struct dentry *dentry)
 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
 		goto zap;
 
-	if (dentry->d_inode) {
-		if (AFS_FS_I(dentry->d_inode)->flags & AFS_VNODE_DELETED)
+	if (dentry->d_inode &&
+	    test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dentry->d_inode)->flags))
 			goto zap;
-	}
 
 	_leave(" = 0 [keep]");
 	return 0;
diff --git a/fs/afs/errors.h b/fs/afs/errors.h
deleted file mode 100644
index bcc0a3309e7..00000000000
--- a/fs/afs/errors.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* AFS abort/error codes
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef AFS_ERRORS_H
-#define AFS_ERRORS_H
-
-#include "types.h"
-
-/*
- * file server abort codes
- */
-typedef enum {
-	VSALVAGE	= 101,	/* volume needs salvaging */
-	VNOVNODE	= 102,	/* no such file/dir (vnode) */
-	VNOVOL		= 103,	/* no such volume or volume unavailable */
-	VVOLEXISTS	= 104,	/* volume name already exists */
-	VNOSERVICE	= 105,	/* volume not currently in service */
-	VOFFLINE	= 106,	/* volume is currently offline (more info available [VVL-spec]) */
-	VONLINE		= 107,	/* volume is already online */
-	VDISKFULL	= 108,	/* disk partition is full */
-	VOVERQUOTA	= 109,	/* volume's maximum quota exceeded */
-	VBUSY		= 110,	/* volume is temporarily unavailable */
-	VMOVED		= 111,	/* volume moved to new server - ask this FS where */
-} afs_rxfs_abort_t;
-
-extern int afs_abort_to_error(int);
-
-#endif /* AFS_ERRORS_H */
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 01df30d256b..6990327e75d 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -1,6 +1,6 @@
-/* file.c: AFS filesystem file handling
+/* AFS filesystem file handling
  *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -15,9 +15,6 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include "volume.h"
-#include "vnode.h"
-#include <rxrpc/call.h>
 #include "internal.h"
 
 #if 0
@@ -80,12 +77,10 @@ static void afs_file_readpage_write_complete(void *cookie_data,
  */
 static int afs_file_readpage(struct file *file, struct page *page)
 {
-	struct afs_rxfs_fetch_descriptor desc;
-#ifdef AFS_CACHING_SUPPORT
-	struct cachefs_page *pageio;
-#endif
 	struct afs_vnode *vnode;
 	struct inode *inode;
+	size_t len;
+	off_t offset;
 	int ret;
 
 	inode = page->mapping->host;
@@ -97,14 +92,10 @@ static int afs_file_readpage(struct file *file, struct page *page)
 	BUG_ON(!PageLocked(page));
 
 	ret = -ESTALE;
-	if (vnode->flags & AFS_VNODE_DELETED)
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
 		goto error;
 
 #ifdef AFS_CACHING_SUPPORT
-	ret = cachefs_page_get_private(page, &pageio, GFP_NOIO);
-	if (ret < 0)
-		goto error;
-
 	/* is it cached? */
 	ret = cachefs_read_or_alloc_page(vnode->cache,
 					 page,
@@ -128,26 +119,19 @@ static int afs_file_readpage(struct file *file, struct page *page)
 	case -ENOBUFS:
 	case -ENODATA:
 	default:
-		desc.fid	= vnode->fid;
-		desc.offset	= page->index << PAGE_CACHE_SHIFT;
-		desc.size	= min((size_t) (inode->i_size - desc.offset),
-				      (size_t) PAGE_SIZE);
-		desc.buffer	= kmap(page);
-
-		clear_page(desc.buffer);
+		offset = page->index << PAGE_CACHE_SHIFT;
+		len = min_t(size_t, i_size_read(inode) - offset, PAGE_SIZE);
 
 		/* read the contents of the file from the server into the
 		 * page */
-		ret = afs_vnode_fetch_data(vnode, &desc);
-		kunmap(page);
+		ret = afs_vnode_fetch_data(vnode, offset, len, page);
 		if (ret < 0) {
-			if (ret==-ENOENT) {
+			if (ret == -ENOENT) {
 				_debug("got NOENT from server"
 				       " - marking file deleted and stale");
-				vnode->flags |= AFS_VNODE_DELETED;
+				set_bit(AFS_VNODE_DELETED, &vnode->flags);
 				ret = -ESTALE;
 			}
-
 #ifdef AFS_CACHING_SUPPORT
 			cachefs_uncache_page(vnode->cache, page);
 #endif
@@ -174,10 +158,9 @@ static int afs_file_readpage(struct file *file, struct page *page)
 	_leave(" = 0");
 	return 0;
 
- error:
+error:
 	SetPageError(page);
 	unlock_page(page);
-
 	_leave(" = %d", ret);
 	return ret;
 }
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index f1c3a186842..167ca615c2e 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -1,6 +1,6 @@
-/* fsclient.c: AFS File Server client stubs
+/* AFS File Server client stubs
  *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -11,818 +11,396 @@
 
 #include <linux/init.h>
 #include <linux/sched.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include "fsclient.h"
-#include "cmservice.h"
-#include "vnode.h"
-#include "server.h"
-#include "errors.h"
+#include <linux/circ_buf.h>
 #include "internal.h"
-
-#define FSFETCHSTATUS		132	/* AFS Fetch file status */
-#define FSFETCHDATA		130	/* AFS Fetch file data */
-#define FSGIVEUPCALLBACKS	147	/* AFS Discard callback promises */
-#define FSGETVOLUMEINFO		148	/* AFS Get root volume information */
-#define FSGETROOTVOLUME		151	/* AFS Get root volume name */
-#define FSLOOKUP		161	/* AFS lookup file in directory */
+#include "afs_fs.h"
 
 /*
- * map afs abort codes to/from Linux error codes
- * - called with call->lock held
+ * decode an AFSFetchStatus block
  */
-static void afs_rxfs_aemap(struct rxrpc_call *call)
+static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
+				      struct afs_vnode *vnode)
 {
-	switch (call->app_err_state) {
-	case RXRPC_ESTATE_LOCAL_ABORT:
-		call->app_abort_code = -call->app_errno;
-		break;
-	case RXRPC_ESTATE_PEER_ABORT:
-		call->app_errno = afs_abort_to_error(call->app_abort_code);
-		break;
-	default:
-		break;
+	const __be32 *bp = *_bp;
+	umode_t mode;
+	u64 data_version;
+	u32 changed = 0; /* becomes non-zero if ctime-type changes seen */
+
+#define EXTRACT(DST)				\
+	do {					\
+		u32 x = ntohl(*bp++);		\
+		changed |= DST - x;		\
+		DST = x;			\
+	} while (0)
+
+	vnode->status.if_version = ntohl(*bp++);
+	EXTRACT(vnode->status.type);
+	vnode->status.nlink = ntohl(*bp++);
+	EXTRACT(vnode->status.size);
+	data_version = ntohl(*bp++);
+	EXTRACT(vnode->status.author);
+	EXTRACT(vnode->status.owner);
+	EXTRACT(vnode->status.caller_access); /* call ticket dependent */
+	EXTRACT(vnode->status.anon_access);
+	EXTRACT(vnode->status.mode);
+	vnode->status.parent.vid = vnode->fid.vid;
+	EXTRACT(vnode->status.parent.vnode);
+	EXTRACT(vnode->status.parent.unique);
+	bp++; /* seg size */
+	vnode->status.mtime_client = ntohl(*bp++);
+	vnode->status.mtime_server = ntohl(*bp++);
+	bp++; /* group */
+	bp++; /* sync counter */
+	data_version |= (u64) ntohl(*bp++) << 32;
+	bp++; /* spare2 */
+	bp++; /* spare3 */
+	bp++; /* spare4 */
+	*_bp = bp;
+
+	if (changed) {
+		_debug("vnode changed");
+		set_bit(AFS_VNODE_CHANGED, &vnode->flags);
+		vnode->vfs_inode.i_uid		= vnode->status.owner;
+		vnode->vfs_inode.i_size		= vnode->status.size;
+		vnode->vfs_inode.i_version	= vnode->fid.unique;
+
+		vnode->status.mode &= S_IALLUGO;
+		mode = vnode->vfs_inode.i_mode;
+		mode &= ~S_IALLUGO;
+		mode |= vnode->status.mode;
+		vnode->vfs_inode.i_mode = mode;
+	}
+
+	_debug("vnode time %lx, %lx",
+	       vnode->status.mtime_client, vnode->status.mtime_server);
+	vnode->vfs_inode.i_ctime.tv_sec	= vnode->status.mtime_server;
+	vnode->vfs_inode.i_mtime	= vnode->vfs_inode.i_ctime;
+	vnode->vfs_inode.i_atime	= vnode->vfs_inode.i_ctime;
+
+	if (vnode->status.data_version != data_version) {
+		_debug("vnode modified %llx", data_version);
+		vnode->status.data_version = data_version;
+		set_bit(AFS_VNODE_MODIFIED, &vnode->flags);
+		set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
 	}
 }
 
 /*
- * get the root volume name from a fileserver
- * - this operation doesn't seem to work correctly in OpenAFS server 1.2.2
+ * decode an AFSCallBack block
  */
-#if 0
-int afs_rxfs_get_root_volume(struct afs_server *server,
-			     char *buf, size_t *buflen)
+static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode)
 {
-	struct rxrpc_connection *conn;
-	struct rxrpc_call *call;
-	struct kvec piov[2];
-	size_t sent;
-	int ret;
-	u32 param[1];
-
-	DECLARE_WAITQUEUE(myself, current);
-
-	kenter("%p,%p,%u",server, buf, *buflen);
-
-	/* get hold of the fileserver connection */
-	ret = afs_server_get_fsconn(server, &conn);
-	if (ret < 0)
-		goto out;
-
-	/* create a call through that connection */
-	ret = rxrpc_create_call(conn, NULL, NULL, afs_rxfs_aemap, &call);
-	if (ret < 0) {
-		printk("kAFS: Unable to create call: %d\n", ret);
-		goto out_put_conn;
-	}
-	call->app_opcode = FSGETROOTVOLUME;
-
-	/* we want to get event notifications from the call */
-	add_wait_queue(&call->waitq, &myself);
-
-	/* marshall the parameters */
-	param[0] = htonl(FSGETROOTVOLUME);
-
-	piov[0].iov_len = sizeof(param);
-	piov[0].iov_base = param;
-
-	/* send the parameters to the server */
-	ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-				    0, &sent);
-	if (ret < 0)
-		goto abort;
-
-	/* wait for the reply to completely arrive */
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (call->app_call_state != RXRPC_CSTATE_CLNT_RCV_REPLY ||
-		    signal_pending(current))
-			break;
-		schedule();
-	}
-	set_current_state(TASK_RUNNING);
-
-	ret = -EINTR;
-	if (signal_pending(current))
-		goto abort;
-
-	switch (call->app_call_state) {
-	case RXRPC_CSTATE_ERROR:
-		ret = call->app_errno;
-		kdebug("Got Error: %d", ret);
-		goto out_unwait;
-
-	case RXRPC_CSTATE_CLNT_GOT_REPLY:
-		/* read the reply */
-		kdebug("Got Reply: qty=%d", call->app_ready_qty);
-
-		ret = -EBADMSG;
-		if (call->app_ready_qty <= 4)
-			goto abort;
-
-		ret = rxrpc_call_read_data(call, NULL, call->app_ready_qty, 0);
-		if (ret < 0)
-			goto abort;
-
-#if 0
-		/* unmarshall the reply */
-		bp = buffer;
-		for (loop = 0; loop < 65; loop++)
-			entry->name[loop] = ntohl(*bp++);
-		entry->name[64] = 0;
-
-		entry->type = ntohl(*bp++);
-		entry->num_servers = ntohl(*bp++);
-
-		for (loop = 0; loop < 8; loop++)
-			entry->servers[loop].addr.s_addr = *bp++;
-
-		for (loop = 0; loop < 8; loop++)
-			entry->servers[loop].partition = ntohl(*bp++);
-
-		for (loop = 0; loop < 8; loop++)
-			entry->servers[loop].flags = ntohl(*bp++);
-
-		for (loop = 0; loop < 3; loop++)
-			entry->volume_ids[loop] = ntohl(*bp++);
-
-		entry->clone_id = ntohl(*bp++);
-		entry->flags = ntohl(*bp);
-#endif
+	const __be32 *bp = *_bp;
 
-		/* success */
-		ret = 0;
-		goto out_unwait;
-
-	default:
-		BUG();
-	}
-
-abort:
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	rxrpc_call_abort(call, ret);
-	schedule();
-out_unwait:
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&call->waitq, &myself);
-	rxrpc_put_call(call);
-out_put_conn:
-	afs_server_release_fsconn(server, conn);
-out:
-	kleave("");
-	return ret;
+	vnode->cb_version	= ntohl(*bp++);
+	vnode->cb_expiry	= ntohl(*bp++);
+	vnode->cb_type		= ntohl(*bp++);
+	vnode->cb_expires	= vnode->cb_expiry + get_seconds();
+	*_bp = bp;
 }
-#endif
 
 /*
- * get information about a volume
+ * decode an AFSVolSync block
  */
-#if 0
-int afs_rxfs_get_volume_info(struct afs_server *server,
-			     const char *name,
-			     struct afs_volume_info *vinfo)
+static void xdr_decode_AFSVolSync(const __be32 **_bp,
+				  struct afs_volsync *volsync)
 {
-	struct rxrpc_connection *conn;
-	struct rxrpc_call *call;
-	struct kvec piov[3];
-	size_t sent;
-	int ret;
-	u32 param[2], *bp, zero;
+	const __be32 *bp = *_bp;
 
-	DECLARE_WAITQUEUE(myself, current);
+	volsync->creation = ntohl(*bp++);
+	bp++; /* spare2 */
+	bp++; /* spare3 */
+	bp++; /* spare4 */
+	bp++; /* spare5 */
+	bp++; /* spare6 */
+	*_bp = bp;
+}
 
-	_enter("%p,%s,%p", server, name, vinfo);
+/*
+ * deliver reply data to an FS.FetchStatus
+ */
+static int afs_deliver_fs_fetch_status(struct afs_call *call,
+				       struct sk_buff *skb, bool last)
+{
+	const __be32 *bp;
 
-	/* get hold of the fileserver connection */
-	ret = afs_server_get_fsconn(server, &conn);
-	if (ret < 0)
-		goto out;
+	_enter(",,%u", last);
 
-	/* create a call through that connection */
-	ret = rxrpc_create_call(conn, NULL, NULL, afs_rxfs_aemap, &call);
-	if (ret < 0) {
-		printk("kAFS: Unable to create call: %d\n", ret);
-		goto out_put_conn;
-	}
-	call->app_opcode = FSGETVOLUMEINFO;
+	afs_transfer_reply(call, skb);
+	if (!last)
+		return 0;
 
-	/* we want to get event notifications from the call */
-	add_wait_queue(&call->waitq, &myself);
+	if (call->reply_size != call->reply_max)
+		return -EBADMSG;
 
-	/* marshall the parameters */
-	piov[1].iov_len = strlen(name);
-	piov[1].iov_base = (char *) name;
-
-	zero = 0;
-	piov[2].iov_len = (4 - (piov[1].iov_len & 3)) & 3;
-	piov[2].iov_base = &zero;
-
-	param[0] = htonl(FSGETVOLUMEINFO);
-	param[1] = htonl(piov[1].iov_len);
-
-	piov[0].iov_len = sizeof(param);
-	piov[0].iov_base = param;
-
-	/* send the parameters to the server */
-	ret = rxrpc_call_write_data(call, 3, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-				    0, &sent);
-	if (ret < 0)
-		goto abort;
-
-	/* wait for the reply to completely arrive */
-	bp = rxrpc_call_alloc_scratch(call, 64);
-
-	ret = rxrpc_call_read_data(call, bp, 64,
-				   RXRPC_CALL_READ_BLOCK |
-				   RXRPC_CALL_READ_ALL);
-	if (ret < 0) {
-		if (ret == -ECONNABORTED) {
-			ret = call->app_errno;
-			goto out_unwait;
-		}
-		goto abort;
-	}
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	xdr_decode_AFSFetchStatus(&bp, call->reply);
+	xdr_decode_AFSCallBack(&bp, call->reply);
+	if (call->reply2)
+		xdr_decode_AFSVolSync(&bp, call->reply2);
 
-	/* unmarshall the reply */
-	vinfo->vid = ntohl(*bp++);
-	vinfo->type = ntohl(*bp++);
-
-	vinfo->type_vids[0] = ntohl(*bp++);
-	vinfo->type_vids[1] = ntohl(*bp++);
-	vinfo->type_vids[2] = ntohl(*bp++);
-	vinfo->type_vids[3] = ntohl(*bp++);
-	vinfo->type_vids[4] = ntohl(*bp++);
-
-	vinfo->nservers = ntohl(*bp++);
-	vinfo->servers[0].addr.s_addr = *bp++;
-	vinfo->servers[1].addr.s_addr = *bp++;
-	vinfo->servers[2].addr.s_addr = *bp++;
-	vinfo->servers[3].addr.s_addr = *bp++;
-	vinfo->servers[4].addr.s_addr = *bp++;
-	vinfo->servers[5].addr.s_addr = *bp++;
-	vinfo->servers[6].addr.s_addr = *bp++;
-	vinfo->servers[7].addr.s_addr = *bp++;
-
-	ret = -EBADMSG;
-	if (vinfo->nservers > 8)
-		goto abort;
-
-	/* success */
-	ret = 0;
-
-out_unwait:
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&call->waitq, &myself);
-	rxrpc_put_call(call);
-out_put_conn:
-	afs_server_release_fsconn(server, conn);
-out:
-	_leave("");
-	return ret;
-
-abort:
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	rxrpc_call_abort(call, ret);
-	schedule();
-	goto out_unwait;
+	_leave(" = 0 [done]");
+	return 0;
 }
-#endif
+
+/*
+ * FS.FetchStatus operation type
+ */
+static const struct afs_call_type afs_RXFSFetchStatus = {
+	.deliver	= afs_deliver_fs_fetch_status,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_flat_call_destructor,
+};
 
 /*
  * fetch the status information for a file
  */
-int afs_rxfs_fetch_file_status(struct afs_server *server,
-			       struct afs_vnode *vnode,
-			       struct afs_volsync *volsync)
+int afs_fs_fetch_file_status(struct afs_server *server,
+			     struct afs_vnode *vnode,
+			     struct afs_volsync *volsync,
+			     const struct afs_wait_mode *wait_mode)
 {
-	struct afs_server_callslot callslot;
-	struct rxrpc_call *call;
-	struct kvec piov[1];
-	size_t sent;
-	int ret;
+	struct afs_call *call;
 	__be32 *bp;
 
-	DECLARE_WAITQUEUE(myself, current);
+	_enter("");
 
-	_enter("%p,{%u,%u,%u}",
-	       server, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
+	call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, 120);
+	if (!call)
+		return -ENOMEM;
 
-	/* get hold of the fileserver connection */
-	ret = afs_server_request_callslot(server, &callslot);
-	if (ret < 0)
-		goto out;
-
-	/* create a call through that connection */
-	ret = rxrpc_create_call(callslot.conn, NULL, NULL, afs_rxfs_aemap,
-				&call);
-	if (ret < 0) {
-		printk("kAFS: Unable to create call: %d\n", ret);
-		goto out_put_conn;
-	}
-	call->app_opcode = FSFETCHSTATUS;
-
-	/* we want to get event notifications from the call */
-	add_wait_queue(&call->waitq, &myself);
+	call->reply = vnode;
+	call->reply2 = volsync;
+	call->service_id = FS_SERVICE;
+	call->port = htons(AFS_FS_PORT);
 
 	/* marshall the parameters */
-	bp = rxrpc_call_alloc_scratch(call, 16);
+	bp = call->request;
 	bp[0] = htonl(FSFETCHSTATUS);
 	bp[1] = htonl(vnode->fid.vid);
 	bp[2] = htonl(vnode->fid.vnode);
 	bp[3] = htonl(vnode->fid.unique);
 
-	piov[0].iov_len = 16;
-	piov[0].iov_base = bp;
-
-	/* send the parameters to the server */
-	ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-				    0, &sent);
-	if (ret < 0)
-		goto abort;
-
-	/* wait for the reply to completely arrive */
-	bp = rxrpc_call_alloc_scratch(call, 120);
-
-	ret = rxrpc_call_read_data(call, bp, 120,
-				   RXRPC_CALL_READ_BLOCK |
-				   RXRPC_CALL_READ_ALL);
-	if (ret < 0) {
-		if (ret == -ECONNABORTED) {
-			ret = call->app_errno;
-			goto out_unwait;
-		}
-		goto abort;
-	}
-
-	/* unmarshall the reply */
-	vnode->status.if_version	= ntohl(*bp++);
-	vnode->status.type		= ntohl(*bp++);
-	vnode->status.nlink		= ntohl(*bp++);
-	vnode->status.size		= ntohl(*bp++);
-	vnode->status.version		= ntohl(*bp++);
-	vnode->status.author		= ntohl(*bp++);
-	vnode->status.owner		= ntohl(*bp++);
-	vnode->status.caller_access	= ntohl(*bp++);
-	vnode->status.anon_access	= ntohl(*bp++);
-	vnode->status.mode		= ntohl(*bp++);
-	vnode->status.parent.vid	= vnode->fid.vid;
-	vnode->status.parent.vnode	= ntohl(*bp++);
-	vnode->status.parent.unique	= ntohl(*bp++);
-	bp++; /* seg size */
-	vnode->status.mtime_client	= ntohl(*bp++);
-	vnode->status.mtime_server	= ntohl(*bp++);
-	bp++; /* group */
-	bp++; /* sync counter */
-	vnode->status.version |= ((unsigned long long) ntohl(*bp++)) << 32;
-	bp++; /* spare2 */
-	bp++; /* spare3 */
-	bp++; /* spare4 */
-
-	vnode->cb_version		= ntohl(*bp++);
-	vnode->cb_expiry		= ntohl(*bp++);
-	vnode->cb_type			= ntohl(*bp++);
-
-	if (volsync) {
-		volsync->creation	= ntohl(*bp++);
-		bp++; /* spare2 */
-		bp++; /* spare3 */
-		bp++; /* spare4 */
-		bp++; /* spare5 */
-		bp++; /* spare6 */
-	}
-
-	/* success */
-	ret = 0;
-
-out_unwait:
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&call->waitq, &myself);
-	rxrpc_put_call(call);
-out_put_conn:
-	afs_server_release_callslot(server, &callslot);
-out:
-	_leave("");
-	return ret;
-
-abort:
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	rxrpc_call_abort(call, ret);
-	schedule();
-	goto out_unwait;
+	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
 }
 
 /*
- * fetch the contents of a file or directory
+ * deliver reply data to an FS.FetchData
  */
-int afs_rxfs_fetch_file_data(struct afs_server *server,
-			     struct afs_vnode *vnode,
-			     struct afs_rxfs_fetch_descriptor *desc,
-			     struct afs_volsync *volsync)
+static int afs_deliver_fs_fetch_data(struct afs_call *call,
+				     struct sk_buff *skb, bool last)
 {
-	struct afs_server_callslot callslot;
-	struct rxrpc_call *call;
-	struct kvec piov[1];
-	size_t sent;
+	const __be32 *bp;
+	struct page *page;
+	void *buffer;
 	int ret;
-	__be32 *bp;
 
-	DECLARE_WAITQUEUE(myself, current);
-
-	_enter("%p,{fid={%u,%u,%u},sz=%Zu,of=%lu}",
-	       server,
-	       desc->fid.vid,
-	       desc->fid.vnode,
-	       desc->fid.unique,
-	       desc->size,
-	       desc->offset);
-
-	/* get hold of the fileserver connection */
-	ret = afs_server_request_callslot(server, &callslot);
-	if (ret < 0)
-		goto out;
-
-	/* create a call through that connection */
-	ret = rxrpc_create_call(callslot.conn, NULL, NULL, afs_rxfs_aemap, &call);
-	if (ret < 0) {
-		printk("kAFS: Unable to create call: %d\n", ret);
-		goto out_put_conn;
-	}
-	call->app_opcode = FSFETCHDATA;
+	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+
+	switch (call->unmarshall) {
+	case 0:
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* extract the returned data length */
+	case 1:
+		_debug("extract data length");
+		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
+		}
 
-	/* we want to get event notifications from the call */
-	add_wait_queue(&call->waitq, &myself);
+		call->count = ntohl(call->tmp);
+		_debug("DATA length: %u", call->count);
+		if (call->count > PAGE_SIZE)
+			return -EBADMSG;
+		call->offset = 0;
+		call->unmarshall++;
+
+		if (call->count < PAGE_SIZE) {
+			buffer = kmap_atomic(call->reply3, KM_USER0);
+			memset(buffer + PAGE_SIZE - call->count, 0,
+			       call->count);
+			kunmap_atomic(buffer, KM_USER0);
+		}
 
-	/* marshall the parameters */
-	bp = rxrpc_call_alloc_scratch(call, 24);
-	bp[0] = htonl(FSFETCHDATA);
-	bp[1] = htonl(desc->fid.vid);
-	bp[2] = htonl(desc->fid.vnode);
-	bp[3] = htonl(desc->fid.unique);
-	bp[4] = htonl(desc->offset);
-	bp[5] = htonl(desc->size);
-
-	piov[0].iov_len = 24;
-	piov[0].iov_base = bp;
-
-	/* send the parameters to the server */
-	ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-				    0, &sent);
-	if (ret < 0)
-		goto abort;
-
-	/* wait for the data count to arrive */
-	ret = rxrpc_call_read_data(call, bp, 4, RXRPC_CALL_READ_BLOCK);
-	if (ret < 0)
-		goto read_failed;
-
-	desc->actual = ntohl(bp[0]);
-	if (desc->actual != desc->size) {
-		ret = -EBADMSG;
-		goto abort;
-	}
+		/* extract the returned data */
+	case 2:
+		_debug("extract data");
+		page = call->reply3;
+		buffer = kmap_atomic(page, KM_USER0);
+		ret = afs_extract_data(call, skb, last, buffer, call->count);
+		kunmap_atomic(buffer, KM_USER0);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
+		}
 
-	/* call the app to read the actual data */
-	rxrpc_call_reset_scratch(call);
-
-	ret = rxrpc_call_read_data(call, desc->buffer, desc->actual,
-				   RXRPC_CALL_READ_BLOCK);
-	if (ret < 0)
-		goto read_failed;
-
-	/* wait for the rest of the reply to completely arrive */
-	rxrpc_call_reset_scratch(call);
-	bp = rxrpc_call_alloc_scratch(call, 120);
-
-	ret = rxrpc_call_read_data(call, bp, 120,
-				   RXRPC_CALL_READ_BLOCK |
-				   RXRPC_CALL_READ_ALL);
-	if (ret < 0)
-		goto read_failed;
-
-	/* unmarshall the reply */
-	vnode->status.if_version	= ntohl(*bp++);
-	vnode->status.type		= ntohl(*bp++);
-	vnode->status.nlink		= ntohl(*bp++);
-	vnode->status.size		= ntohl(*bp++);
-	vnode->status.version		= ntohl(*bp++);
-	vnode->status.author		= ntohl(*bp++);
-	vnode->status.owner		= ntohl(*bp++);
-	vnode->status.caller_access	= ntohl(*bp++);
-	vnode->status.anon_access	= ntohl(*bp++);
-	vnode->status.mode		= ntohl(*bp++);
-	vnode->status.parent.vid	= desc->fid.vid;
-	vnode->status.parent.vnode	= ntohl(*bp++);
-	vnode->status.parent.unique	= ntohl(*bp++);
-	bp++; /* seg size */
-	vnode->status.mtime_client	= ntohl(*bp++);
-	vnode->status.mtime_server	= ntohl(*bp++);
-	bp++; /* group */
-	bp++; /* sync counter */
-	vnode->status.version |= ((unsigned long long) ntohl(*bp++)) << 32;
-	bp++; /* spare2 */
-	bp++; /* spare3 */
-	bp++; /* spare4 */
+		call->offset = 0;
+		call->unmarshall++;
 
-	vnode->cb_version		= ntohl(*bp++);
-	vnode->cb_expiry		= ntohl(*bp++);
-	vnode->cb_type			= ntohl(*bp++);
-
-	if (volsync) {
-		volsync->creation	= ntohl(*bp++);
-		bp++; /* spare2 */
-		bp++; /* spare3 */
-		bp++; /* spare4 */
-		bp++; /* spare5 */
-		bp++; /* spare6 */
-	}
+		/* extract the metadata */
+	case 3:
+		ret = afs_extract_data(call, skb, last, call->buffer, 120);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
+		}
+
+		bp = call->buffer;
+		xdr_decode_AFSFetchStatus(&bp, call->reply);
+		xdr_decode_AFSCallBack(&bp, call->reply);
+		if (call->reply2)
+			xdr_decode_AFSVolSync(&bp, call->reply2);
+
+		call->offset = 0;
+		call->unmarshall++;
 
-	/* success */
-	ret = 0;
-
-out_unwait:
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&call->waitq,&myself);
-	rxrpc_put_call(call);
-out_put_conn:
-	afs_server_release_callslot(server, &callslot);
-out:
-	_leave(" = %d", ret);
-	return ret;
-
-read_failed:
-	if (ret == -ECONNABORTED) {
-		ret = call->app_errno;
-		goto out_unwait;
+	case 4:
+		_debug("trailer");
+		if (skb->len != 0)
+			return -EBADMSG;
+		break;
 	}
 
-abort:
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	rxrpc_call_abort(call, ret);
-	schedule();
-	goto out_unwait;
+	if (!last)
+		return 0;
+
+	_leave(" = 0 [done]");
+	return 0;
 }
 
 /*
- * ask the AFS fileserver to discard a callback request on a file
+ * FS.FetchData operation type
  */
-int afs_rxfs_give_up_callback(struct afs_server *server,
-			      struct afs_vnode *vnode)
+static const struct afs_call_type afs_RXFSFetchData = {
+	.deliver	= afs_deliver_fs_fetch_data,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * fetch data from a file
+ */
+int afs_fs_fetch_data(struct afs_server *server,
+		      struct afs_vnode *vnode,
+		      off_t offset, size_t length,
+		      struct page *buffer,
+		      struct afs_volsync *volsync,
+		      const struct afs_wait_mode *wait_mode)
 {
-	struct afs_server_callslot callslot;
-	struct rxrpc_call *call;
-	struct kvec piov[1];
-	size_t sent;
-	int ret;
+	struct afs_call *call;
 	__be32 *bp;
 
-	DECLARE_WAITQUEUE(myself, current);
+	_enter("");
 
-	_enter("%p,{%u,%u,%u}",
-	       server, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
+	call = afs_alloc_flat_call(&afs_RXFSFetchData, 24, 120);
+	if (!call)
+		return -ENOMEM;
 
-	/* get hold of the fileserver connection */
-	ret = afs_server_request_callslot(server, &callslot);
-	if (ret < 0)
-		goto out;
-
-	/* create a call through that connection */
-	ret = rxrpc_create_call(callslot.conn, NULL, NULL, afs_rxfs_aemap, &call);
-	if (ret < 0) {
-		printk("kAFS: Unable to create call: %d\n", ret);
-		goto out_put_conn;
-	}
-	call->app_opcode = FSGIVEUPCALLBACKS;
-
-	/* we want to get event notifications from the call */
-	add_wait_queue(&call->waitq, &myself);
+	call->reply = vnode;
+	call->reply2 = volsync;
+	call->reply3 = buffer;
+	call->service_id = FS_SERVICE;
+	call->port = htons(AFS_FS_PORT);
 
 	/* marshall the parameters */
-	bp = rxrpc_call_alloc_scratch(call, (1 + 4 + 4) * 4);
-
-	piov[0].iov_len = (1 + 4 + 4) * 4;
-	piov[0].iov_base = bp;
-
-	*bp++ = htonl(FSGIVEUPCALLBACKS);
-	*bp++ = htonl(1);
-	*bp++ = htonl(vnode->fid.vid);
-	*bp++ = htonl(vnode->fid.vnode);
-	*bp++ = htonl(vnode->fid.unique);
-	*bp++ = htonl(1);
-	*bp++ = htonl(vnode->cb_version);
-	*bp++ = htonl(vnode->cb_expiry);
-	*bp++ = htonl(vnode->cb_type);
-
-	/* send the parameters to the server */
-	ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-				    0, &sent);
-	if (ret < 0)
-		goto abort;
-
-	/* wait for the reply to completely arrive */
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (call->app_call_state != RXRPC_CSTATE_CLNT_RCV_REPLY ||
-		    signal_pending(current))
-			break;
-		schedule();
-	}
-	set_current_state(TASK_RUNNING);
-
-	ret = -EINTR;
-	if (signal_pending(current))
-		goto abort;
-
-	switch (call->app_call_state) {
-	case RXRPC_CSTATE_ERROR:
-		ret = call->app_errno;
-		goto out_unwait;
+	bp = call->request;
+	bp[0] = htonl(FSFETCHDATA);
+	bp[1] = htonl(vnode->fid.vid);
+	bp[2] = htonl(vnode->fid.vnode);
+	bp[3] = htonl(vnode->fid.unique);
+	bp[4] = htonl(offset);
+	bp[5] = htonl(length);
 
-	case RXRPC_CSTATE_CLNT_GOT_REPLY:
-		ret = 0;
-		goto out_unwait;
+	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
 
-	default:
-		BUG();
-	}
+/*
+ * deliver reply data to an FS.GiveUpCallBacks
+ */
+static int afs_deliver_fs_give_up_callbacks(struct afs_call *call,
+					    struct sk_buff *skb, bool last)
+{
+	_enter(",{%u},%d", skb->len, last);
 
-out_unwait:
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&call->waitq, &myself);
-	rxrpc_put_call(call);
-out_put_conn:
-	afs_server_release_callslot(server, &callslot);
-out:
-	_leave("");
-	return ret;
-
-abort:
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	rxrpc_call_abort(call, ret);
-	schedule();
-	goto out_unwait;
+	if (skb->len > 0)
+		return -EBADMSG; /* shouldn't be any reply data */
+	return 0;
 }
 
 /*
- * look a filename up in a directory
- * - this operation doesn't seem to work correctly in OpenAFS server 1.2.2
+ * FS.GiveUpCallBacks operation type
+ */
+static const struct afs_call_type afs_RXFSGiveUpCallBacks = {
+	.deliver	= afs_deliver_fs_give_up_callbacks,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * give up a set of callbacks
+ * - the callbacks are held in the server->cb_break ring
  */
-#if 0
-int afs_rxfs_lookup(struct afs_server *server,
-		    struct afs_vnode *dir,
-		    const char *filename,
-		    struct afs_vnode *vnode,
-		    struct afs_volsync *volsync)
+int afs_fs_give_up_callbacks(struct afs_server *server,
+			     const struct afs_wait_mode *wait_mode)
 {
-	struct rxrpc_connection *conn;
-	struct rxrpc_call *call;
-	struct kvec piov[3];
-	size_t sent;
-	int ret;
-	u32 *bp, zero;
+	struct afs_call *call;
+	size_t ncallbacks;
+	__be32 *bp, *tp;
+	int loop;
 
-	DECLARE_WAITQUEUE(myself, current);
+	ncallbacks = CIRC_CNT(server->cb_break_head, server->cb_break_tail,
+			      ARRAY_SIZE(server->cb_break));
 
-	kenter("%p,{%u,%u,%u},%s",
-	       server, fid->vid, fid->vnode, fid->unique, filename);
+	_enter("{%zu},", ncallbacks);
 
-	/* get hold of the fileserver connection */
-	ret = afs_server_get_fsconn(server, &conn);
-	if (ret < 0)
-		goto out;
+	if (ncallbacks == 0)
+		return 0;
+	if (ncallbacks > AFSCBMAX)
+		ncallbacks = AFSCBMAX;
 
-	/* create a call through that connection */
-	ret = rxrpc_create_call(conn, NULL, NULL, afs_rxfs_aemap, &call);
-	if (ret < 0) {
-		printk("kAFS: Unable to create call: %d\n", ret);
-		goto out_put_conn;
-	}
-	call->app_opcode = FSLOOKUP;
+	_debug("break %zu callbacks", ncallbacks);
 
-	/* we want to get event notifications from the call */
-	add_wait_queue(&call->waitq,&myself);
+	call = afs_alloc_flat_call(&afs_RXFSGiveUpCallBacks,
+				   12 + ncallbacks * 6 * 4, 0);
+	if (!call)
+		return -ENOMEM;
+
+	call->service_id = FS_SERVICE;
+	call->port = htons(AFS_FS_PORT);
 
 	/* marshall the parameters */
-	bp = rxrpc_call_alloc_scratch(call, 20);
-
-	zero = 0;
-
-	piov[0].iov_len = 20;
-	piov[0].iov_base = bp;
-	piov[1].iov_len = strlen(filename);
-	piov[1].iov_base = (char *) filename;
-	piov[2].iov_len = (4 - (piov[1].iov_len & 3)) & 3;
-	piov[2].iov_base = &zero;
-
-	*bp++ = htonl(FSLOOKUP);
-	*bp++ = htonl(dirfid->vid);
-	*bp++ = htonl(dirfid->vnode);
-	*bp++ = htonl(dirfid->unique);
-	*bp++ = htonl(piov[1].iov_len);
-
-	/* send the parameters to the server */
-	ret = rxrpc_call_write_data(call, 3, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-				    0, &sent);
-	if (ret < 0)
-		goto abort;
-
-	/* wait for the reply to completely arrive */
-	bp = rxrpc_call_alloc_scratch(call, 220);
-
-	ret = rxrpc_call_read_data(call, bp, 220,
-				   RXRPC_CALL_READ_BLOCK |
-				   RXRPC_CALL_READ_ALL);
-	if (ret < 0) {
-		if (ret == -ECONNABORTED) {
-			ret = call->app_errno;
-			goto out_unwait;
-		}
-		goto abort;
+	bp = call->request;
+	tp = bp + 2 + ncallbacks * 3;
+	*bp++ = htonl(FSGIVEUPCALLBACKS);
+	*bp++ = htonl(ncallbacks);
+	*tp++ = htonl(ncallbacks);
+
+	atomic_sub(ncallbacks, &server->cb_break_n);
+	for (loop = ncallbacks; loop > 0; loop--) {
+		struct afs_callback *cb =
+			&server->cb_break[server->cb_break_tail];
+
+		*bp++ = htonl(cb->fid.vid);
+		*bp++ = htonl(cb->fid.vnode);
+		*bp++ = htonl(cb->fid.unique);
+		*tp++ = htonl(cb->version);
+		*tp++ = htonl(cb->expiry);
+		*tp++ = htonl(cb->type);
+		smp_mb();
+		server->cb_break_tail =
+			(server->cb_break_tail + 1) &
+			(ARRAY_SIZE(server->cb_break) - 1);
 	}
 
-	/* unmarshall the reply */
-	fid->vid		= ntohl(*bp++);
-	fid->vnode		= ntohl(*bp++);
-	fid->unique		= ntohl(*bp++);
-
-	vnode->status.if_version	= ntohl(*bp++);
-	vnode->status.type		= ntohl(*bp++);
-	vnode->status.nlink		= ntohl(*bp++);
-	vnode->status.size		= ntohl(*bp++);
-	vnode->status.version		= ntohl(*bp++);
-	vnode->status.author		= ntohl(*bp++);
-	vnode->status.owner		= ntohl(*bp++);
-	vnode->status.caller_access	= ntohl(*bp++);
-	vnode->status.anon_access	= ntohl(*bp++);
-	vnode->status.mode		= ntohl(*bp++);
-	vnode->status.parent.vid	= dirfid->vid;
-	vnode->status.parent.vnode	= ntohl(*bp++);
-	vnode->status.parent.unique	= ntohl(*bp++);
-	bp++; /* seg size */
-	vnode->status.mtime_client	= ntohl(*bp++);
-	vnode->status.mtime_server	= ntohl(*bp++);
-	bp++; /* group */
-	bp++; /* sync counter */
-	vnode->status.version |= ((unsigned long long) ntohl(*bp++)) << 32;
-	bp++; /* spare2 */
-	bp++; /* spare3 */
-	bp++; /* spare4 */
-
-	dir->status.if_version		= ntohl(*bp++);
-	dir->status.type		= ntohl(*bp++);
-	dir->status.nlink		= ntohl(*bp++);
-	dir->status.size		= ntohl(*bp++);
-	dir->status.version		= ntohl(*bp++);
-	dir->status.author		= ntohl(*bp++);
-	dir->status.owner		= ntohl(*bp++);
-	dir->status.caller_access	= ntohl(*bp++);
-	dir->status.anon_access		= ntohl(*bp++);
-	dir->status.mode		= ntohl(*bp++);
-	dir->status.parent.vid		= dirfid->vid;
-	dir->status.parent.vnode	= ntohl(*bp++);
-	dir->status.parent.unique	= ntohl(*bp++);
-	bp++; /* seg size */
-	dir->status.mtime_client	= ntohl(*bp++);
-	dir->status.mtime_server	= ntohl(*bp++);
-	bp++; /* group */
-	bp++; /* sync counter */
-	dir->status.version |= ((unsigned long long) ntohl(*bp++)) << 32;
-	bp++; /* spare2 */
-	bp++; /* spare3 */
-	bp++; /* spare4 */
-
-	callback->fid		= *fid;
-	callback->version	= ntohl(*bp++);
-	callback->expiry	= ntohl(*bp++);
-	callback->type		= ntohl(*bp++);
-
-	if (volsync) {
-		volsync->creation	= ntohl(*bp++);
-		bp++; /* spare2 */
-		bp++; /* spare3 */
-		bp++; /* spare4 */
-		bp++; /* spare5 */
-		bp++; /* spare6 */
-	}
+	ASSERT(ncallbacks > 0);
+	wake_up_nr(&server->cb_break_waitq, ncallbacks);
 
-	/* success */
-	ret = 0;
-
-out_unwait:
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&call->waitq, &myself);
-	rxrpc_put_call(call);
-out_put_conn:
-	afs_server_release_fsconn(server, conn);
-out:
-	kleave("");
-	return ret;
-
-abort:
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	rxrpc_call_abort(call, ret);
-	schedule();
-	goto out_unwait;
+	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
 }
-#endif
diff --git a/fs/afs/fsclient.h b/fs/afs/fsclient.h
deleted file mode 100644
index e2b0b7bcd09..00000000000
--- a/fs/afs/fsclient.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* AFS File Server client stub declarations
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef AFS_FSCLIENT_H
-#define AFS_FSCLIENT_H
-
-#include "server.h"
-
-extern int afs_rxfs_get_volume_info(struct afs_server *,
-				    const char *,
-				    struct afs_volume_info *);
-
-extern int afs_rxfs_fetch_file_status(struct afs_server *,
-				      struct afs_vnode *,
-				      struct afs_volsync *);
-
-struct afs_rxfs_fetch_descriptor {
-	struct afs_fid	fid;		/* file ID to fetch */
-	size_t		size;		/* total number of bytes to fetch */
-	off_t		offset;		/* offset in file to start from */
-	void		*buffer;	/* read buffer */
-	size_t		actual;		/* actual size sent back by server */
-};
-
-extern int afs_rxfs_fetch_file_data(struct afs_server *,
-				    struct afs_vnode *,
-				    struct afs_rxfs_fetch_descriptor *,
-				    struct afs_volsync *);
-
-extern int afs_rxfs_give_up_callback(struct afs_server *,
-				     struct afs_vnode *);
-
-/* this doesn't appear to work in OpenAFS server */
-extern int afs_rxfs_lookup(struct afs_server *,
-			   struct afs_vnode *,
-			   const char *,
-			   struct afs_vnode *,
-			   struct afs_volsync *);
-
-/* this is apparently mis-implemented in OpenAFS server */
-extern int afs_rxfs_get_root_volume(struct afs_server *,
-				    char *,
-				    size_t *);
-
-
-#endif /* AFS_FSCLIENT_H */
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 900c8bb1c3b..18863315211 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -19,9 +19,6 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include "volume.h"
-#include "vnode.h"
-#include "super.h"
 #include "internal.h"
 
 struct afs_iget_data {
@@ -40,7 +37,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode)
 	       vnode->status.type,
 	       vnode->status.nlink,
 	       vnode->status.size,
-	       vnode->status.version,
+	       vnode->status.data_version,
 	       vnode->status.mode);
 
 	switch (vnode->status.type) {
@@ -78,7 +75,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode)
 	if (vnode->status.type == AFS_FTYPE_SYMLINK) {
 		afs_mntpt_check_symlink(vnode);
 
-		if (vnode->flags & AFS_VNODE_MOUNTPOINT) {
+		if (test_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags)) {
 			inode->i_mode	= S_IFDIR | vnode->status.mode;
 			inode->i_op	= &afs_mntpt_inode_operations;
 			inode->i_fop	= &afs_mntpt_file_operations;
@@ -88,25 +85,6 @@ static int afs_inode_map_status(struct afs_vnode *vnode)
 	return 0;
 }
 
-/*
- * attempt to fetch the status of an inode, coelescing multiple simultaneous
- * fetches
- */
-static int afs_inode_fetch_status(struct inode *inode)
-{
-	struct afs_vnode *vnode;
-	int ret;
-
-	vnode = AFS_FS_I(inode);
-
-	ret = afs_vnode_fetch_status(vnode);
-
-	if (ret == 0)
-		ret = afs_inode_map_status(vnode);
-
-	return ret;
-}
-
 /*
  * iget5() comparator
  */
@@ -137,8 +115,7 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
 /*
  * inode retrieval
  */
-inline int afs_iget(struct super_block *sb, struct afs_fid *fid,
-		    struct inode **_inode)
+inline struct inode *afs_iget(struct super_block *sb, struct afs_fid *fid)
 {
 	struct afs_iget_data data = { .fid = *fid };
 	struct afs_super_info *as;
@@ -155,20 +132,18 @@ inline int afs_iget(struct super_block *sb, struct afs_fid *fid,
 			     &data);
 	if (!inode) {
 		_leave(" = -ENOMEM");
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 	}
 
+	_debug("GOT INODE %p { vl=%x vn=%x, u=%x }",
+	       inode, fid->vid, fid->vnode, fid->unique);
+
 	vnode = AFS_FS_I(inode);
 
 	/* deal with an existing inode */
 	if (!(inode->i_state & I_NEW)) {
-		ret = afs_vnode_fetch_status(vnode);
-		if (ret == 0)
-			*_inode = inode;
-		else
-			iput(inode);
-		_leave(" = %d", ret);
-		return ret;
+		_leave(" = %p", inode);
+		return inode;
 	}
 
 #ifdef AFS_CACHING_SUPPORT
@@ -181,21 +156,19 @@ inline int afs_iget(struct super_block *sb, struct afs_fid *fid,
 #endif
 
 	/* okay... it's a new inode */
-	inode->i_flags |= S_NOATIME;
-	vnode->flags |= AFS_VNODE_CHANGED;
-	ret = afs_inode_fetch_status(inode);
-	if (ret<0)
+	set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
+	ret = afs_vnode_fetch_status(vnode);
+	if (ret < 0)
+		goto bad_inode;
+	ret = afs_inode_map_status(vnode);
+	if (ret < 0)
 		goto bad_inode;
 
 	/* success */
+	inode->i_flags |= S_NOATIME;
 	unlock_new_inode(inode);
-
-	*_inode = inode;
-	_leave(" = 0 [CB { v=%u x=%lu t=%u }]",
-	       vnode->cb_version,
-	       vnode->cb_timeout.timo_jif,
-	       vnode->cb_type);
-	return 0;
+	_leave(" = %p [CB { v=%u t=%u }]", inode, vnode->cb_version, vnode->cb_type);
+	return inode;
 
 	/* failure */
 bad_inode:
@@ -204,7 +177,7 @@ bad_inode:
 	iput(inode);
 
 	_leave(" = %d [bad]", ret);
-	return ret;
+	return ERR_PTR(ret);
 }
 
 /*
@@ -213,36 +186,13 @@ bad_inode:
 int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		      struct kstat *stat)
 {
-	struct afs_vnode *vnode;
 	struct inode *inode;
-	int ret;
 
 	inode = dentry->d_inode;
 
 	_enter("{ ino=%lu v=%lu }", inode->i_ino, inode->i_version);
 
-	vnode = AFS_FS_I(inode);
-
-	ret = afs_inode_fetch_status(inode);
-	if (ret == -ENOENT) {
-		_leave(" = %d [%d %p]",
-		       ret, atomic_read(&dentry->d_count), dentry->d_inode);
-		return ret;
-	} else if (ret < 0) {
-		make_bad_inode(inode);
-		_leave(" = %d", ret);
-		return ret;
-	}
-
-	/* transfer attributes from the inode structure to the stat
-	 * structure */
 	generic_fillattr(inode, stat);
-
-	_leave(" = 0 CB { v=%u x=%u t=%u }",
-	       vnode->cb_version,
-	       vnode->cb_expiry,
-	       vnode->cb_type);
-
 	return 0;
 }
 
@@ -260,12 +210,23 @@ void afs_clear_inode(struct inode *inode)
 	       vnode->fid.vnode,
 	       vnode->cb_version,
 	       vnode->cb_expiry,
-	       vnode->cb_type
-	       );
+	       vnode->cb_type);
 
-	BUG_ON(inode->i_ino != vnode->fid.vnode);
+	_debug("CLEAR INODE %p", inode);
+
+	ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
+
+	afs_give_up_callback(vnode);
+
+	if (vnode->server) {
+		spin_lock(&vnode->server->fs_lock);
+		rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes);
+		spin_unlock(&vnode->server->fs_lock);
+		afs_put_server(vnode->server);
+		vnode->server = NULL;
+	}
 
-	afs_vnode_give_up_callback(vnode);
+	ASSERT(!vnode->cb_promised);
 
 #ifdef AFS_CACHING_SUPPORT
 	cachefs_relinquish_cookie(vnode->cache, 0);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index b6dd20a93cc..afc6f0f3025 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1,6 +1,6 @@
 /* internal AFS stuff
  *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -9,48 +9,321 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef AFS_INTERNAL_H
-#define AFS_INTERNAL_H
-
 #include <linux/compiler.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
+#include <linux/skbuff.h>
+#include <linux/rxrpc.h>
+#include "afs.h"
+#include "afs_vl.h"
+
+#define AFS_CELL_MAX_ADDRS 15
+
+struct afs_call;
+
+typedef enum {
+	AFS_VL_NEW,			/* new, uninitialised record */
+	AFS_VL_CREATING,		/* creating record */
+	AFS_VL_VALID,			/* record is pending */
+	AFS_VL_NO_VOLUME,		/* no such volume available */
+	AFS_VL_UPDATING,		/* update in progress */
+	AFS_VL_VOLUME_DELETED,		/* volume was deleted */
+	AFS_VL_UNCERTAIN,		/* uncertain state (update failed) */
+} __attribute__((packed)) afs_vlocation_state_t;
 
 /*
- * debug tracing
+ * definition of how to wait for the completion of an operation
  */
-#define kenter(FMT, a...)	printk("==> %s("FMT")\n",__FUNCTION__ , ## a)
-#define kleave(FMT, a...)	printk("<== %s()"FMT"\n",__FUNCTION__ , ## a)
-#define kdebug(FMT, a...)	printk(FMT"\n" , ## a)
-#define kproto(FMT, a...)	printk("### "FMT"\n" , ## a)
-#define knet(FMT, a...)		printk(FMT"\n" , ## a)
-
-#ifdef __KDEBUG
-#define _enter(FMT, a...)	kenter(FMT , ## a)
-#define _leave(FMT, a...)	kleave(FMT , ## a)
-#define _debug(FMT, a...)	kdebug(FMT , ## a)
-#define _proto(FMT, a...)	kproto(FMT , ## a)
-#define _net(FMT, a...)		knet(FMT , ## a)
-#else
-#define _enter(FMT, a...)	do { } while(0)
-#define _leave(FMT, a...)	do { } while(0)
-#define _debug(FMT, a...)	do { } while(0)
-#define _proto(FMT, a...)	do { } while(0)
-#define _net(FMT, a...)		do { } while(0)
-#endif
+struct afs_wait_mode {
+	/* RxRPC received message notification */
+	void (*rx_wakeup)(struct afs_call *call);
 
-static inline void afs_discard_my_signals(void)
-{
-	while (signal_pending(current)) {
-		siginfo_t sinfo;
+	/* synchronous call waiter and call dispatched notification */
+	int (*wait)(struct afs_call *call);
+
+	/* asynchronous call completion */
+	void (*async_complete)(void *reply, int error);
+};
+
+extern const struct afs_wait_mode afs_sync_call;
+extern const struct afs_wait_mode afs_async_call;
+
+/*
+ * a record of an in-progress RxRPC call
+ */
+struct afs_call {
+	const struct afs_call_type *type;	/* type of call */
+	const struct afs_wait_mode *wait_mode;	/* completion wait mode */
+	wait_queue_head_t	waitq;		/* processes awaiting completion */
+	struct work_struct	async_work;	/* asynchronous work processor */
+	struct work_struct	work;		/* actual work processor */
+	struct sk_buff_head	rx_queue;	/* received packets */
+	struct rxrpc_call	*rxcall;	/* RxRPC call handle */
+	struct key		*key;		/* security for this call */
+	struct afs_server	*server;	/* server affected by incoming CM call */
+	void			*request;	/* request data (first part) */
+	void			*request2;	/* request data (second part) */
+	void			*buffer;	/* reply receive buffer */
+	void			*reply;		/* reply buffer (first part) */
+	void			*reply2;	/* reply buffer (second part) */
+	void			*reply3;	/* reply buffer (third part) */
+	enum {					/* call state */
+		AFS_CALL_REQUESTING,	/* request is being sent for outgoing call */
+		AFS_CALL_AWAIT_REPLY,	/* awaiting reply to outgoing call */
+		AFS_CALL_AWAIT_OP_ID,	/* awaiting op ID on incoming call */
+		AFS_CALL_AWAIT_REQUEST,	/* awaiting request data on incoming call */
+		AFS_CALL_REPLYING,	/* replying to incoming call */
+		AFS_CALL_AWAIT_ACK,	/* awaiting final ACK of incoming call */
+		AFS_CALL_COMPLETE,	/* successfully completed */
+		AFS_CALL_BUSY,		/* server was busy */
+		AFS_CALL_ABORTED,	/* call was aborted */
+		AFS_CALL_ERROR,		/* call failed due to error */
+	}			state;
+	int			error;		/* error code */
+	unsigned		request_size;	/* size of request data */
+	unsigned		reply_max;	/* maximum size of reply */
+	unsigned		reply_size;	/* current size of reply */
+	unsigned short		offset;		/* offset into received data store */
+	unsigned char		unmarshall;	/* unmarshalling phase */
+	bool			incoming;	/* T if incoming call */
+	u16			service_id;	/* RxRPC service ID to call */
+	__be16			port;		/* target UDP port */
+	__be32			operation_ID;	/* operation ID for an incoming call */
+	u32			count;		/* count for use in unmarshalling */
+	__be32			tmp;		/* place to extract temporary data */
+};
+
+struct afs_call_type {
+	/* deliver request or reply data to an call
+	 * - returning an error will cause the call to be aborted
+	 */
+	int (*deliver)(struct afs_call *call, struct sk_buff *skb,
+		       bool last);
+
+	/* map an abort code to an error number */
+	int (*abort_to_error)(u32 abort_code);
+
+	/* clean up a call */
+	void (*destructor)(struct afs_call *call);
+};
+
+/*
+ * AFS superblock private data
+ * - there's one superblock per volume
+ */
+struct afs_super_info {
+	struct afs_volume	*volume;	/* volume record */
+	char			rwparent;	/* T if parent is R/W AFS volume */
+};
 
-		spin_lock_irq(&current->sighand->siglock);
-		dequeue_signal(current,&current->blocked, &sinfo);
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
+{
+	return sb->s_fs_info;
 }
 
+extern struct file_system_type afs_fs_type;
+
+/*
+ * entry in the cached cell catalogue
+ */
+struct afs_cache_cell {
+	char			name[64];	/* cell name (padded with NULs) */
+	struct in_addr		vl_servers[15];	/* cached cell VL servers */
+};
+
+/*
+ * AFS cell record
+ */
+struct afs_cell {
+	atomic_t		usage;
+	struct list_head	link;		/* main cell list link */
+	struct list_head	proc_link;	/* /proc cell list link */
+	struct proc_dir_entry	*proc_dir;	/* /proc dir for this cell */
+#ifdef AFS_CACHING_SUPPORT
+	struct cachefs_cookie	*cache;		/* caching cookie */
+#endif
+
+	/* server record management */
+	rwlock_t		servers_lock;	/* active server list lock */
+	struct list_head	servers;	/* active server list */
+
+	/* volume location record management */
+	struct rw_semaphore	vl_sem;		/* volume management serialisation semaphore */
+	struct list_head	vl_list;	/* cell's active VL record list */
+	spinlock_t		vl_lock;	/* vl_list lock */
+	unsigned short		vl_naddrs;	/* number of VL servers in addr list */
+	unsigned short		vl_curr_svix;	/* current server index */
+	struct in_addr		vl_addrs[AFS_CELL_MAX_ADDRS];	/* cell VL server addresses */
+
+	char			name[0];	/* cell name - must go last */
+};
+
+/*
+ * entry in the cached volume location catalogue
+ */
+struct afs_cache_vlocation {
+	uint8_t			name[64 + 1];	/* volume name (lowercase, padded with NULs) */
+	uint8_t			nservers;	/* number of entries used in servers[] */
+	uint8_t			vidmask;	/* voltype mask for vid[] */
+	uint8_t			srvtmask[8];	/* voltype masks for servers[] */
+#define AFS_VOL_VTM_RW	0x01 /* R/W version of the volume is available (on this server) */
+#define AFS_VOL_VTM_RO	0x02 /* R/O version of the volume is available (on this server) */
+#define AFS_VOL_VTM_BAK	0x04 /* backup version of the volume is available (on this server) */
+
+	afs_volid_t		vid[3];		/* volume IDs for R/W, R/O and Bak volumes */
+	struct in_addr		servers[8];	/* fileserver addresses */
+	time_t			rtime;		/* last retrieval time */
+};
+
+/*
+ * volume -> vnode hash table entry
+ */
+struct afs_cache_vhash {
+	afs_voltype_t		vtype;		/* which volume variation */
+	uint8_t			hash_bucket;	/* which hash bucket this represents */
+} __attribute__((packed));
+
+/*
+ * AFS volume location record
+ */
+struct afs_vlocation {
+	atomic_t		usage;
+	time_t			time_of_death;	/* time at which put reduced usage to 0 */
+	struct list_head	link;		/* link in cell volume location list */
+	struct list_head	grave;		/* link in master graveyard list */
+	struct list_head	update;		/* link in master update list */
+	struct afs_cell		*cell;		/* cell to which volume belongs */
+#ifdef AFS_CACHING_SUPPORT
+	struct cachefs_cookie	*cache;		/* caching cookie */
+#endif
+	struct afs_cache_vlocation vldb;	/* volume information DB record */
+	struct afs_volume	*vols[3];	/* volume access record pointer (index by type) */
+	wait_queue_head_t	waitq;		/* status change waitqueue */
+	time_t			update_at;	/* time at which record should be updated */
+	rwlock_t		lock;		/* access lock */
+	afs_vlocation_state_t	state;		/* volume location state */
+	unsigned short		upd_rej_cnt;	/* ENOMEDIUM count during update */
+	unsigned short		upd_busy_cnt;	/* EBUSY count during update */
+	bool			valid;		/* T if valid */
+};
+
+/*
+ * AFS fileserver record
+ */
+struct afs_server {
+	atomic_t		usage;
+	time_t			time_of_death;	/* time at which put reduced usage to 0 */
+	struct in_addr		addr;		/* server address */
+	struct afs_cell		*cell;		/* cell in which server resides */
+	struct list_head	link;		/* link in cell's server list */
+	struct list_head	grave;		/* link in master graveyard list */
+	struct rb_node		master_rb;	/* link in master by-addr tree */
+	struct rw_semaphore	sem;		/* access lock */
+
+	/* file service access */
+	struct rb_root		fs_vnodes;	/* vnodes backed by this server (ordered by FID) */
+	unsigned long		fs_act_jif;	/* time at which last activity occurred */
+	unsigned long		fs_dead_jif;	/* time at which no longer to be considered dead */
+	spinlock_t		fs_lock;	/* access lock */
+	int			fs_state;      	/* 0 or reason FS currently marked dead (-errno) */
+
+	/* callback promise management */
+	struct rb_root		cb_promises;	/* vnode expiration list (ordered earliest first) */
+	struct delayed_work	cb_updater;	/* callback updater */
+	struct delayed_work	cb_break_work;	/* collected break dispatcher */
+	wait_queue_head_t	cb_break_waitq;	/* space available in cb_break waitqueue */
+	spinlock_t		cb_lock;	/* access lock */
+	struct afs_callback	cb_break[64];	/* ring of callbacks awaiting breaking */
+	atomic_t		cb_break_n;	/* number of pending breaks */
+	u8			cb_break_head;	/* head of callback breaking ring */
+	u8			cb_break_tail;	/* tail of callback breaking ring */
+};
+
+/*
+ * AFS volume access record
+ */
+struct afs_volume {
+	atomic_t		usage;
+	struct afs_cell		*cell;		/* cell to which belongs (unrefd ptr) */
+	struct afs_vlocation	*vlocation;	/* volume location */
+#ifdef AFS_CACHING_SUPPORT
+	struct cachefs_cookie	*cache;		/* caching cookie */
+#endif
+	afs_volid_t		vid;		/* volume ID */
+	afs_voltype_t		type;		/* type of volume */
+	char			type_force;	/* force volume type (suppress R/O -> R/W) */
+	unsigned short		nservers;	/* number of server slots filled */
+	unsigned short		rjservers;	/* number of servers discarded due to -ENOMEDIUM */
+	struct afs_server	*servers[8];	/* servers on which volume resides (ordered) */
+	struct rw_semaphore	server_sem;	/* lock for accessing current server */
+};
+
+/*
+ * vnode catalogue entry
+ */
+struct afs_cache_vnode {
+	afs_vnodeid_t		vnode_id;	/* vnode ID */
+	unsigned		vnode_unique;	/* vnode ID uniquifier */
+	afs_dataversion_t	data_version;	/* data version */
+};
+
+/*
+ * AFS inode private data
+ */
+struct afs_vnode {
+	struct inode		vfs_inode;	/* the VFS's inode record */
+
+	struct afs_volume	*volume;	/* volume on which vnode resides */
+	struct afs_server	*server;	/* server currently supplying this file */
+	struct afs_fid		fid;		/* the file identifier for this inode */
+	struct afs_file_status	status;		/* AFS status info for this file */
+#ifdef AFS_CACHING_SUPPORT
+	struct cachefs_cookie	*cache;		/* caching cookie */
+#endif
+
+	wait_queue_head_t	update_waitq;	/* status fetch waitqueue */
+	unsigned		update_cnt;	/* number of outstanding ops that will update the
+						 * status */
+	spinlock_t		lock;		/* waitqueue/flags lock */
+	unsigned long		flags;
+#define AFS_VNODE_CB_BROKEN	0		/* set if vnode's callback was broken */
+#define AFS_VNODE_CHANGED	1		/* set if vnode's metadata changed */
+#define AFS_VNODE_MODIFIED	2		/* set if vnode's data modified */
+#define AFS_VNODE_ZAP_DATA	3		/* set if vnode's data should be invalidated */
+#define AFS_VNODE_DELETED	4		/* set if vnode deleted on server */
+#define AFS_VNODE_MOUNTPOINT	5		/* set if vnode is a mountpoint symlink */
+#define AFS_VNODE_DIR_CHANGED	6		/* set if vnode's parent dir metadata changed */
+#define AFS_VNODE_DIR_MODIFIED	7		/* set if vnode's parent dir data modified */
+
+	/* outstanding callback notification on this file */
+	struct rb_node		server_rb;	/* link in server->fs_vnodes */
+	struct rb_node		cb_promise;	/* link in server->cb_promises */
+	struct work_struct	cb_broken_work;	/* work to be done on callback break */
+	struct mutex		cb_broken_lock;	/* lock against multiple attempts to fix break */
+//	struct list_head	cb_hash_link;	/* link in master callback hash */
+	time_t			cb_expires;	/* time at which callback expires */
+	time_t			cb_expires_at;	/* time used to order cb_promise */
+	unsigned		cb_version;	/* callback version */
+	unsigned		cb_expiry;	/* callback expiry time */
+	afs_callback_type_t	cb_type;	/* type of callback */
+	bool			cb_promised;	/* true if promise still holds */
+};
+
+/*****************************************************************************/
+/*
+ * callback.c
+ */
+extern void afs_init_callback_state(struct afs_server *);
+extern void afs_broken_callback_work(struct work_struct *);
+extern void afs_break_callbacks(struct afs_server *, size_t,
+				struct afs_callback[]);
+extern void afs_give_up_callback(struct afs_vnode *);
+extern void afs_dispatch_give_up_callbacks(struct work_struct *);
+extern void afs_flush_callback_breaks(struct afs_server *);
+extern int __init afs_callback_update_init(void);
+extern void __exit afs_callback_update_kill(void);
+
 /*
  * cell.c
  */
@@ -60,6 +333,19 @@ extern struct list_head afs_proc_cells;
 extern struct cachefs_index_def afs_cache_cell_index_def;
 #endif
 
+#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
+extern int afs_cell_init(char *);
+extern struct afs_cell *afs_cell_create(const char *, char *);
+extern struct afs_cell *afs_cell_lookup(const char *, unsigned);
+extern struct afs_cell *afs_grab_cell(struct afs_cell *);
+extern void afs_put_cell(struct afs_cell *);
+extern void afs_cell_purge(void);
+
+/*
+ * cmservice.c
+ */
+extern bool afs_cm_incoming_call(struct afs_call *);
+
 /*
  * dir.c
  */
@@ -76,10 +362,23 @@ extern const struct inode_operations afs_file_inode_operations;
 extern int afs_cache_get_page_cookie(struct page *, struct cachefs_page **);
 #endif
 
+/*
+ * fsclient.c
+ */
+extern int afs_fs_fetch_file_status(struct afs_server *,
+				    struct afs_vnode *,
+				    struct afs_volsync *,
+				    const struct afs_wait_mode *);
+extern int afs_fs_give_up_callbacks(struct afs_server *,
+				    const struct afs_wait_mode *);
+extern int afs_fs_fetch_data(struct afs_server *, struct afs_vnode *, off_t,
+			     size_t, struct page *, struct afs_volsync *,
+			     const struct afs_wait_mode *);
+
 /*
  * inode.c
  */
-extern int afs_iget(struct super_block *, struct afs_fid *, struct inode **);
+extern struct inode *afs_iget(struct super_block *, struct afs_fid *);
 extern int afs_inode_getattr(struct vfsmount *, struct dentry *,
 			     struct kstat *);
 extern void afs_clear_inode(struct inode *);
@@ -91,16 +390,21 @@ extern void afs_clear_inode(struct inode *);
 extern struct cachefs_netfs afs_cache_netfs;
 #endif
 
+/*
+ * misc.c
+ */
+extern int afs_abort_to_error(u32);
+
 /*
  * mntpt.c
  */
 extern const struct inode_operations afs_mntpt_inode_operations;
 extern const struct file_operations afs_mntpt_file_operations;
-extern struct afs_timer afs_mntpt_expiry_timer;
-extern struct afs_timer_ops afs_mntpt_expiry_timer_ops;
 extern unsigned long afs_mntpt_expiry_timeout;
 
 extern int afs_mntpt_check_symlink(struct afs_vnode *);
+extern void afs_mntpt_kill_timer(void);
+extern void afs_umount_begin(struct vfsmount *, int);
 
 /*
  * super.c
@@ -108,16 +412,6 @@ extern int afs_mntpt_check_symlink(struct afs_vnode *);
 extern int afs_fs_init(void);
 extern void afs_fs_exit(void);
 
-#define AFS_CB_HASH_COUNT (PAGE_SIZE / sizeof(struct list_head))
-
-extern struct list_head afs_cb_hash_tbl[];
-extern spinlock_t afs_cb_hash_lock;
-
-#define afs_cb_hash(SRV, FID)						\
-	afs_cb_hash_tbl[((unsigned long)(SRV) +				\
-			 (FID)->vid + (FID)->vnode + (FID)->unique) &	\
-			(AFS_CB_HASH_COUNT - 1)]
-
 /*
  * proc.c
  */
@@ -126,4 +420,217 @@ extern void afs_proc_cleanup(void);
 extern int afs_proc_cell_setup(struct afs_cell *);
 extern void afs_proc_cell_remove(struct afs_cell *);
 
-#endif /* AFS_INTERNAL_H */
+/*
+ * rxrpc.c
+ */
+extern int afs_open_socket(void);
+extern void afs_close_socket(void);
+extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t,
+			 const struct afs_wait_mode *);
+extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
+					    size_t, size_t);
+extern void afs_flat_call_destructor(struct afs_call *);
+extern void afs_transfer_reply(struct afs_call *, struct sk_buff *);
+extern void afs_send_empty_reply(struct afs_call *);
+extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
+			    size_t);
+
+/*
+ * server.c
+ */
+extern spinlock_t afs_server_peer_lock;
+
+#define afs_get_server(S) do { atomic_inc(&(S)->usage); } while(0)
+
+extern struct afs_server *afs_lookup_server(struct afs_cell *,
+					    const struct in_addr *);
+extern struct afs_server *afs_find_server(const struct in_addr *);
+extern void afs_put_server(struct afs_server *);
+extern void __exit afs_purge_servers(void);
+
+/*
+ * vlclient.c
+ */
+#ifdef AFS_CACHING_SUPPORT
+extern struct cachefs_index_def afs_vlocation_cache_index_def;
+#endif
+
+extern int afs_vl_get_entry_by_name(struct in_addr *, const char *,
+				    struct afs_cache_vlocation *,
+				    const struct afs_wait_mode *);
+extern int afs_vl_get_entry_by_id(struct in_addr *, afs_volid_t, afs_voltype_t,
+				  struct afs_cache_vlocation *,
+				  const struct afs_wait_mode *);
+
+/*
+ * vlocation.c
+ */
+#define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0)
+
+extern int __init afs_vlocation_update_init(void);
+extern struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *,
+						  const char *, size_t);
+extern void afs_put_vlocation(struct afs_vlocation *);
+extern void __exit afs_vlocation_purge(void);
+
+/*
+ * vnode.c
+ */
+#ifdef AFS_CACHING_SUPPORT
+extern struct cachefs_index_def afs_vnode_cache_index_def;
+#endif
+
+extern struct afs_timer_ops afs_vnode_cb_timed_out_ops;
+
+static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
+{
+	return container_of(inode, struct afs_vnode, vfs_inode);
+}
+
+static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
+{
+	return &vnode->vfs_inode;
+}
+
+extern int afs_vnode_fetch_status(struct afs_vnode *);
+extern int afs_vnode_fetch_data(struct afs_vnode *vnode, off_t, size_t,
+				struct page *);
+
+/*
+ * volume.c
+ */
+#ifdef AFS_CACHING_SUPPORT
+extern struct cachefs_index_def afs_volume_cache_index_def;
+#endif
+
+#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0)
+
+extern void afs_put_volume(struct afs_volume *);
+extern struct afs_volume *afs_volume_lookup(const char *, struct afs_cell *,
+					    int);
+extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *);
+extern int afs_volume_release_fileserver(struct afs_vnode *,
+					 struct afs_server *, int);
+
+/*****************************************************************************/
+/*
+ * debug tracing
+ */
+extern unsigned afs_debug;
+
+#define dbgprintk(FMT,...) \
+	printk("[%x%-6.6s] "FMT"\n", smp_processor_id(), current->comm ,##__VA_ARGS__)
+
+/* make sure we maintain the format strings, even when debugging is disabled */
+static inline __attribute__((format(printf,1,2)))
+void _dbprintk(const char *fmt, ...)
+{
+}
+
+#define kenter(FMT,...)	dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
+#define kleave(FMT,...)	dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
+#define kdebug(FMT,...)	dbgprintk("    "FMT ,##__VA_ARGS__)
+
+
+#if defined(__KDEBUG)
+#define _enter(FMT,...)	kenter(FMT,##__VA_ARGS__)
+#define _leave(FMT,...)	kleave(FMT,##__VA_ARGS__)
+#define _debug(FMT,...)	kdebug(FMT,##__VA_ARGS__)
+
+#elif defined(CONFIG_AFS_DEBUG)
+#define AFS_DEBUG_KENTER	0x01
+#define AFS_DEBUG_KLEAVE	0x02
+#define AFS_DEBUG_KDEBUG	0x04
+
+#define _enter(FMT,...)					\
+do {							\
+	if (unlikely(afs_debug & AFS_DEBUG_KENTER))	\
+		kenter(FMT,##__VA_ARGS__);		\
+} while (0)
+
+#define _leave(FMT,...)					\
+do {							\
+	if (unlikely(afs_debug & AFS_DEBUG_KLEAVE))	\
+		kleave(FMT,##__VA_ARGS__);		\
+} while (0)
+
+#define _debug(FMT,...)					\
+do {							\
+	if (unlikely(afs_debug & AFS_DEBUG_KDEBUG))	\
+		kdebug(FMT,##__VA_ARGS__);		\
+} while (0)
+
+#else
+#define _enter(FMT,...)	_dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
+#define _leave(FMT,...)	_dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
+#define _debug(FMT,...)	_dbprintk("    "FMT ,##__VA_ARGS__)
+#endif
+
+/*
+ * debug assertion checking
+ */
+#if 1 // defined(__KDEBUGALL)
+
+#define ASSERT(X)						\
+do {								\
+	if (unlikely(!(X))) {					\
+		printk(KERN_ERR "\n");				\
+		printk(KERN_ERR "AFS: Assertion failed\n");	\
+		BUG();						\
+	}							\
+} while(0)
+
+#define ASSERTCMP(X, OP, Y)						\
+do {									\
+	if (unlikely(!((X) OP (Y)))) {					\
+		printk(KERN_ERR "\n");					\
+		printk(KERN_ERR "AFS: Assertion failed\n");		\
+		printk(KERN_ERR "%lu " #OP " %lu is false\n",		\
+		       (unsigned long)(X), (unsigned long)(Y));		\
+		printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n",	\
+		       (unsigned long)(X), (unsigned long)(Y));		\
+		BUG();							\
+	}								\
+} while(0)
+
+#define ASSERTIF(C, X)						\
+do {								\
+	if (unlikely((C) && !(X))) {				\
+		printk(KERN_ERR "\n");				\
+		printk(KERN_ERR "AFS: Assertion failed\n");	\
+		BUG();						\
+	}							\
+} while(0)
+
+#define ASSERTIFCMP(C, X, OP, Y)					\
+do {									\
+	if (unlikely((C) && !((X) OP (Y)))) {				\
+		printk(KERN_ERR "\n");					\
+		printk(KERN_ERR "AFS: Assertion failed\n");		\
+		printk(KERN_ERR "%lu " #OP " %lu is false\n",		\
+		       (unsigned long)(X), (unsigned long)(Y));		\
+		printk(KERN_ERR "0x%lx " #OP " 0x%lx is false\n",	\
+		       (unsigned long)(X), (unsigned long)(Y));		\
+		BUG();							\
+	}								\
+} while(0)
+
+#else
+
+#define ASSERT(X)				\
+do {						\
+} while(0)
+
+#define ASSERTCMP(X, OP, Y)			\
+do {						\
+} while(0)
+
+#define ASSERTIF(C, X)				\
+do {						\
+} while(0)
+
+#define ASSERTIFCMP(C, X, OP, Y)		\
+do {						\
+} while(0)
+
+#endif /* __KDEBUGALL */
diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
deleted file mode 100644
index 8ca01c23601..00000000000
--- a/fs/afs/kafsasyncd.c
+++ /dev/null
@@ -1,247 +0,0 @@
-/* AFS asynchronous operation daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- *
- * The AFS async daemon is used to the following:
- * - probe "dead" servers to see whether they've come back to life yet.
- * - probe "live" servers that we haven't talked to for a while to see if they are better
- *   candidates for serving than what we're currently using
- * - poll volume location servers to keep up to date volume location lists
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/completion.h>
-#include <linux/freezer.h>
-#include "cell.h"
-#include "server.h"
-#include "volume.h"
-#include "kafsasyncd.h"
-#include "kafstimod.h"
-#include <rxrpc/call.h>
-#include <asm/errno.h>
-#include "internal.h"
-
-static DECLARE_COMPLETION(kafsasyncd_alive);
-static DECLARE_COMPLETION(kafsasyncd_dead);
-static DECLARE_WAIT_QUEUE_HEAD(kafsasyncd_sleepq);
-static struct task_struct *kafsasyncd_task;
-static int kafsasyncd_die;
-
-static int kafsasyncd(void *arg);
-
-static LIST_HEAD(kafsasyncd_async_attnq);
-static LIST_HEAD(kafsasyncd_async_busyq);
-static DEFINE_SPINLOCK(kafsasyncd_async_lock);
-
-static void kafsasyncd_null_call_attn_func(struct rxrpc_call *call)
-{
-}
-
-static void kafsasyncd_null_call_error_func(struct rxrpc_call *call)
-{
-}
-
-/*
- * start the async daemon
- */
-int afs_kafsasyncd_start(void)
-{
-	int ret;
-
-	ret = kernel_thread(kafsasyncd, NULL, 0);
-	if (ret < 0)
-		return ret;
-
-	wait_for_completion(&kafsasyncd_alive);
-
-	return ret;
-}
-
-/*
- * stop the async daemon
- */
-void afs_kafsasyncd_stop(void)
-{
-	/* get rid of my daemon */
-	kafsasyncd_die = 1;
-	wake_up(&kafsasyncd_sleepq);
-	wait_for_completion(&kafsasyncd_dead);
-}
-
-/*
- * probing daemon
- */
-static int kafsasyncd(void *arg)
-{
-	struct afs_async_op *op;
-	int die;
-
-	DECLARE_WAITQUEUE(myself, current);
-
-	kafsasyncd_task = current;
-
-	printk("kAFS: Started kafsasyncd %d\n", current->pid);
-
-	daemonize("kafsasyncd");
-
-	complete(&kafsasyncd_alive);
-
-	/* loop around looking for things to attend to */
-	do {
-		set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue(&kafsasyncd_sleepq, &myself);
-
-		for (;;) {
-			if (!list_empty(&kafsasyncd_async_attnq) ||
-			    signal_pending(current) ||
-			    kafsasyncd_die)
-				break;
-
-			schedule();
-			set_current_state(TASK_INTERRUPTIBLE);
-		}
-
-		remove_wait_queue(&kafsasyncd_sleepq, &myself);
-		set_current_state(TASK_RUNNING);
-
-		try_to_freeze();
-
-		/* discard pending signals */
-		afs_discard_my_signals();
-
-		die = kafsasyncd_die;
-
-		/* deal with the next asynchronous operation requiring
-		 * attention */
-		if (!list_empty(&kafsasyncd_async_attnq)) {
-			struct afs_async_op *op;
-
-			_debug("@@@ Begin Asynchronous Operation");
-
-			op = NULL;
-			spin_lock(&kafsasyncd_async_lock);
-
-			if (!list_empty(&kafsasyncd_async_attnq)) {
-				op = list_entry(kafsasyncd_async_attnq.next,
-						struct afs_async_op, link);
-				list_move_tail(&op->link,
-					      &kafsasyncd_async_busyq);
-			}
-
-			spin_unlock(&kafsasyncd_async_lock);
-
-			_debug("@@@ Operation %p {%p}\n",
-			       op, op ? op->ops : NULL);
-
-			if (op)
-				op->ops->attend(op);
-
-			_debug("@@@ End Asynchronous Operation");
-		}
-
-	} while(!die);
-
-	/* need to kill all outstanding asynchronous operations before
-	 * exiting */
-	kafsasyncd_task = NULL;
-	spin_lock(&kafsasyncd_async_lock);
-
-	/* fold the busy and attention queues together */
-	list_splice_init(&kafsasyncd_async_busyq,
-			 &kafsasyncd_async_attnq);
-
-	/* dequeue kafsasyncd from all their wait queues */
-	list_for_each_entry(op, &kafsasyncd_async_attnq, link) {
-		op->call->app_attn_func = kafsasyncd_null_call_attn_func;
-		op->call->app_error_func = kafsasyncd_null_call_error_func;
-		remove_wait_queue(&op->call->waitq, &op->waiter);
-	}
-
-	spin_unlock(&kafsasyncd_async_lock);
-
-	/* abort all the operations */
-	while (!list_empty(&kafsasyncd_async_attnq)) {
-		op = list_entry(kafsasyncd_async_attnq.next, struct afs_async_op, link);
-		list_del_init(&op->link);
-
-		rxrpc_call_abort(op->call, -EIO);
-		rxrpc_put_call(op->call);
-		op->call = NULL;
-
-		op->ops->discard(op);
-	}
-
-	/* and that's all */
-	_leave("");
-	complete_and_exit(&kafsasyncd_dead, 0);
-}
-
-/*
- * begin an operation
- * - place operation on busy queue
- */
-void afs_kafsasyncd_begin_op(struct afs_async_op *op)
-{
-	_enter("");
-
-	spin_lock(&kafsasyncd_async_lock);
-
-	init_waitqueue_entry(&op->waiter, kafsasyncd_task);
-	add_wait_queue(&op->call->waitq, &op->waiter);
-
-	list_move_tail(&op->link, &kafsasyncd_async_busyq);
-
-	spin_unlock(&kafsasyncd_async_lock);
-
-	_leave("");
-}
-
-/*
- * request attention for an operation
- * - move to attention queue
- */
-void afs_kafsasyncd_attend_op(struct afs_async_op *op)
-{
-	_enter("");
-
-	spin_lock(&kafsasyncd_async_lock);
-
-	list_move_tail(&op->link, &kafsasyncd_async_attnq);
-
-	spin_unlock(&kafsasyncd_async_lock);
-
-	wake_up(&kafsasyncd_sleepq);
-
-	_leave("");
-}
-
-/*
- * terminate an operation
- * - remove from either queue
- */
-void afs_kafsasyncd_terminate_op(struct afs_async_op *op)
-{
-	_enter("");
-
-	spin_lock(&kafsasyncd_async_lock);
-
-	if (!list_empty(&op->link)) {
-		list_del_init(&op->link);
-		remove_wait_queue(&op->call->waitq, &op->waiter);
-	}
-
-	spin_unlock(&kafsasyncd_async_lock);
-
-	wake_up(&kafsasyncd_sleepq);
-
-	_leave("");
-}
diff --git a/fs/afs/kafsasyncd.h b/fs/afs/kafsasyncd.h
deleted file mode 100644
index 1273eb544c5..00000000000
--- a/fs/afs/kafsasyncd.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* AFS asynchronous operation daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef AFS_KAFSASYNCD_H
-#define AFS_KAFSASYNCD_H
-
-#include "types.h"
-
-struct afs_async_op;
-
-struct afs_async_op_ops {
-	void (*attend)(struct afs_async_op *);
-	void (*discard)(struct afs_async_op *);
-};
-
-/*
- * asynchronous operation record
- */
-struct afs_async_op {
-	struct list_head		link;
-	struct afs_server		*server;	/* server being contacted */
-	struct rxrpc_call		*call;		/* RxRPC call performing op */
-	wait_queue_t			waiter;		/* wait queue for kafsasyncd */
-	const struct afs_async_op_ops	*ops;		/* operations */
-};
-
-static inline void afs_async_op_init(struct afs_async_op *op,
-				     const struct afs_async_op_ops *ops)
-{
-	INIT_LIST_HEAD(&op->link);
-	op->call = NULL;
-	op->ops = ops;
-}
-
-extern int afs_kafsasyncd_start(void);
-extern void afs_kafsasyncd_stop(void);
-
-extern void afs_kafsasyncd_begin_op(struct afs_async_op *);
-extern void afs_kafsasyncd_attend_op(struct afs_async_op *);
-extern void afs_kafsasyncd_terminate_op(struct afs_async_op *);
-
-#endif /* AFS_KAFSASYNCD_H */
diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c
deleted file mode 100644
index 3526dcccc16..00000000000
--- a/fs/afs/kafstimod.c
+++ /dev/null
@@ -1,194 +0,0 @@
-/* AFS timeout daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/completion.h>
-#include <linux/freezer.h>
-#include "cell.h"
-#include "volume.h"
-#include "kafstimod.h"
-#include <asm/errno.h>
-#include "internal.h"
-
-static DECLARE_COMPLETION(kafstimod_alive);
-static DECLARE_COMPLETION(kafstimod_dead);
-static DECLARE_WAIT_QUEUE_HEAD(kafstimod_sleepq);
-static int kafstimod_die;
-
-static LIST_HEAD(kafstimod_list);
-static DEFINE_SPINLOCK(kafstimod_lock);
-
-static int kafstimod(void *arg);
-
-/*
- * start the timeout daemon
- */
-int afs_kafstimod_start(void)
-{
-	int ret;
-
-	ret = kernel_thread(kafstimod, NULL, 0);
-	if (ret < 0)
-		return ret;
-
-	wait_for_completion(&kafstimod_alive);
-
-	return ret;
-}
-
-/*
- * stop the timeout daemon
- */
-void afs_kafstimod_stop(void)
-{
-	/* get rid of my daemon */
-	kafstimod_die = 1;
-	wake_up(&kafstimod_sleepq);
-	wait_for_completion(&kafstimod_dead);
-}
-
-/*
- * timeout processing daemon
- */
-static int kafstimod(void *arg)
-{
-	struct afs_timer *timer;
-
-	DECLARE_WAITQUEUE(myself, current);
-
-	printk("kAFS: Started kafstimod %d\n", current->pid);
-
-	daemonize("kafstimod");
-
-	complete(&kafstimod_alive);
-
-	/* loop around looking for things to attend to */
-loop:
-	set_current_state(TASK_INTERRUPTIBLE);
-	add_wait_queue(&kafstimod_sleepq, &myself);
-
-	for (;;) {
-		unsigned long jif;
-		signed long timeout;
-
-		/* deal with the server being asked to die */
-		if (kafstimod_die) {
-			remove_wait_queue(&kafstimod_sleepq, &myself);
-			_leave("");
-			complete_and_exit(&kafstimod_dead, 0);
-		}
-
-		try_to_freeze();
-
-		/* discard pending signals */
-		afs_discard_my_signals();
-
-		/* work out the time to elapse before the next event */
-		spin_lock(&kafstimod_lock);
-		if (list_empty(&kafstimod_list)) {
-			timeout = MAX_SCHEDULE_TIMEOUT;
-		} else {
-			timer = list_entry(kafstimod_list.next,
-					   struct afs_timer, link);
-			timeout = timer->timo_jif;
-			jif = jiffies;
-
-			if (time_before_eq((unsigned long) timeout, jif))
-				goto immediate;
-			timeout = (long) timeout - (long) jiffies;
-		}
-		spin_unlock(&kafstimod_lock);
-
-		schedule_timeout(timeout);
-
-		set_current_state(TASK_INTERRUPTIBLE);
-	}
-
-	/* the thing on the front of the queue needs processing
-	 * - we come here with the lock held and timer pointing to the expired
-	 *   entry
-	 */
-immediate:
-	remove_wait_queue(&kafstimod_sleepq, &myself);
-	set_current_state(TASK_RUNNING);
-
-	_debug("@@@ Begin Timeout of %p", timer);
-
-	/* dequeue the timer */
-	list_del_init(&timer->link);
-	spin_unlock(&kafstimod_lock);
-
-	/* call the timeout function */
-	timer->ops->timed_out(timer);
-
-	_debug("@@@ End Timeout");
-	goto loop;
-}
-
-/*
- * (re-)queue a timer
- */
-void afs_kafstimod_add_timer(struct afs_timer *timer, unsigned long timeout)
-{
-	struct afs_timer *ptimer;
-	struct list_head *_p;
-
-	_enter("%p,%lu", timer, timeout);
-
-	spin_lock(&kafstimod_lock);
-
-	list_del(&timer->link);
-
-	/* the timer was deferred or reset - put it back in the queue at the
-	 * right place */
-	timer->timo_jif = jiffies + timeout;
-
-	list_for_each(_p, &kafstimod_list) {
-		ptimer = list_entry(_p, struct afs_timer, link);
-		if (time_before(timer->timo_jif, ptimer->timo_jif))
-			break;
-	}
-
-	list_add_tail(&timer->link, _p); /* insert before stopping point */
-
-	spin_unlock(&kafstimod_lock);
-
-	wake_up(&kafstimod_sleepq);
-
-	_leave("");
-}
-
-/*
- * dequeue a timer
- * - returns 0 if the timer was deleted or -ENOENT if it wasn't queued
- */
-int afs_kafstimod_del_timer(struct afs_timer *timer)
-{
-	int ret = 0;
-
-	_enter("%p", timer);
-
-	spin_lock(&kafstimod_lock);
-
-	if (list_empty(&timer->link))
-		ret = -ENOENT;
-	else
-		list_del_init(&timer->link);
-
-	spin_unlock(&kafstimod_lock);
-
-	wake_up(&kafstimod_sleepq);
-
-	_leave(" = %d", ret);
-	return ret;
-}
diff --git a/fs/afs/kafstimod.h b/fs/afs/kafstimod.h
deleted file mode 100644
index 0d39becbbe0..00000000000
--- a/fs/afs/kafstimod.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* AFS timeout daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef AFS_KAFSTIMOD_H
-#define AFS_KAFSTIMOD_H
-
-#include "types.h"
-
-struct afs_timer;
-
-struct afs_timer_ops {
-	/* called when the front of the timer queue has timed out */
-	void (*timed_out)(struct afs_timer *);
-};
-
-/*
- * AFS timer/timeout record
- */
-struct afs_timer {
-	struct list_head		link;		/* link in timer queue */
-	unsigned long			timo_jif;	/* timeout time */
-	const struct afs_timer_ops	*ops;		/* timeout expiry function */
-};
-
-static inline void afs_timer_init(struct afs_timer *timer,
-				  const struct afs_timer_ops *ops)
-{
-	INIT_LIST_HEAD(&timer->link);
-	timer->ops = ops;
-}
-
-extern int afs_kafstimod_start(void);
-extern void afs_kafstimod_stop(void);
-extern void afs_kafstimod_add_timer(struct afs_timer *, unsigned long);
-extern int afs_kafstimod_del_timer(struct afs_timer *);
-
-#endif /* AFS_KAFSTIMOD_H */
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 5bf39f66f4c..0cf1b021ad5 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -13,43 +13,21 @@
 #include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/completion.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/call.h>
-#include <rxrpc/peer.h>
-#include "cache.h"
-#include "cell.h"
-#include "server.h"
-#include "fsclient.h"
-#include "cmservice.h"
-#include "kafstimod.h"
-#include "kafsasyncd.h"
 #include "internal.h"
 
-struct rxrpc_transport *afs_transport;
-
-static int afs_adding_peer(struct rxrpc_peer *peer);
-static void afs_discarding_peer(struct rxrpc_peer *peer);
-
-
 MODULE_DESCRIPTION("AFS Client File System");
 MODULE_AUTHOR("Red Hat, Inc.");
 MODULE_LICENSE("GPL");
 
+unsigned afs_debug;
+module_param_named(debug, afs_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(afs_debug, "AFS debugging mask");
+
 static char *rootcell;
 
 module_param(rootcell, charp, 0);
 MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
 
-
-static struct rxrpc_peer_ops afs_peer_ops = {
-	.adding		= afs_adding_peer,
-	.discarding	= afs_discarding_peer,
-};
-
-struct list_head afs_cb_hash_tbl[AFS_CB_HASH_COUNT];
-DEFINE_SPINLOCK(afs_cb_hash_lock);
-
 #ifdef AFS_CACHING_SUPPORT
 static struct cachefs_netfs_operations afs_cache_ops = {
 	.get_page_cookie	= afs_cache_get_page_cookie,
@@ -67,15 +45,10 @@ struct cachefs_netfs afs_cache_netfs = {
  */
 static int __init afs_init(void)
 {
-	int loop, ret;
+	int ret;
 
 	printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
 
-	/* initialise the callback hash table */
-	spin_lock_init(&afs_cb_hash_lock);
-	for (loop = AFS_CB_HASH_COUNT - 1; loop >= 0; loop--)
-		INIT_LIST_HEAD(&afs_cb_hash_tbl[loop]);
-
 	/* register the /proc stuff */
 	ret = afs_proc_init();
 	if (ret < 0)
@@ -94,22 +67,18 @@ static int __init afs_init(void)
 	if (ret < 0)
 		goto error_cell_init;
 
-	/* start the timeout daemon */
-	ret = afs_kafstimod_start();
+	/* initialise the VL update process */
+	ret = afs_vlocation_update_init();
 	if (ret < 0)
-		goto error_kafstimod;
+		goto error_vl_update_init;
 
-	/* start the async operation daemon */
-	ret = afs_kafsasyncd_start();
-	if (ret < 0)
-		goto error_kafsasyncd;
+	/* initialise the callback update process */
+	ret = afs_callback_update_init();
 
 	/* create the RxRPC transport */
-	ret = rxrpc_create_transport(7001, &afs_transport);
+	ret = afs_open_socket();
 	if (ret < 0)
-		goto error_transport;
-
-	afs_transport->peer_ops = &afs_peer_ops;
+		goto error_open_socket;
 
 	/* register the filesystems */
 	ret = afs_fs_init();
@@ -119,17 +88,16 @@ static int __init afs_init(void)
 	return ret;
 
 error_fs:
-	rxrpc_put_transport(afs_transport);
-error_transport:
-	afs_kafsasyncd_stop();
-error_kafsasyncd:
-	afs_kafstimod_stop();
-error_kafstimod:
+	afs_close_socket();
+error_open_socket:
+error_vl_update_init:
 error_cell_init:
 #ifdef AFS_CACHING_SUPPORT
 	cachefs_unregister_netfs(&afs_cache_netfs);
 error_cache:
 #endif
+	afs_callback_update_kill();
+	afs_vlocation_purge();
 	afs_cell_purge();
 	afs_proc_cleanup();
 	printk(KERN_ERR "kAFS: failed to register: %d\n", ret);
@@ -149,9 +117,11 @@ static void __exit afs_exit(void)
 	printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 unregistering.\n");
 
 	afs_fs_exit();
-	rxrpc_put_transport(afs_transport);
-	afs_kafstimod_stop();
-	afs_kafsasyncd_stop();
+	afs_close_socket();
+	afs_purge_servers();
+	afs_callback_update_kill();
+	afs_vlocation_purge();
+	flush_scheduled_work();
 	afs_cell_purge();
 #ifdef AFS_CACHING_SUPPORT
 	cachefs_unregister_netfs(&afs_cache_netfs);
@@ -160,64 +130,3 @@ static void __exit afs_exit(void)
 }
 
 module_exit(afs_exit);
-
-/*
- * notification that new peer record is being added
- * - called from krxsecd
- * - return an error to induce an abort
- * - mustn't sleep (caller holds an rwlock)
- */
-static int afs_adding_peer(struct rxrpc_peer *peer)
-{
-	struct afs_server *server;
-	int ret;
-
-	_debug("kAFS: Adding new peer %08x\n", ntohl(peer->addr.s_addr));
-
-	/* determine which server the peer resides in (if any) */
-	ret = afs_server_find_by_peer(peer, &server);
-	if (ret < 0)
-		return ret; /* none that we recognise, so abort */
-
-	_debug("Server %p{u=%d}\n", server, atomic_read(&server->usage));
-
-	_debug("Cell %p{u=%d}\n",
-	       server->cell, atomic_read(&server->cell->usage));
-
-	/* cross-point the structs under a global lock */
-	spin_lock(&afs_server_peer_lock);
-	peer->user = server;
-	server->peer = peer;
-	spin_unlock(&afs_server_peer_lock);
-
-	afs_put_server(server);
-
-	return 0;
-}
-
-/*
- * notification that a peer record is being discarded
- * - called from krxiod or krxsecd
- */
-static void afs_discarding_peer(struct rxrpc_peer *peer)
-{
-	struct afs_server *server;
-
-	_enter("%p",peer);
-
-	_debug("Discarding peer %08x (rtt=%lu.%lumS)\n",
-	       ntohl(peer->addr.s_addr),
-	       (long) (peer->rtt / 1000),
-	       (long) (peer->rtt % 1000));
-
-	/* uncross-point the structs under a global lock */
-	spin_lock(&afs_server_peer_lock);
-	server = peer->user;
-	if (server) {
-		peer->user = NULL;
-		server->peer = NULL;
-	}
-	spin_unlock(&afs_server_peer_lock);
-
-	_leave("");
-}
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 55bc6778cec..98e9276c46a 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -1,6 +1,6 @@
 /* miscellaneous bits
  *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -12,18 +12,19 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/errno.h>
-#include "errors.h"
 #include "internal.h"
+#include "afs_fs.h"
 
 /*
  * convert an AFS abort code to a Linux error number
  */
-int afs_abort_to_error(int abortcode)
+int afs_abort_to_error(u32 abort_code)
 {
-	switch (abortcode) {
+	switch (abort_code) {
+	case 13:		return -EACCES;
 	case VSALVAGE:		return -EIO;
 	case VNOVNODE:		return -ENOENT;
-	case VNOVOL:		return -ENXIO;
+	case VNOVOL:		return -ENOMEDIUM;
 	case VVOLEXISTS:	return -EEXIST;
 	case VNOSERVICE:	return -EIO;
 	case VOFFLINE:		return -ENOENT;
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index ca3fa81b068..08c11a0b66b 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -18,10 +18,6 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/mnt_namespace.h>
-#include "super.h"
-#include "cell.h"
-#include "volume.h"
-#include "vnode.h"
 #include "internal.h"
 
 
@@ -30,6 +26,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
 				       struct nameidata *nd);
 static int afs_mntpt_open(struct inode *inode, struct file *file);
 static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd);
+static void afs_mntpt_expiry_timed_out(struct work_struct *work);
 
 const struct file_operations afs_mntpt_file_operations = {
 	.open		= afs_mntpt_open,
@@ -43,16 +40,9 @@ const struct inode_operations afs_mntpt_inode_operations = {
 };
 
 static LIST_HEAD(afs_vfsmounts);
+static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out);
 
-static void afs_mntpt_expiry_timed_out(struct afs_timer *timer);
-
-struct afs_timer_ops afs_mntpt_expiry_timer_ops = {
-	.timed_out	= afs_mntpt_expiry_timed_out,
-};
-
-struct afs_timer afs_mntpt_expiry_timer;
-
-unsigned long afs_mntpt_expiry_timeout = 20;
+unsigned long afs_mntpt_expiry_timeout = 10 * 60;
 
 /*
  * check a symbolic link to see whether it actually encodes a mountpoint
@@ -84,7 +74,7 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode)
 
 	/* examine the symlink's contents */
 	size = vnode->status.size;
-	_debug("symlink to %*.*s", size, (int) size, buf);
+	_debug("symlink to %*.*s", (int) size, (int) size, buf);
 
 	if (size > 2 &&
 	    (buf[0] == '%' || buf[0] == '#') &&
@@ -92,7 +82,7 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode)
 	    ) {
 		_debug("symlink is a mountpoint");
 		spin_lock(&vnode->lock);
-		vnode->flags |= AFS_VNODE_MOUNTPOINT;
+		set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
 		spin_unlock(&vnode->lock);
 	}
 
@@ -113,7 +103,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
 				       struct dentry *dentry,
 				       struct nameidata *nd)
 {
-	kenter("%p,%p{%p{%s},%s}",
+	_enter("%p,%p{%p{%s},%s}",
 	       dir,
 	       dentry,
 	       dentry->d_parent,
@@ -129,7 +119,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
  */
 static int afs_mntpt_open(struct inode *inode, struct file *file)
 {
-	kenter("%p,%p{%p{%s},%s}",
+	_enter("%p,%p{%p{%s},%s}",
 	       inode, file,
 	       file->f_path.dentry->d_parent,
 	       file->f_path.dentry->d_parent ?
@@ -152,7 +142,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 	char *buf, *devname = NULL, *options = NULL;
 	int ret;
 
-	kenter("{%s}", mntpt->d_name.name);
+	_enter("{%s}", mntpt->d_name.name);
 
 	BUG_ON(!mntpt->d_inode);
 
@@ -196,13 +186,13 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 		strcat(options, ",rwpath");
 
 	/* try and do the mount */
-	kdebug("--- attempting mount %s -o %s ---", devname, options);
+	_debug("--- attempting mount %s -o %s ---", devname, options);
 	mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options);
-	kdebug("--- mount result %p ---", mnt);
+	_debug("--- mount result %p ---", mnt);
 
 	free_page((unsigned long) devname);
 	free_page((unsigned long) options);
-	kleave(" = %p", mnt);
+	_leave(" = %p", mnt);
 	return mnt;
 
 error:
@@ -212,7 +202,7 @@ error:
 		free_page((unsigned long) devname);
 	if (options)
 		free_page((unsigned long) options);
-	kleave(" = %d", ret);
+	_leave(" = %d", ret);
 	return ERR_PTR(ret);
 }
 
@@ -222,51 +212,81 @@ error:
 static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct vfsmount *newmnt;
-	struct dentry *old_dentry;
 	int err;
 
-	kenter("%p{%s},{%s:%p{%s}}",
+	_enter("%p{%s},{%s:%p{%s}}",
 	       dentry,
 	       dentry->d_name.name,
 	       nd->mnt->mnt_devname,
 	       dentry,
 	       nd->dentry->d_name.name);
 
-	newmnt = afs_mntpt_do_automount(dentry);
+	dput(nd->dentry);
+	nd->dentry = dget(dentry);
+
+	newmnt = afs_mntpt_do_automount(nd->dentry);
 	if (IS_ERR(newmnt)) {
 		path_release(nd);
 		return (void *)newmnt;
 	}
 
-	old_dentry = nd->dentry;
-	nd->dentry = dentry;
-	err = do_add_mount(newmnt, nd, 0, &afs_vfsmounts);
-	nd->dentry = old_dentry;
-
-	path_release(nd);
-
-	if (!err) {
-		mntget(newmnt);
+	mntget(newmnt);
+	err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts);
+	switch (err) {
+	case 0:
+		path_release(nd);
 		nd->mnt = newmnt;
-		dget(newmnt->mnt_root);
-		nd->dentry = newmnt->mnt_root;
+		nd->dentry = dget(newmnt->mnt_root);
+		schedule_delayed_work(&afs_mntpt_expiry_timer,
+				      afs_mntpt_expiry_timeout * HZ);
+		break;
+	case -EBUSY:
+		/* someone else made a mount here whilst we were busy */
+		while (d_mountpoint(nd->dentry) &&
+		       follow_down(&nd->mnt, &nd->dentry))
+			;
+		err = 0;
+	default:
+		mntput(newmnt);
+		break;
 	}
 
-	kleave(" = %d", err);
+	_leave(" = %d", err);
 	return ERR_PTR(err);
 }
 
 /*
  * handle mountpoint expiry timer going off
  */
-static void afs_mntpt_expiry_timed_out(struct afs_timer *timer)
+static void afs_mntpt_expiry_timed_out(struct work_struct *work)
 {
-	kenter("");
+	_enter("");
+
+	if (!list_empty(&afs_vfsmounts)) {
+		mark_mounts_for_expiry(&afs_vfsmounts);
+		schedule_delayed_work(&afs_mntpt_expiry_timer,
+				      afs_mntpt_expiry_timeout * HZ);
+	}
+
+	_leave("");
+}
 
-	mark_mounts_for_expiry(&afs_vfsmounts);
+/*
+ * kill the AFS mountpoint timer if it's still running
+ */
+void afs_mntpt_kill_timer(void)
+{
+	_enter("");
 
-	afs_kafstimod_add_timer(&afs_mntpt_expiry_timer,
-				afs_mntpt_expiry_timeout * HZ);
+	ASSERT(list_empty(&afs_vfsmounts));
+	cancel_delayed_work(&afs_mntpt_expiry_timer);
+	flush_scheduled_work();
+}
 
-	kleave("");
+/*
+ * begin unmount by attempting to remove all automounted mountpoints we added
+ */
+void afs_umount_begin(struct vfsmount *vfsmnt, int flags)
+{
+	shrink_submounts(vfsmnt, &afs_vfsmounts);
 }
diff --git a/fs/afs/mount.h b/fs/afs/mount.h
deleted file mode 100644
index 41b848320e0..00000000000
--- a/fs/afs/mount.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* mount parameters
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef AFS_MOUNT_H
-#define AFS_MOUNT_H
-
-struct afs_mountdata {
-	const char		*volume;	/* name of volume */
-	const char		*cell;		/* name of cell containing volume */
-	const char		*cache;		/* name of cache block device */
-	size_t			nservers;	/* number of server addresses listed */
-	uint32_t		servers[10];	/* IP addresses of servers in this cell */
-};
-
-#endif /* AFS_MOUNT_H */
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 5ebcc0cd3dd..d5601f617cd 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -13,8 +13,6 @@
 #include <linux/module.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include "cell.h"
-#include "volume.h"
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -315,10 +313,14 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
 
 	if (strcmp(kbuf, "add") == 0) {
 		struct afs_cell *cell;
-		ret = afs_cell_create(name, args, &cell);
-		if (ret < 0)
+
+		cell = afs_cell_create(name, args);
+		if (IS_ERR(cell)) {
+			ret = PTR_ERR(cell);
 			goto done;
+		}
 
+		afs_put_cell(cell);
 		printk("kAFS: Added new cell '%s'\n", name);
 	} else {
 		goto inval;
@@ -472,7 +474,7 @@ static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
 	struct seq_file *m;
 	int ret;
 
-	cell = afs_get_cell_maybe((struct afs_cell **) &PDE(inode)->data);
+	cell = PDE(inode)->data;
 	if (!cell)
 		return -ENOENT;
 
@@ -491,13 +493,7 @@ static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
  */
 static int afs_proc_cell_volumes_release(struct inode *inode, struct file *file)
 {
-	struct afs_cell *cell = PDE(inode)->data;
-	int ret;
-
-	ret = seq_release(inode, file);
-
-	afs_put_cell(cell);
-	return ret;
+	return seq_release(inode, file);
 }
 
 /*
@@ -557,6 +553,16 @@ static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v)
 	up_read(&cell->vl_sem);
 }
 
+const char afs_vlocation_states[][4] = {
+	[AFS_VL_NEW]			= "New",
+	[AFS_VL_CREATING]		= "Crt",
+	[AFS_VL_VALID]			= "Val",
+	[AFS_VL_NO_VOLUME]		= "NoV",
+	[AFS_VL_UPDATING]		= "Upd",
+	[AFS_VL_VOLUME_DELETED]		= "Del",
+	[AFS_VL_UNCERTAIN]		= "Unc",
+};
+
 /*
  * display a header line followed by a load of volume lines
  */
@@ -567,13 +573,14 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
 
 	/* display header on line 1 */
 	if (v == (void *) 1) {
-		seq_puts(m, "USE VLID[0]  VLID[1]  VLID[2]  NAME\n");
+		seq_puts(m, "USE STT VLID[0]  VLID[1]  VLID[2]  NAME\n");
 		return 0;
 	}
 
 	/* display one cell per line on subsequent lines */
-	seq_printf(m, "%3d %08x %08x %08x %s\n",
+	seq_printf(m, "%3d %s %08x %08x %08x %s\n",
 		   atomic_read(&vlocation->usage),
+		   afs_vlocation_states[vlocation->state],
 		   vlocation->vldb.vid[0],
 		   vlocation->vldb.vid[1],
 		   vlocation->vldb.vid[2],
@@ -592,11 +599,11 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
 	struct seq_file *m;
 	int ret;
 
-	cell = afs_get_cell_maybe((struct afs_cell**)&PDE(inode)->data);
+	cell = PDE(inode)->data;
 	if (!cell)
 		return -ENOENT;
 
-	ret = seq_open(file,&afs_proc_cell_vlservers_ops);
+	ret = seq_open(file, &afs_proc_cell_vlservers_ops);
 	if (ret<0)
 		return ret;
 
@@ -612,13 +619,7 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
 static int afs_proc_cell_vlservers_release(struct inode *inode,
 					   struct file *file)
 {
-	struct afs_cell *cell = PDE(inode)->data;
-	int ret;
-
-	ret = seq_release(inode,file);
-
-	afs_put_cell(cell);
-	return ret;
+	return seq_release(inode, file);
 }
 
 /*
@@ -703,7 +704,7 @@ static int afs_proc_cell_servers_open(struct inode *inode, struct file *file)
 	struct seq_file *m;
 	int ret;
 
-	cell = afs_get_cell_maybe((struct afs_cell **) &PDE(inode)->data);
+	cell = PDE(inode)->data;
 	if (!cell)
 		return -ENOENT;
 
@@ -722,13 +723,7 @@ static int afs_proc_cell_servers_open(struct inode *inode, struct file *file)
 static int afs_proc_cell_servers_release(struct inode *inode,
 					 struct file *file)
 {
-	struct afs_cell *cell = PDE(inode)->data;
-	int ret;
-
-	ret = seq_release(inode, file);
-
-	afs_put_cell(cell);
-	return ret;
+	return seq_release(inode, file);
 }
 
 /*
@@ -736,7 +731,7 @@ static int afs_proc_cell_servers_release(struct inode *inode,
  * first item
  */
 static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
-	__acquires(m->private->sv_lock)
+	__acquires(m->private->servers_lock)
 {
 	struct list_head *_p;
 	struct afs_cell *cell = m->private;
@@ -745,7 +740,7 @@ static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
 	_enter("cell=%p pos=%Ld", cell, *_pos);
 
 	/* lock the list against modification */
-	read_lock(&cell->sv_lock);
+	read_lock(&cell->servers_lock);
 
 	/* allow for the header line */
 	if (!pos)
@@ -753,11 +748,11 @@ static void *afs_proc_cell_servers_start(struct seq_file *m, loff_t *_pos)
 	pos--;
 
 	/* find the n'th element in the list */
-	list_for_each(_p, &cell->sv_list)
+	list_for_each(_p, &cell->servers)
 		if (!pos--)
 			break;
 
-	return _p != &cell->sv_list ? _p : NULL;
+	return _p != &cell->servers ? _p : NULL;
 }
 
 /*
@@ -774,20 +769,20 @@ static void *afs_proc_cell_servers_next(struct seq_file *p, void *v,
 	(*_pos)++;
 
 	_p = v;
-	_p = v == (void *) 1 ? cell->sv_list.next : _p->next;
+	_p = v == (void *) 1 ? cell->servers.next : _p->next;
 
-	return _p != &cell->sv_list ? _p : NULL;
+	return _p != &cell->servers ? _p : NULL;
 }
 
 /*
  * clean up after reading from the cells list
  */
 static void afs_proc_cell_servers_stop(struct seq_file *p, void *v)
-	__releases(p->private->sv_lock)
+	__releases(p->private->servers_lock)
 {
 	struct afs_cell *cell = p->private;
 
-	read_unlock(&cell->sv_lock);
+	read_unlock(&cell->servers_lock);
 }
 
 /*
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
new file mode 100644
index 00000000000..b92774231b3
--- /dev/null
+++ b/fs/afs/rxrpc.c
@@ -0,0 +1,666 @@
+/* Maintain an RxRPC server socket to do AFS communications through
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include <rxrpc/packet.h>
+#include "internal.h"
+#include "afs_cm.h"
+
+static struct socket *afs_socket; /* my RxRPC socket */
+static struct workqueue_struct *afs_async_calls;
+
+static void afs_wake_up_call_waiter(struct afs_call *);
+static int afs_wait_for_call_to_complete(struct afs_call *);
+static void afs_wake_up_async_call(struct afs_call *);
+static int afs_dont_wait_for_call_to_complete(struct afs_call *);
+static void afs_process_async_call(struct work_struct *);
+static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *);
+static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool);
+
+/* synchronous call management */
+const struct afs_wait_mode afs_sync_call = {
+	.rx_wakeup	= afs_wake_up_call_waiter,
+	.wait		= afs_wait_for_call_to_complete,
+};
+
+/* asynchronous call management */
+const struct afs_wait_mode afs_async_call = {
+	.rx_wakeup	= afs_wake_up_async_call,
+	.wait		= afs_dont_wait_for_call_to_complete,
+};
+
+/* asynchronous incoming call management */
+static const struct afs_wait_mode afs_async_incoming_call = {
+	.rx_wakeup	= afs_wake_up_async_call,
+};
+
+/* asynchronous incoming call initial processing */
+static const struct afs_call_type afs_RXCMxxxx = {
+	.deliver	= afs_deliver_cm_op_id,
+	.abort_to_error	= afs_abort_to_error,
+};
+
+static void afs_collect_incoming_call(struct work_struct *);
+
+static struct sk_buff_head afs_incoming_calls;
+static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call);
+
+/*
+ * open an RxRPC socket and bind it to be a server for callback notifications
+ * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT
+ */
+int afs_open_socket(void)
+{
+	struct sockaddr_rxrpc srx;
+	struct socket *socket;
+	int ret;
+
+	_enter("");
+
+	skb_queue_head_init(&afs_incoming_calls);
+
+	afs_async_calls = create_singlethread_workqueue("kafsd");
+	if (!afs_async_calls) {
+		_leave(" = -ENOMEM [wq]");
+		return -ENOMEM;
+	}
+
+	ret = sock_create_kern(AF_RXRPC, SOCK_DGRAM, PF_INET, &socket);
+	if (ret < 0) {
+		destroy_workqueue(afs_async_calls);
+		_leave(" = %d [socket]", ret);
+		return ret;
+	}
+
+	socket->sk->sk_allocation = GFP_NOFS;
+
+	/* bind the callback manager's address to make this a server socket */
+	srx.srx_family			= AF_RXRPC;
+	srx.srx_service			= CM_SERVICE;
+	srx.transport_type		= SOCK_DGRAM;
+	srx.transport_len		= sizeof(srx.transport.sin);
+	srx.transport.sin.sin_family	= AF_INET;
+	srx.transport.sin.sin_port	= htons(AFS_CM_PORT);
+	memset(&srx.transport.sin.sin_addr, 0,
+	       sizeof(srx.transport.sin.sin_addr));
+
+	ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
+	if (ret < 0) {
+		sock_release(socket);
+		_leave(" = %d [bind]", ret);
+		return ret;
+	}
+
+	rxrpc_kernel_intercept_rx_messages(socket, afs_rx_interceptor);
+
+	afs_socket = socket;
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * close the RxRPC socket AFS was using
+ */
+void afs_close_socket(void)
+{
+	_enter("");
+
+	sock_release(afs_socket);
+
+	_debug("dework");
+	destroy_workqueue(afs_async_calls);
+	_leave("");
+}
+
+/*
+ * allocate a call with flat request and reply buffers
+ */
+struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
+				     size_t request_size, size_t reply_size)
+{
+	struct afs_call *call;
+
+	call = kzalloc(sizeof(*call), GFP_NOFS);
+	if (!call)
+		goto nomem_call;
+
+	if (request_size) {
+		call->request = kmalloc(request_size, GFP_NOFS);
+		if (!call->request)
+			goto nomem_request;
+	}
+
+	if (reply_size) {
+		call->buffer = kmalloc(reply_size, GFP_NOFS);
+		if (!call->buffer)
+			goto nomem_buffer;
+	}
+
+	call->type = type;
+	call->request_size = request_size;
+	call->reply_max = reply_size;
+
+	init_waitqueue_head(&call->waitq);
+	skb_queue_head_init(&call->rx_queue);
+	return call;
+
+nomem_buffer:
+	kfree(call->request);
+nomem_request:
+	kfree(call);
+nomem_call:
+	return NULL;
+}
+
+/*
+ * clean up a call with flat buffer
+ */
+void afs_flat_call_destructor(struct afs_call *call)
+{
+	_enter("");
+
+	kfree(call->request);
+	call->request = NULL;
+	kfree(call->buffer);
+	call->buffer = NULL;
+}
+
+/*
+ * initiate a call
+ */
+int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
+		  const struct afs_wait_mode *wait_mode)
+{
+	struct sockaddr_rxrpc srx;
+	struct rxrpc_call *rxcall;
+	struct msghdr msg;
+	struct kvec iov[1];
+	int ret;
+
+	_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
+
+	call->wait_mode = wait_mode;
+	INIT_WORK(&call->async_work, afs_process_async_call);
+
+	memset(&srx, 0, sizeof(srx));
+	srx.srx_family = AF_RXRPC;
+	srx.srx_service = call->service_id;
+	srx.transport_type = SOCK_DGRAM;
+	srx.transport_len = sizeof(srx.transport.sin);
+	srx.transport.sin.sin_family = AF_INET;
+	srx.transport.sin.sin_port = call->port;
+	memcpy(&srx.transport.sin.sin_addr, addr, 4);
+
+	/* create a call */
+	rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key,
+					 (unsigned long) call, gfp);
+	if (IS_ERR(rxcall)) {
+		ret = PTR_ERR(rxcall);
+		goto error_kill_call;
+	}
+
+	call->rxcall = rxcall;
+
+	/* send the request */
+	iov[0].iov_base	= call->request;
+	iov[0].iov_len	= call->request_size;
+
+	msg.msg_name		= NULL;
+	msg.msg_namelen		= 0;
+	msg.msg_iov		= (struct iovec *) iov;
+	msg.msg_iovlen		= 1;
+	msg.msg_control		= NULL;
+	msg.msg_controllen	= 0;
+	msg.msg_flags		= 0;
+
+	/* have to change the state *before* sending the last packet as RxRPC
+	 * might give us the reply before it returns from sending the
+	 * request */
+	call->state = AFS_CALL_AWAIT_REPLY;
+	ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size);
+	if (ret < 0)
+		goto error_do_abort;
+
+	/* at this point, an async call may no longer exist as it may have
+	 * already completed */
+	return wait_mode->wait(call);
+
+error_do_abort:
+	rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
+	rxrpc_kernel_end_call(rxcall);
+error_kill_call:
+	call->type->destructor(call);
+	ASSERT(skb_queue_empty(&call->rx_queue));
+	kfree(call);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * handles intercepted messages that were arriving in the socket's Rx queue
+ * - called with the socket receive queue lock held to ensure message ordering
+ * - called with softirqs disabled
+ */
+static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID,
+			       struct sk_buff *skb)
+{
+	struct afs_call *call = (struct afs_call *) user_call_ID;
+
+	_enter("%p,,%u", call, skb->mark);
+
+	ASSERTCMP(sk, ==, afs_socket->sk);
+
+	if (!call) {
+		/* its an incoming call for our callback service */
+		__skb_queue_tail(&afs_incoming_calls, skb);
+		schedule_work(&afs_collect_incoming_call_work);
+	} else {
+		/* route the messages directly to the appropriate call */
+		__skb_queue_tail(&call->rx_queue, skb);
+		call->wait_mode->rx_wakeup(call);
+	}
+
+	_leave("");
+}
+
+/*
+ * deliver messages to a call
+ */
+static void afs_deliver_to_call(struct afs_call *call)
+{
+	struct sk_buff *skb;
+	bool last;
+	u32 abort_code;
+	int ret;
+
+	_enter("");
+
+	while ((call->state == AFS_CALL_AWAIT_REPLY ||
+		call->state == AFS_CALL_AWAIT_OP_ID ||
+		call->state == AFS_CALL_AWAIT_REQUEST ||
+		call->state == AFS_CALL_AWAIT_ACK) &&
+	       (skb = skb_dequeue(&call->rx_queue))) {
+		switch (skb->mark) {
+		case RXRPC_SKB_MARK_DATA:
+			_debug("Rcv DATA");
+			last = rxrpc_kernel_is_data_last(skb);
+			ret = call->type->deliver(call, skb, last);
+			switch (ret) {
+			case 0:
+				if (last &&
+				    call->state == AFS_CALL_AWAIT_REPLY)
+					call->state = AFS_CALL_COMPLETE;
+				break;
+			case -ENOTCONN:
+				abort_code = RX_CALL_DEAD;
+				goto do_abort;
+			case -ENOTSUPP:
+				abort_code = RX_INVALID_OPERATION;
+				goto do_abort;
+			default:
+				abort_code = RXGEN_CC_UNMARSHAL;
+				if (call->state != AFS_CALL_AWAIT_REPLY)
+					abort_code = RXGEN_SS_UNMARSHAL;
+			do_abort:
+				rxrpc_kernel_abort_call(call->rxcall,
+							abort_code);
+				call->error = ret;
+				call->state = AFS_CALL_ERROR;
+				break;
+			}
+			rxrpc_kernel_data_delivered(skb);
+			skb = NULL;
+			break;
+		case RXRPC_SKB_MARK_FINAL_ACK:
+			_debug("Rcv ACK");
+			call->state = AFS_CALL_COMPLETE;
+			break;
+		case RXRPC_SKB_MARK_BUSY:
+			_debug("Rcv BUSY");
+			call->error = -EBUSY;
+			call->state = AFS_CALL_BUSY;
+			break;
+		case RXRPC_SKB_MARK_REMOTE_ABORT:
+			abort_code = rxrpc_kernel_get_abort_code(skb);
+			call->error = call->type->abort_to_error(abort_code);
+			call->state = AFS_CALL_ABORTED;
+			_debug("Rcv ABORT %u -> %d", abort_code, call->error);
+			break;
+		case RXRPC_SKB_MARK_NET_ERROR:
+			call->error = -rxrpc_kernel_get_error_number(skb);
+			call->state = AFS_CALL_ERROR;
+			_debug("Rcv NET ERROR %d", call->error);
+			break;
+		case RXRPC_SKB_MARK_LOCAL_ERROR:
+			call->error = -rxrpc_kernel_get_error_number(skb);
+			call->state = AFS_CALL_ERROR;
+			_debug("Rcv LOCAL ERROR %d", call->error);
+			break;
+		default:
+			BUG();
+			break;
+		}
+
+		rxrpc_kernel_free_skb(skb);
+	}
+
+	/* make sure the queue is empty if the call is done with (we might have
+	 * aborted the call early because of an unmarshalling error) */
+	if (call->state >= AFS_CALL_COMPLETE) {
+		while ((skb = skb_dequeue(&call->rx_queue)))
+			rxrpc_kernel_free_skb(skb);
+		if (call->incoming) {
+			rxrpc_kernel_end_call(call->rxcall);
+			call->type->destructor(call);
+			ASSERT(skb_queue_empty(&call->rx_queue));
+			kfree(call);
+		}
+	}
+
+	_leave("");
+}
+
+/*
+ * wait synchronously for a call to complete
+ */
+static int afs_wait_for_call_to_complete(struct afs_call *call)
+{
+	struct sk_buff *skb;
+	int ret;
+
+	DECLARE_WAITQUEUE(myself, current);
+
+	_enter("");
+
+	add_wait_queue(&call->waitq, &myself);
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		/* deliver any messages that are in the queue */
+		if (!skb_queue_empty(&call->rx_queue)) {
+			__set_current_state(TASK_RUNNING);
+			afs_deliver_to_call(call);
+			continue;
+		}
+
+		ret = call->error;
+		if (call->state >= AFS_CALL_COMPLETE)
+			break;
+		ret = -EINTR;
+		if (signal_pending(current))
+			break;
+		schedule();
+	}
+
+	remove_wait_queue(&call->waitq, &myself);
+	__set_current_state(TASK_RUNNING);
+
+	/* kill the call */
+	if (call->state < AFS_CALL_COMPLETE) {
+		_debug("call incomplete");
+		rxrpc_kernel_abort_call(call->rxcall, RX_CALL_DEAD);
+		while ((skb = skb_dequeue(&call->rx_queue)))
+			rxrpc_kernel_free_skb(skb);
+	}
+
+	_debug("call complete");
+	rxrpc_kernel_end_call(call->rxcall);
+	call->type->destructor(call);
+	ASSERT(skb_queue_empty(&call->rx_queue));
+	kfree(call);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * wake up a waiting call
+ */
+static void afs_wake_up_call_waiter(struct afs_call *call)
+{
+	wake_up(&call->waitq);
+}
+
+/*
+ * wake up an asynchronous call
+ */
+static void afs_wake_up_async_call(struct afs_call *call)
+{
+	_enter("");
+	queue_work(afs_async_calls, &call->async_work);
+}
+
+/*
+ * put a call into asynchronous mode
+ * - mustn't touch the call descriptor as the call my have completed by the
+ *   time we get here
+ */
+static int afs_dont_wait_for_call_to_complete(struct afs_call *call)
+{
+	_enter("");
+	return -EINPROGRESS;
+}
+
+/*
+ * delete an asynchronous call
+ */
+static void afs_delete_async_call(struct work_struct *work)
+{
+	struct afs_call *call =
+		container_of(work, struct afs_call, async_work);
+
+	_enter("");
+
+	ASSERT(skb_queue_empty(&call->rx_queue));
+	ASSERT(!work_pending(&call->async_work));
+	kfree(call);
+
+	_leave("");
+}
+
+/*
+ * perform processing on an asynchronous call
+ * - on a multiple-thread workqueue this work item may try to run on several
+ *   CPUs at the same time
+ */
+static void afs_process_async_call(struct work_struct *work)
+{
+	struct afs_call *call =
+		container_of(work, struct afs_call, async_work);
+
+	_enter("");
+
+	if (!skb_queue_empty(&call->rx_queue))
+		afs_deliver_to_call(call);
+
+	if (call->state >= AFS_CALL_COMPLETE && call->wait_mode) {
+		if (call->wait_mode->async_complete)
+			call->wait_mode->async_complete(call->reply,
+							call->error);
+		call->reply = NULL;
+
+		/* kill the call */
+		rxrpc_kernel_end_call(call->rxcall);
+		if (call->type->destructor)
+			call->type->destructor(call);
+
+		/* we can't just delete the call because the work item may be
+		 * queued */
+		PREPARE_WORK(&call->async_work, afs_delete_async_call);
+		queue_work(afs_async_calls, &call->async_work);
+	}
+
+	_leave("");
+}
+
+/*
+ * empty a socket buffer into a flat reply buffer
+ */
+void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb)
+{
+	size_t len = skb->len;
+
+	if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, len) < 0)
+		BUG();
+	call->reply_size += len;
+}
+
+/*
+ * accept the backlog of incoming calls
+ */
+static void afs_collect_incoming_call(struct work_struct *work)
+{
+	struct rxrpc_call *rxcall;
+	struct afs_call *call = NULL;
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&afs_incoming_calls))) {
+		_debug("new call");
+
+		/* don't need the notification */
+		rxrpc_kernel_free_skb(skb);
+
+		if (!call) {
+			call = kzalloc(sizeof(struct afs_call), GFP_KERNEL);
+			if (!call) {
+				rxrpc_kernel_reject_call(afs_socket);
+				return;
+			}
+
+			INIT_WORK(&call->async_work, afs_process_async_call);
+			call->wait_mode = &afs_async_incoming_call;
+			call->type = &afs_RXCMxxxx;
+			init_waitqueue_head(&call->waitq);
+			skb_queue_head_init(&call->rx_queue);
+			call->state = AFS_CALL_AWAIT_OP_ID;
+		}
+
+		rxcall = rxrpc_kernel_accept_call(afs_socket,
+						  (unsigned long) call);
+		if (!IS_ERR(rxcall)) {
+			call->rxcall = rxcall;
+			call = NULL;
+		}
+	}
+
+	kfree(call);
+}
+
+/*
+ * grab the operation ID from an incoming cache manager call
+ */
+static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
+				bool last)
+{
+	size_t len = skb->len;
+	void *oibuf = (void *) &call->operation_ID;
+
+	_enter("{%u},{%zu},%d", call->offset, len, last);
+
+	ASSERTCMP(call->offset, <, 4);
+
+	/* the operation ID forms the first four bytes of the request data */
+	len = min_t(size_t, len, 4 - call->offset);
+	if (skb_copy_bits(skb, 0, oibuf + call->offset, len) < 0)
+		BUG();
+	if (!pskb_pull(skb, len))
+		BUG();
+	call->offset += len;
+
+	if (call->offset < 4) {
+		if (last) {
+			_leave(" = -EBADMSG [op ID short]");
+			return -EBADMSG;
+		}
+		_leave(" = 0 [incomplete]");
+		return 0;
+	}
+
+	call->state = AFS_CALL_AWAIT_REQUEST;
+
+	/* ask the cache manager to route the call (it'll change the call type
+	 * if successful) */
+	if (!afs_cm_incoming_call(call))
+		return -ENOTSUPP;
+
+	/* pass responsibility for the remainer of this message off to the
+	 * cache manager op */
+	return call->type->deliver(call, skb, last);
+}
+
+/*
+ * send an empty reply
+ */
+void afs_send_empty_reply(struct afs_call *call)
+{
+	struct msghdr msg;
+	struct iovec iov[1];
+
+	_enter("");
+
+	iov[0].iov_base		= NULL;
+	iov[0].iov_len		= 0;
+	msg.msg_name		= NULL;
+	msg.msg_namelen		= 0;
+	msg.msg_iov		= iov;
+	msg.msg_iovlen		= 0;
+	msg.msg_control		= NULL;
+	msg.msg_controllen	= 0;
+	msg.msg_flags		= 0;
+
+	call->state = AFS_CALL_AWAIT_ACK;
+	switch (rxrpc_kernel_send_data(call->rxcall, &msg, 0)) {
+	case 0:
+		_leave(" [replied]");
+		return;
+
+	case -ENOMEM:
+		_debug("oom");
+		rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
+	default:
+		rxrpc_kernel_end_call(call->rxcall);
+		call->rxcall = NULL;
+		call->type->destructor(call);
+		ASSERT(skb_queue_empty(&call->rx_queue));
+		kfree(call);
+		_leave(" [error]");
+		return;
+	}
+}
+
+/*
+ * extract a piece of data from the received data socket buffers
+ */
+int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
+		     bool last, void *buf, size_t count)
+{
+	size_t len = skb->len;
+
+	_enter("{%u},{%zu},%d,,%zu", call->offset, len, last, count);
+
+	ASSERTCMP(call->offset, <, count);
+
+	len = min_t(size_t, len, count - call->offset);
+	if (skb_copy_bits(skb, 0, buf + call->offset, len) < 0 ||
+	    !pskb_pull(skb, len))
+		BUG();
+	call->offset += len;
+
+	if (call->offset < count) {
+		if (last) {
+			_leave(" = -EBADMSG [%d < %lu]", call->offset, count);
+			return -EBADMSG;
+		}
+		_leave(" = -EAGAIN");
+		return -EAGAIN;
+	}
+	return 0;
+}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 44b0ce53e91..bde6125c2f2 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -1,6 +1,6 @@
 /* AFS server record management
  *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -11,127 +11,205 @@
 
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include "volume.h"
-#include "cell.h"
-#include "server.h"
-#include "transport.h"
-#include "vlclient.h"
-#include "kafstimod.h"
 #include "internal.h"
 
-DEFINE_SPINLOCK(afs_server_peer_lock);
+unsigned afs_server_timeout = 10;	/* server timeout in seconds */
 
-#define FS_SERVICE_ID		1	/* AFS Volume Location Service ID */
-#define VL_SERVICE_ID		52	/* AFS Volume Location Service ID */
+static void afs_reap_server(struct work_struct *);
 
-static void __afs_server_timeout(struct afs_timer *timer)
+/* tree of all the servers, indexed by IP address */
+static struct rb_root afs_servers = RB_ROOT;
+static DEFINE_RWLOCK(afs_servers_lock);
+
+/* LRU list of all the servers not currently in use */
+static LIST_HEAD(afs_server_graveyard);
+static DEFINE_SPINLOCK(afs_server_graveyard_lock);
+static DECLARE_DELAYED_WORK(afs_server_reaper, afs_reap_server);
+
+/*
+ * install a server record in the master tree
+ */
+static int afs_install_server(struct afs_server *server)
 {
-	struct afs_server *server =
-		list_entry(timer, struct afs_server, timeout);
+	struct afs_server *xserver;
+	struct rb_node **pp, *p;
+	int ret;
+
+	_enter("%p", server);
 
-	_debug("SERVER TIMEOUT [%p{u=%d}]",
-	       server, atomic_read(&server->usage));
+	write_lock(&afs_servers_lock);
+
+	ret = -EEXIST;
+	pp = &afs_servers.rb_node;
+	p = NULL;
+	while (*pp) {
+		p = *pp;
+		_debug("- consider %p", p);
+		xserver = rb_entry(p, struct afs_server, master_rb);
+		if (server->addr.s_addr < xserver->addr.s_addr)
+			pp = &(*pp)->rb_left;
+		else if (server->addr.s_addr > xserver->addr.s_addr)
+			pp = &(*pp)->rb_right;
+		else
+			goto error;
+	}
 
-	afs_server_do_timeout(server);
-}
+	rb_link_node(&server->master_rb, p, pp);
+	rb_insert_color(&server->master_rb, &afs_servers);
+	ret = 0;
 
-static const struct afs_timer_ops afs_server_timer_ops = {
-	.timed_out	= __afs_server_timeout,
-};
+error:
+	write_unlock(&afs_servers_lock);
+	return ret;
+}
 
 /*
- * lookup a server record in a cell
- * - TODO: search the cell's server list
+ * allocate a new server record
  */
-int afs_server_lookup(struct afs_cell *cell, const struct in_addr *addr,
-		      struct afs_server **_server)
+static struct afs_server *afs_alloc_server(struct afs_cell *cell,
+					   const struct in_addr *addr)
 {
-	struct afs_server *server, *active, *zombie;
-	int loop;
+	struct afs_server *server;
 
-	_enter("%p,%08x,", cell, ntohl(addr->s_addr));
+	_enter("");
 
-	/* allocate and initialise a server record */
 	server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
-	if (!server) {
-		_leave(" = -ENOMEM");
-		return -ENOMEM;
+	if (server) {
+		atomic_set(&server->usage, 1);
+		server->cell = cell;
+
+		INIT_LIST_HEAD(&server->link);
+		INIT_LIST_HEAD(&server->grave);
+		init_rwsem(&server->sem);
+		spin_lock_init(&server->fs_lock);
+		server->fs_vnodes = RB_ROOT;
+		server->cb_promises = RB_ROOT;
+		spin_lock_init(&server->cb_lock);
+		init_waitqueue_head(&server->cb_break_waitq);
+		INIT_DELAYED_WORK(&server->cb_break_work,
+				  afs_dispatch_give_up_callbacks);
+
+		memcpy(&server->addr, addr, sizeof(struct in_addr));
+		server->addr.s_addr = addr->s_addr;
 	}
 
-	atomic_set(&server->usage, 1);
-
-	INIT_LIST_HEAD(&server->link);
-	init_rwsem(&server->sem);
-	INIT_LIST_HEAD(&server->fs_callq);
-	spin_lock_init(&server->fs_lock);
-	INIT_LIST_HEAD(&server->cb_promises);
-	spin_lock_init(&server->cb_lock);
-
-	for (loop = 0; loop < AFS_SERVER_CONN_LIST_SIZE; loop++)
-		server->fs_conn_cnt[loop] = 4;
+	_leave(" = %p{%d}", server, atomic_read(&server->usage));
+	return server;
+}
 
-	memcpy(&server->addr, addr, sizeof(struct in_addr));
-	server->addr.s_addr = addr->s_addr;
+/*
+ * get an FS-server record for a cell
+ */
+struct afs_server *afs_lookup_server(struct afs_cell *cell,
+				     const struct in_addr *addr)
+{
+	struct afs_server *server, *candidate;
 
-	afs_timer_init(&server->timeout, &afs_server_timer_ops);
+	_enter("%p,"NIPQUAD_FMT, cell, NIPQUAD(addr->s_addr));
 
-	/* add to the cell */
-	write_lock(&cell->sv_lock);
+	/* quick scan of the list to see if we already have the server */
+	read_lock(&cell->servers_lock);
 
-	/* check the active list */
-	list_for_each_entry(active, &cell->sv_list, link) {
-		if (active->addr.s_addr == addr->s_addr)
-			goto use_active_server;
+	list_for_each_entry(server, &cell->servers, link) {
+		if (server->addr.s_addr == addr->s_addr)
+			goto found_server_quickly;
 	}
+	read_unlock(&cell->servers_lock);
 
-	/* check the inactive list */
-	spin_lock(&cell->sv_gylock);
-	list_for_each_entry(zombie, &cell->sv_graveyard, link) {
-		if (zombie->addr.s_addr == addr->s_addr)
-			goto resurrect_server;
+	candidate = afs_alloc_server(cell, addr);
+	if (!candidate) {
+		_leave(" = -ENOMEM");
+		return ERR_PTR(-ENOMEM);
 	}
-	spin_unlock(&cell->sv_gylock);
 
-	afs_get_cell(cell);
-	server->cell = cell;
-	list_add_tail(&server->link, &cell->sv_list);
+	write_lock(&cell->servers_lock);
 
-	write_unlock(&cell->sv_lock);
+	/* check the cell's server list again */
+	list_for_each_entry(server, &cell->servers, link) {
+		if (server->addr.s_addr == addr->s_addr)
+			goto found_server;
+	}
+
+	_debug("new");
+	server = candidate;
+	if (afs_install_server(server) < 0)
+		goto server_in_two_cells;
 
-	*_server = server;
-	_leave(" = 0 (%p)", server);
-	return 0;
+	afs_get_cell(cell);
+	list_add_tail(&server->link, &cell->servers);
+
+	write_unlock(&cell->servers_lock);
+	_leave(" = %p{%d}", server, atomic_read(&server->usage));
+	return server;
+
+	/* found a matching server quickly */
+found_server_quickly:
+	_debug("found quickly");
+	afs_get_server(server);
+	read_unlock(&cell->servers_lock);
+no_longer_unused:
+	if (!list_empty(&server->grave)) {
+		spin_lock(&afs_server_graveyard_lock);
+		list_del_init(&server->grave);
+		spin_unlock(&afs_server_graveyard_lock);
+	}
+	_leave(" = %p{%d}", server, atomic_read(&server->usage));
+	return server;
+
+	/* found a matching server on the second pass */
+found_server:
+	_debug("found");
+	afs_get_server(server);
+	write_unlock(&cell->servers_lock);
+	kfree(candidate);
+	goto no_longer_unused;
+
+	/* found a server that seems to be in two cells */
+server_in_two_cells:
+	write_unlock(&cell->servers_lock);
+	kfree(candidate);
+	printk(KERN_NOTICE "kAFS:"
+	       " Server "NIPQUAD_FMT" appears to be in two cells\n",
+	       NIPQUAD(*addr));
+	_leave(" = -EEXIST");
+	return ERR_PTR(-EEXIST);
+}
 
-	/* found a matching active server */
-use_active_server:
-	_debug("active server");
-	afs_get_server(active);
-	write_unlock(&cell->sv_lock);
+/*
+ * look up a server by its IP address
+ */
+struct afs_server *afs_find_server(const struct in_addr *_addr)
+{
+	struct afs_server *server = NULL;
+	struct rb_node *p;
+	struct in_addr addr = *_addr;
 
-	kfree(server);
+	_enter(NIPQUAD_FMT, NIPQUAD(addr.s_addr));
 
-	*_server = active;
-	_leave(" = 0 (%p)", active);
-	return 0;
+	read_lock(&afs_servers_lock);
 
-	/* found a matching server in the graveyard, so resurrect it and
-	 * dispose of the new record */
-resurrect_server:
-	_debug("resurrecting server");
+	p = afs_servers.rb_node;
+	while (p) {
+		server = rb_entry(p, struct afs_server, master_rb);
 
-	list_move_tail(&zombie->link, &cell->sv_list);
-	afs_get_server(zombie);
-	afs_kafstimod_del_timer(&zombie->timeout);
-	spin_unlock(&cell->sv_gylock);
-	write_unlock(&cell->sv_lock);
+		_debug("- consider %p", p);
 
-	kfree(server);
+		if (addr.s_addr < server->addr.s_addr) {
+			p = p->rb_left;
+		} else if (addr.s_addr > server->addr.s_addr) {
+			p = p->rb_right;
+		} else {
+			afs_get_server(server);
+			goto found;
+		}
+	}
 
-	*_server = zombie;
-	_leave(" = 0 (%p)", zombie);
-	return 0;
+	server = NULL;
+found:
+	read_unlock(&afs_servers_lock);
+	ASSERTIFCMP(server, server->addr.s_addr, ==, addr.s_addr);
+	_leave(" = %p", server);
+	return server;
 }
 
 /*
@@ -140,347 +218,105 @@ resurrect_server:
  */
 void afs_put_server(struct afs_server *server)
 {
-	struct afs_cell *cell;
-
 	if (!server)
 		return;
 
-	_enter("%p", server);
-
-	cell = server->cell;
+	_enter("%p{%d}", server, atomic_read(&server->usage));
 
-	/* sanity check */
-	BUG_ON(atomic_read(&server->usage) <= 0);
-
-	/* to prevent a race, the decrement and the dequeue must be effectively
-	 * atomic */
-	write_lock(&cell->sv_lock);
+	ASSERTCMP(atomic_read(&server->usage), >, 0);
 
 	if (likely(!atomic_dec_and_test(&server->usage))) {
-		write_unlock(&cell->sv_lock);
 		_leave("");
 		return;
 	}
 
-	spin_lock(&cell->sv_gylock);
-	list_move_tail(&server->link, &cell->sv_graveyard);
-
-	/* time out in 10 secs */
-	afs_kafstimod_add_timer(&server->timeout, 10 * HZ);
-
-	spin_unlock(&cell->sv_gylock);
-	write_unlock(&cell->sv_lock);
+	afs_flush_callback_breaks(server);
 
-	_leave(" [killed]");
+	spin_lock(&afs_server_graveyard_lock);
+	if (atomic_read(&server->usage) == 0) {
+		list_move_tail(&server->grave, &afs_server_graveyard);
+		server->time_of_death = get_seconds();
+		schedule_delayed_work(&afs_server_reaper,
+				      afs_server_timeout * HZ);
+	}
+	spin_unlock(&afs_server_graveyard_lock);
+	_leave(" [dead]");
 }
 
 /*
- * timeout server record
- * - removes from the cell's graveyard if the usage count is zero
+ * destroy a dead server
  */
-void afs_server_do_timeout(struct afs_server *server)
+static void afs_destroy_server(struct afs_server *server)
 {
-	struct rxrpc_peer *peer;
-	struct afs_cell *cell;
-	int loop;
-
 	_enter("%p", server);
 
-	cell = server->cell;
-
-	BUG_ON(atomic_read(&server->usage) < 0);
-
-	/* remove from graveyard if still dead */
-	spin_lock(&cell->vl_gylock);
-	if (atomic_read(&server->usage) == 0)
-		list_del_init(&server->link);
-	else
-		server = NULL;
-	spin_unlock(&cell->vl_gylock);
-
-	if (!server) {
-		_leave("");
-		return; /* resurrected */
-	}
-
-	/* we can now destroy it properly */
-	afs_put_cell(cell);
-
-	/* uncross-point the structs under a global lock */
-	spin_lock(&afs_server_peer_lock);
-	peer = server->peer;
-	if (peer) {
-		server->peer = NULL;
-		peer->user = NULL;
-	}
-	spin_unlock(&afs_server_peer_lock);
-
-	/* finish cleaning up the server */
-	for (loop = AFS_SERVER_CONN_LIST_SIZE - 1; loop >= 0; loop--)
-		if (server->fs_conn[loop])
-			rxrpc_put_connection(server->fs_conn[loop]);
-
-	if (server->vlserver)
-		rxrpc_put_connection(server->vlserver);
+	ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL);
+	ASSERTCMP(server->cb_promises.rb_node, ==, NULL);
+	ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail);
+	ASSERTCMP(atomic_read(&server->cb_break_n), ==, 0);
 
+	afs_put_cell(server->cell);
 	kfree(server);
-
-	_leave(" [destroyed]");
 }
 
 /*
- * get a callslot on a connection to the fileserver on the specified server
+ * reap dead server records
  */
-int afs_server_request_callslot(struct afs_server *server,
-				struct afs_server_callslot *callslot)
+static void afs_reap_server(struct work_struct *work)
 {
-	struct afs_server_callslot *pcallslot;
-	struct rxrpc_connection *conn;
-	int nconn, ret;
-
-	_enter("%p,",server);
-
-	INIT_LIST_HEAD(&callslot->link);
-	callslot->task = current;
-	callslot->conn = NULL;
-	callslot->nconn = -1;
-	callslot->ready = 0;
-
-	ret = 0;
-	conn = NULL;
-
-	/* get hold of a callslot first */
-	spin_lock(&server->fs_lock);
-
-	/* resurrect the server if it's death timeout has expired */
-	if (server->fs_state) {
-		if (time_before(jiffies, server->fs_dead_jif)) {
-			ret = server->fs_state;
-			spin_unlock(&server->fs_lock);
-			_leave(" = %d [still dead]", ret);
-			return ret;
+	LIST_HEAD(corpses);
+	struct afs_server *server;
+	unsigned long delay, expiry;
+	time_t now;
+
+	now = get_seconds();
+	spin_lock(&afs_server_graveyard_lock);
+
+	while (!list_empty(&afs_server_graveyard)) {
+		server = list_entry(afs_server_graveyard.next,
+				    struct afs_server, grave);
+
+		/* the queue is ordered most dead first */
+		expiry = server->time_of_death + afs_server_timeout;
+		if (expiry > now) {
+			delay = (expiry - now) * HZ;
+			if (!schedule_delayed_work(&afs_server_reaper, delay)) {
+				cancel_delayed_work(&afs_server_reaper);
+				schedule_delayed_work(&afs_server_reaper,
+						      delay);
+			}
+			break;
 		}
 
-		server->fs_state = 0;
-	}
-
-	/* try and find a connection that has spare callslots */
-	for (nconn = 0; nconn < AFS_SERVER_CONN_LIST_SIZE; nconn++) {
-		if (server->fs_conn_cnt[nconn] > 0) {
-			server->fs_conn_cnt[nconn]--;
-			spin_unlock(&server->fs_lock);
-			callslot->nconn = nconn;
-			goto obtained_slot;
+		write_lock(&server->cell->servers_lock);
+		write_lock(&afs_servers_lock);
+		if (atomic_read(&server->usage) > 0) {
+			list_del_init(&server->grave);
+		} else {
+			list_move_tail(&server->grave, &corpses);
+			list_del_init(&server->link);
+			rb_erase(&server->master_rb, &afs_servers);
 		}
+		write_unlock(&afs_servers_lock);
+		write_unlock(&server->cell->servers_lock);
 	}
 
-	/* none were available - wait interruptibly for one to become
-	 * available */
-	set_current_state(TASK_INTERRUPTIBLE);
-	list_add_tail(&callslot->link, &server->fs_callq);
-	spin_unlock(&server->fs_lock);
-
-	while (!callslot->ready && !signal_pending(current)) {
-		schedule();
-		set_current_state(TASK_INTERRUPTIBLE);
-	}
-
-	set_current_state(TASK_RUNNING);
-
-	/* even if we were interrupted we may still be queued */
-	if (!callslot->ready) {
-		spin_lock(&server->fs_lock);
-		list_del_init(&callslot->link);
-		spin_unlock(&server->fs_lock);
-	}
-
-	nconn = callslot->nconn;
-
-	/* if interrupted, we must release any slot we also got before
-	 * returning an error */
-	if (signal_pending(current)) {
-		ret = -EINTR;
-		goto error_release;
-	}
-
-	/* if we were woken up with an error, then pass that error back to the
-	 * called */
-	if (nconn < 0) {
-		_leave(" = %d", callslot->errno);
-		return callslot->errno;
-	}
-
-	/* were we given a connection directly? */
-	if (callslot->conn) {
-		/* yes - use it */
-		_leave(" = 0 (nc=%d)", nconn);
-		return 0;
-	}
+	spin_unlock(&afs_server_graveyard_lock);
 
-	/* got a callslot, but no connection */
-obtained_slot:
-
-	/* need to get hold of the RxRPC connection */
-	down_write(&server->sem);
-
-	/* quick check to see if there's an outstanding error */
-	ret = server->fs_state;
-	if (ret)
-		goto error_release_upw;
-
-	if (server->fs_conn[nconn]) {
-		/* reuse an existing connection */
-		rxrpc_get_connection(server->fs_conn[nconn]);
-		callslot->conn = server->fs_conn[nconn];
-	} else {
-		/* create a new connection */
-		ret = rxrpc_create_connection(afs_transport,
-					      htons(7000),
-					      server->addr.s_addr,
-					      FS_SERVICE_ID,
-					      NULL,
-					      &server->fs_conn[nconn]);
-
-		if (ret < 0)
-			goto error_release_upw;
-
-		callslot->conn = server->fs_conn[0];
-		rxrpc_get_connection(callslot->conn);
+	/* now reap the corpses we've extracted */
+	while (!list_empty(&corpses)) {
+		server = list_entry(corpses.next, struct afs_server, grave);
+		list_del(&server->grave);
+		afs_destroy_server(server);
 	}
-
-	up_write(&server->sem);
-
- 	_leave(" = 0");
-	return 0;
-
-	/* handle an error occurring */
-error_release_upw:
-	up_write(&server->sem);
-
-error_release:
-	/* either release the callslot or pass it along to another deserving
-	 * task */
-	spin_lock(&server->fs_lock);
-
-	if (nconn < 0) {
-		/* no callslot allocated */
-	} else if (list_empty(&server->fs_callq)) {
-		/* no one waiting */
-		server->fs_conn_cnt[nconn]++;
-		spin_unlock(&server->fs_lock);
-	} else {
-		/* someone's waiting - dequeue them and wake them up */
-		pcallslot = list_entry(server->fs_callq.next,
-				       struct afs_server_callslot, link);
-		list_del_init(&pcallslot->link);
-
-		pcallslot->errno = server->fs_state;
-		if (!pcallslot->errno) {
-			/* pass them out callslot details */
-			callslot->conn = xchg(&pcallslot->conn,
-					      callslot->conn);
-			pcallslot->nconn = nconn;
-			callslot->nconn = nconn = -1;
-		}
-		pcallslot->ready = 1;
-		wake_up_process(pcallslot->task);
-		spin_unlock(&server->fs_lock);
-	}
-
-	rxrpc_put_connection(callslot->conn);
-	callslot->conn = NULL;
-
-	_leave(" = %d", ret);
-	return ret;
 }
 
 /*
- * release a callslot back to the server
- * - transfers the RxRPC connection to the next pending callslot if possible
+ * discard all the server records for rmmod
  */
-void afs_server_release_callslot(struct afs_server *server,
-				 struct afs_server_callslot *callslot)
+void __exit afs_purge_servers(void)
 {
-	struct afs_server_callslot *pcallslot;
-
-	_enter("{ad=%08x,cnt=%u},{%d}",
-	       ntohl(server->addr.s_addr),
-	       server->fs_conn_cnt[callslot->nconn],
-	       callslot->nconn);
-
-	BUG_ON(callslot->nconn < 0);
-
-	spin_lock(&server->fs_lock);
-
-	if (list_empty(&server->fs_callq)) {
-		/* no one waiting */
-		server->fs_conn_cnt[callslot->nconn]++;
-		spin_unlock(&server->fs_lock);
-	} else {
-		/* someone's waiting - dequeue them and wake them up */
-		pcallslot = list_entry(server->fs_callq.next,
-				       struct afs_server_callslot, link);
-		list_del_init(&pcallslot->link);
-
-		pcallslot->errno = server->fs_state;
-		if (!pcallslot->errno) {
-			/* pass them out callslot details */
-			callslot->conn = xchg(&pcallslot->conn, callslot->conn);
-			pcallslot->nconn = callslot->nconn;
-			callslot->nconn = -1;
-		}
-
-		pcallslot->ready = 1;
-		wake_up_process(pcallslot->task);
-		spin_unlock(&server->fs_lock);
-	}
-
-	rxrpc_put_connection(callslot->conn);
-
-	_leave("");
-}
-
-/*
- * get a handle to a connection to the vlserver (volume location) on the
- * specified server
- */
-int afs_server_get_vlconn(struct afs_server *server,
-			  struct rxrpc_connection **_conn)
-{
-	struct rxrpc_connection *conn;
-	int ret;
-
-	_enter("%p,", server);
-
-	ret = 0;
-	conn = NULL;
-	down_read(&server->sem);
-
-	if (server->vlserver) {
-		/* reuse an existing connection */
-		rxrpc_get_connection(server->vlserver);
-		conn = server->vlserver;
-		up_read(&server->sem);
-	} else {
-		/* create a new connection */
-		up_read(&server->sem);
-		down_write(&server->sem);
-		if (!server->vlserver) {
-			ret = rxrpc_create_connection(afs_transport,
-						      htons(7003),
-						      server->addr.s_addr,
-						      VL_SERVICE_ID,
-						      NULL,
-						      &server->vlserver);
-		}
-		if (ret == 0) {
-			rxrpc_get_connection(server->vlserver);
-			conn = server->vlserver;
-		}
-		up_write(&server->sem);
-	}
-
-	*_conn = conn;
-	_leave(" = %d", ret);
-	return ret;
+	afs_server_timeout = 0;
+	cancel_delayed_work(&afs_server_reaper);
+	schedule_delayed_work(&afs_server_reaper, 0);
 }
diff --git a/fs/afs/server.h b/fs/afs/server.h
deleted file mode 100644
index e1a006829b5..00000000000
--- a/fs/afs/server.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/* AFS server record
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef AFS_SERVER_H
-#define AFS_SERVER_H
-
-#include "types.h"
-#include "kafstimod.h"
-#include <rxrpc/peer.h>
-#include <linux/rwsem.h>
-
-extern spinlock_t afs_server_peer_lock;
-
-/*
- * AFS server record
- */
-struct afs_server {
-	atomic_t		usage;
-	struct afs_cell		*cell;		/* cell in which server resides */
-	struct list_head	link;		/* link in cell's server list */
-	struct rw_semaphore	sem;		/* access lock */
-	struct afs_timer	timeout;	/* graveyard timeout */
-	struct in_addr		addr;		/* server address */
-	struct rxrpc_peer	*peer;		/* peer record for this server */
-	struct rxrpc_connection	*vlserver;	/* connection to the volume location service */
-
-	/* file service access */
-#define AFS_SERVER_CONN_LIST_SIZE 2
-	struct rxrpc_connection	*fs_conn[AFS_SERVER_CONN_LIST_SIZE]; /* FS connections */
-	unsigned		fs_conn_cnt[AFS_SERVER_CONN_LIST_SIZE];	/* per conn call count */
-	struct list_head	fs_callq;	/* queue of processes waiting to make a call */
-	spinlock_t		fs_lock;	/* access lock */
-	int			fs_state;      	/* 0 or reason FS currently marked dead (-errno) */
-	unsigned		fs_rtt;		/* FS round trip time */
-	unsigned long		fs_act_jif;	/* time at which last activity occurred */
-	unsigned long		fs_dead_jif;	/* time at which no longer to be considered dead */
-
-	/* callback promise management */
-	struct list_head	cb_promises;	/* as yet unbroken promises from this server */
-	spinlock_t		cb_lock;	/* access lock */
-};
-
-extern int afs_server_lookup(struct afs_cell *, const struct in_addr *,
-			     struct afs_server **);
-
-#define afs_get_server(S) do { atomic_inc(&(S)->usage); } while(0)
-
-extern void afs_put_server(struct afs_server *);
-extern void afs_server_do_timeout(struct afs_server *);
-
-extern int afs_server_find_by_peer(const struct rxrpc_peer *,
-				   struct afs_server **);
-
-extern int afs_server_get_vlconn(struct afs_server *,
-				 struct rxrpc_connection **);
-
-static inline
-struct afs_server *afs_server_get_from_peer(struct rxrpc_peer *peer)
-{
-	struct afs_server *server;
-
-	spin_lock(&afs_server_peer_lock);
-	server = peer->user;
-	if (server)
-		afs_get_server(server);
-	spin_unlock(&afs_server_peer_lock);
-
-	return server;
-}
-
-/*
- * AFS server callslot grant record
- */
-struct afs_server_callslot {
-	struct list_head	link;		/* link in server's list */
-	struct task_struct	*task;		/* process waiting to make call */
-	struct rxrpc_connection	*conn;		/* connection to use (or NULL on error) */
-	short			nconn;		/* connection slot number (-1 on error) */
-	char			ready;		/* T when ready */
-	int			errno;		/* error number if nconn==-1 */
-};
-
-extern int afs_server_request_callslot(struct afs_server *,
-				       struct afs_server_callslot *);
-
-extern void afs_server_release_callslot(struct afs_server *,
-					struct afs_server_callslot *);
-
-#endif /* AFS_SERVER_H */
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 0470a5c0b8a..efc4fe69f4f 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -1,6 +1,6 @@
 /* AFS superblock handling
  *
- * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
+ * Copyright (c) 2002, 2007 Red Hat, Inc. All rights reserved.
  *
  * This software may be freely redistributed under the terms of the
  * GNU General Public License.
@@ -20,12 +20,6 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include "vnode.h"
-#include "volume.h"
-#include "cell.h"
-#include "cmservice.h"
-#include "fsclient.h"
-#include "super.h"
 #include "internal.h"
 
 #define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
@@ -63,6 +57,7 @@ static const struct super_operations afs_super_ops = {
 	.drop_inode	= generic_delete_inode,
 	.destroy_inode	= afs_destroy_inode,
 	.clear_inode	= afs_clear_inode,
+	.umount_begin	= afs_umount_begin,
 	.put_super	= afs_put_super,
 };
 
@@ -78,8 +73,6 @@ int __init afs_fs_init(void)
 
 	_enter("");
 
-	afs_timer_init(&afs_mntpt_expiry_timer, &afs_mntpt_expiry_timer_ops);
-
 	/* create ourselves an inode cache */
 	atomic_set(&afs_count_active_inodes, 0);
 
@@ -99,11 +92,11 @@ int __init afs_fs_init(void)
 	ret = register_filesystem(&afs_fs_type);
 	if (ret < 0) {
 		kmem_cache_destroy(afs_inode_cachep);
-		kleave(" = %d", ret);
+		_leave(" = %d", ret);
 		return ret;
 	}
 
-	kleave(" = 0");
+	_leave(" = 0");
 	return 0;
 }
 
@@ -112,6 +105,9 @@ int __init afs_fs_init(void)
  */
 void __exit afs_fs_exit(void)
 {
+	_enter("");
+
+	afs_mntpt_kill_timer();
 	unregister_filesystem(&afs_fs_type);
 
 	if (atomic_read(&afs_count_active_inodes) != 0) {
@@ -121,6 +117,7 @@ void __exit afs_fs_exit(void)
 	}
 
 	kmem_cache_destroy(afs_inode_cachep);
+	_leave("");
 }
 
 /*
@@ -154,9 +151,9 @@ static int want_no_value(char *const *_value, const char *option)
  *   shamelessly adapted it from the msdos fs
  */
 static int afs_super_parse_options(struct afs_mount_params *params,
-				   char *options,
-				   const char **devname)
+				   char *options, const char **devname)
 {
+	struct afs_cell *cell;
 	char *key, *value;
 	int ret;
 
@@ -165,43 +162,37 @@ static int afs_super_parse_options(struct afs_mount_params *params,
 	options[PAGE_SIZE - 1] = 0;
 
 	ret = 0;
-	while ((key = strsep(&options, ",")) != 0)
-	{
+	while ((key = strsep(&options, ","))) {
 		value = strchr(key, '=');
 		if (value)
 			*value++ = 0;
 
-		printk("kAFS: KEY: %s, VAL:%s\n", key, value ?: "-");
+		_debug("kAFS: KEY: %s, VAL:%s", key, value ?: "-");
 
 		if (strcmp(key, "rwpath") == 0) {
 			if (!want_no_value(&value, "rwpath"))
 				return -EINVAL;
 			params->rwpath = 1;
-			continue;
 		} else if (strcmp(key, "vol") == 0) {
 			if (!want_arg(&value, "vol"))
 				return -EINVAL;
 			*devname = value;
-			continue;
 		} else if (strcmp(key, "cell") == 0) {
 			if (!want_arg(&value, "cell"))
 				return -EINVAL;
+			cell = afs_cell_lookup(value, strlen(value));
+			if (IS_ERR(cell))
+				return PTR_ERR(cell);
 			afs_put_cell(params->default_cell);
-			ret = afs_cell_lookup(value,
-					      strlen(value),
-					      &params->default_cell);
-			if (ret < 0)
-				return -EINVAL;
-			continue;
+			params->default_cell = cell;
+		} else {
+			printk("kAFS: Unknown mount option: '%s'\n",  key);
+			ret = -EINVAL;
+			goto error;
 		}
-
-		printk("kAFS: Unknown mount option: '%s'\n",  key);
-		ret = -EINVAL;
-		goto error;
 	}
 
 	ret = 0;
-
 error:
 	_leave(" = %d", ret);
 	return ret;
@@ -230,7 +221,7 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent)
 	struct inode *inode = NULL;
 	int ret;
 
-	kenter("");
+	_enter("");
 
 	/* allocate a superblock info record */
 	as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL);
@@ -253,9 +244,9 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent)
 	fid.vid		= as->volume->vid;
 	fid.vnode	= 1;
 	fid.unique	= 1;
-	ret = afs_iget(sb, &fid, &inode);
-	if (ret < 0)
-		goto error;
+	inode = afs_iget(sb, &fid);
+	if (IS_ERR(inode))
+		goto error_inode;
 
 	ret = -ENOMEM;
 	root = d_alloc_root(inode);
@@ -264,9 +255,12 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent)
 
 	sb->s_root = root;
 
-	kleave(" = 0");
+	_leave(" = 0");
 	return 0;
 
+error_inode:
+	ret = PTR_ERR(inode);
+	inode = NULL;
 error:
 	iput(inode);
 	afs_put_volume(as->volume);
@@ -274,7 +268,7 @@ error:
 
 	sb->s_fs_info = NULL;
 
-	kleave(" = %d", ret);
+	_leave(" = %d", ret);
 	return ret;
 }
 
@@ -290,19 +284,13 @@ static int afs_get_sb(struct file_system_type *fs_type,
 {
 	struct afs_mount_params params;
 	struct super_block *sb;
+	struct afs_volume *vol;
 	int ret;
 
 	_enter(",,%s,%p", dev_name, options);
 
 	memset(&params, 0, sizeof(params));
 
-	/* start the cache manager */
-	ret = afscm_start();
-	if (ret < 0) {
-		_leave(" = %d", ret);
-		return ret;
-	}
-
 	/* parse the options */
 	if (options) {
 		ret = afs_super_parse_options(&params, options, &dev_name);
@@ -316,17 +304,20 @@ static int afs_get_sb(struct file_system_type *fs_type,
 	}
 
 	/* parse the device name */
-	ret = afs_volume_lookup(dev_name,
-				params.default_cell,
-				params.rwpath,
-				&params.volume);
-	if (ret < 0)
+	vol = afs_volume_lookup(dev_name, params.default_cell, params.rwpath);
+	if (IS_ERR(vol)) {
+		ret = PTR_ERR(vol);
 		goto error;
+	}
+
+	params.volume = vol;
 
 	/* allocate a deviceless superblock */
 	sb = sget(fs_type, afs_test_super, set_anon_super, &params);
-	if (IS_ERR(sb))
+	if (IS_ERR(sb)) {
+		ret = PTR_ERR(sb);
 		goto error;
+	}
 
 	sb->s_flags = flags;
 
@@ -341,13 +332,12 @@ static int afs_get_sb(struct file_system_type *fs_type,
 
 	afs_put_volume(params.volume);
 	afs_put_cell(params.default_cell);
-	_leave(" = 0 [%p]", 0, sb);
+	_leave(" = 0 [%p]", sb);
 	return 0;
 
 error:
 	afs_put_volume(params.volume);
 	afs_put_cell(params.default_cell);
-	afscm_stop();
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -362,7 +352,6 @@ static void afs_put_super(struct super_block *sb)
 	_enter("");
 
 	afs_put_volume(as->volume);
-	afscm_stop();
 
 	_leave("");
 }
@@ -381,10 +370,8 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep,
 		inode_init_once(&vnode->vfs_inode);
 		init_waitqueue_head(&vnode->update_waitq);
 		spin_lock_init(&vnode->lock);
-		INIT_LIST_HEAD(&vnode->cb_link);
-		INIT_LIST_HEAD(&vnode->cb_hash_link);
-		afs_timer_init(&vnode->cb_timeout,
-			       &afs_vnode_cb_timed_out_ops);
+		INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work);
+		mutex_init(&vnode->cb_broken_lock);
 	}
 }
 
@@ -407,6 +394,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 	vnode->volume		= NULL;
 	vnode->update_cnt	= 0;
 	vnode->flags		= 0;
+	vnode->cb_promised	= false;
 
 	return &vnode->vfs_inode;
 }
@@ -416,8 +404,14 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
  */
 static void afs_destroy_inode(struct inode *inode)
 {
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+
 	_enter("{%lu}", inode->i_ino);
 
-	kmem_cache_free(afs_inode_cachep, AFS_FS_I(inode));
+	_debug("DESTROY INODE %p", inode);
+
+	ASSERTCMP(vnode->server, ==, NULL);
+
+	kmem_cache_free(afs_inode_cachep, vnode);
 	atomic_dec(&afs_count_active_inodes);
 }
diff --git a/fs/afs/super.h b/fs/afs/super.h
deleted file mode 100644
index c95b48edfc7..00000000000
--- a/fs/afs/super.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* AFS filesystem internal private data
- *
- * Copyright (c) 2002 Red Hat, Inc. All rights reserved.
- *
- * This software may be freely redistributed under the terms of the
- * GNU General Public License.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Authors: David Woodhouse <dwmw2@cambridge.redhat.com>
- *          David Howells <dhowells@redhat.com>
- *
- */
-
-#ifndef AFS_SUPER_H
-#define AFS_SUPER_H
-
-#include <linux/fs.h>
-#include "server.h"
-
-/*
- * AFS superblock private data
- * - there's one superblock per volume
- */
-struct afs_super_info {
-	struct afs_volume	*volume;	/* volume record */
-	char			rwparent;	/* T if parent is R/W AFS volume */
-};
-
-static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
-{
-	return sb->s_fs_info;
-}
-
-extern struct file_system_type afs_fs_type;
-
-#endif /* AFS_SUPER_H */
diff --git a/fs/afs/transport.h b/fs/afs/transport.h
deleted file mode 100644
index f56be4b7b1d..00000000000
--- a/fs/afs/transport.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* AFS transport management
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef AFS_TRANSPORT_H
-#define AFS_TRANSPORT_H
-
-#include "types.h"
-#include <rxrpc/transport.h>
-
-/* the cache manager transport endpoint */
-extern struct rxrpc_transport *afs_transport;
-
-#endif /* AFS_TRANSPORT_H */
diff --git a/fs/afs/types.h b/fs/afs/types.h
deleted file mode 100644
index db2b5dc9ff4..00000000000
--- a/fs/afs/types.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/* AFS types
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef AFS_TYPES_H
-#define AFS_TYPES_H
-
-#include <rxrpc/types.h>
-
-typedef unsigned			afs_volid_t;
-typedef unsigned			afs_vnodeid_t;
-typedef unsigned long long		afs_dataversion_t;
-
-typedef enum {
-	AFSVL_RWVOL,			/* read/write volume */
-	AFSVL_ROVOL,			/* read-only volume */
-	AFSVL_BACKVOL,			/* backup volume */
-} __attribute__((packed)) afs_voltype_t;
-
-typedef enum {
-	AFS_FTYPE_INVALID	= 0,
-	AFS_FTYPE_FILE		= 1,
-	AFS_FTYPE_DIR		= 2,
-	AFS_FTYPE_SYMLINK	= 3,
-} afs_file_type_t;
-
-struct afs_cell;
-struct afs_vnode;
-
-/*
- * AFS file identifier
- */
-struct afs_fid {
-	afs_volid_t	vid;		/* volume ID */
-	afs_vnodeid_t	vnode;		/* file index within volume */
-	unsigned	unique;		/* unique ID number (file index version) */
-};
-
-/*
- * AFS callback notification
- */
-typedef enum {
-	AFSCM_CB_UNTYPED	= 0,	/* no type set on CB break */
-	AFSCM_CB_EXCLUSIVE	= 1,	/* CB exclusive to CM [not implemented] */
-	AFSCM_CB_SHARED		= 2,	/* CB shared by other CM's */
-	AFSCM_CB_DROPPED	= 3,	/* CB promise cancelled by file server */
-} afs_callback_type_t;
-
-struct afs_callback {
-	struct afs_server	*server;	/* server that made the promise */
-	struct afs_fid		fid;		/* file identifier */
-	unsigned		version;	/* callback version */
-	unsigned		expiry;		/* time at which expires */
-	afs_callback_type_t	type;		/* type of callback */
-};
-
-#define AFSCBMAX 50
-
-/*
- * AFS volume information
- */
-struct afs_volume_info {
-	afs_volid_t		vid;		/* volume ID */
-	afs_voltype_t		type;		/* type of this volume */
-	afs_volid_t		type_vids[5];	/* volume ID's for possible types for this vol */
-	
-	/* list of fileservers serving this volume */
-	size_t			nservers;	/* number of entries used in servers[] */
-	struct {
-		struct in_addr	addr;		/* fileserver address */
-	} servers[8];
-};
-
-/*
- * AFS file status information
- */
-struct afs_file_status {
-	unsigned		if_version;	/* interface version */
-#define AFS_FSTATUS_VERSION	1
-
-	afs_file_type_t		type;		/* file type */
-	unsigned		nlink;		/* link count */
-	size_t			size;		/* file size */
-	afs_dataversion_t	version;	/* current data version */
-	unsigned		author;		/* author ID */
-	unsigned		owner;		/* owner ID */
-	unsigned		caller_access;	/* access rights for authenticated caller */
-	unsigned		anon_access;	/* access rights for unauthenticated caller */
-	umode_t			mode;		/* UNIX mode */
-	struct afs_fid		parent;		/* parent file ID */
-	time_t			mtime_client;	/* last time client changed data */
-	time_t			mtime_server;	/* last time server changed data */
-};
-
-/*
- * AFS volume synchronisation information
- */
-struct afs_volsync {
-	time_t			creation;	/* volume creation time */
-};
-
-#endif /* AFS_TYPES_H */
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index dac9faa70ff..0c7eba17483 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -11,243 +11,76 @@
 
 #include <linux/init.h>
 #include <linux/sched.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include "server.h"
-#include "volume.h"
-#include "vlclient.h"
-#include "kafsasyncd.h"
-#include "kafstimod.h"
-#include "errors.h"
 #include "internal.h"
 
-#define VLGETENTRYBYID		503	/* AFS Get Cache Entry By ID operation ID */
-#define VLGETENTRYBYNAME	504	/* AFS Get Cache Entry By Name operation ID */
-#define VLPROBE			514	/* AFS Probe Volume Location Service operation ID */
-
-static void afs_rxvl_get_entry_by_id_attn(struct rxrpc_call *call);
-static void afs_rxvl_get_entry_by_id_error(struct rxrpc_call *call);
-
 /*
- * map afs VL abort codes to/from Linux error codes
- * - called with call->lock held
+ * map volume locator abort codes to error codes
  */
-static void afs_rxvl_aemap(struct rxrpc_call *call)
+static int afs_vl_abort_to_error(u32 abort_code)
 {
-	int err;
-
-	_enter("{%u,%u,%d}",
-	       call->app_err_state, call->app_abort_code, call->app_errno);
-
-	switch (call->app_err_state) {
-	case RXRPC_ESTATE_LOCAL_ABORT:
-		call->app_abort_code = -call->app_errno;
-		return;
-
-	case RXRPC_ESTATE_PEER_ABORT:
-		switch (call->app_abort_code) {
-		case AFSVL_IDEXIST:		err = -EEXIST;		break;
-		case AFSVL_IO:			err = -EREMOTEIO;	break;
-		case AFSVL_NAMEEXIST:		err = -EEXIST;		break;
-		case AFSVL_CREATEFAIL:		err = -EREMOTEIO;	break;
-		case AFSVL_NOENT:		err = -ENOMEDIUM;	break;
-		case AFSVL_EMPTY:		err = -ENOMEDIUM;	break;
-		case AFSVL_ENTDELETED:		err = -ENOMEDIUM;	break;
-		case AFSVL_BADNAME:		err = -EINVAL;		break;
-		case AFSVL_BADINDEX:		err = -EINVAL;		break;
-		case AFSVL_BADVOLTYPE:		err = -EINVAL;		break;
-		case AFSVL_BADSERVER:		err = -EINVAL;		break;
-		case AFSVL_BADPARTITION:	err = -EINVAL;		break;
-		case AFSVL_REPSFULL:		err = -EFBIG;		break;
-		case AFSVL_NOREPSERVER:		err = -ENOENT;		break;
-		case AFSVL_DUPREPSERVER:	err = -EEXIST;		break;
-		case AFSVL_RWNOTFOUND:		err = -ENOENT;		break;
-		case AFSVL_BADREFCOUNT:		err = -EINVAL;		break;
-		case AFSVL_SIZEEXCEEDED:	err = -EINVAL;		break;
-		case AFSVL_BADENTRY:		err = -EINVAL;		break;
-		case AFSVL_BADVOLIDBUMP:	err = -EINVAL;		break;
-		case AFSVL_IDALREADYHASHED:	err = -EINVAL;		break;
-		case AFSVL_ENTRYLOCKED:		err = -EBUSY;		break;
-		case AFSVL_BADVOLOPER:		err = -EBADRQC;		break;
-		case AFSVL_BADRELLOCKTYPE:	err = -EINVAL;		break;
-		case AFSVL_RERELEASE:		err = -EREMOTEIO;	break;
-		case AFSVL_BADSERVERFLAG:	err = -EINVAL;		break;
-		case AFSVL_PERM:		err = -EACCES;		break;
-		case AFSVL_NOMEM:		err = -EREMOTEIO;	break;
-		default:
-			err = afs_abort_to_error(call->app_abort_code);
-			break;
-		}
-		call->app_errno = err;
-		return;
-
+	_enter("%u", abort_code);
+
+	switch (abort_code) {
+	case AFSVL_IDEXIST:		return -EEXIST;
+	case AFSVL_IO:			return -EREMOTEIO;
+	case AFSVL_NAMEEXIST:		return -EEXIST;
+	case AFSVL_CREATEFAIL:		return -EREMOTEIO;
+	case AFSVL_NOENT:		return -ENOMEDIUM;
+	case AFSVL_EMPTY:		return -ENOMEDIUM;
+	case AFSVL_ENTDELETED:		return -ENOMEDIUM;
+	case AFSVL_BADNAME:		return -EINVAL;
+	case AFSVL_BADINDEX:		return -EINVAL;
+	case AFSVL_BADVOLTYPE:		return -EINVAL;
+	case AFSVL_BADSERVER:		return -EINVAL;
+	case AFSVL_BADPARTITION:	return -EINVAL;
+	case AFSVL_REPSFULL:		return -EFBIG;
+	case AFSVL_NOREPSERVER:		return -ENOENT;
+	case AFSVL_DUPREPSERVER:	return -EEXIST;
+	case AFSVL_RWNOTFOUND:		return -ENOENT;
+	case AFSVL_BADREFCOUNT:		return -EINVAL;
+	case AFSVL_SIZEEXCEEDED:	return -EINVAL;
+	case AFSVL_BADENTRY:		return -EINVAL;
+	case AFSVL_BADVOLIDBUMP:	return -EINVAL;
+	case AFSVL_IDALREADYHASHED:	return -EINVAL;
+	case AFSVL_ENTRYLOCKED:		return -EBUSY;
+	case AFSVL_BADVOLOPER:		return -EBADRQC;
+	case AFSVL_BADRELLOCKTYPE:	return -EINVAL;
+	case AFSVL_RERELEASE:		return -EREMOTEIO;
+	case AFSVL_BADSERVERFLAG:	return -EINVAL;
+	case AFSVL_PERM:		return -EACCES;
+	case AFSVL_NOMEM:		return -EREMOTEIO;
 	default:
-		return;
+		return afs_abort_to_error(abort_code);
 	}
 }
 
-#if 0
 /*
- * probe a volume location server to see if it is still alive -- unused
+ * deliver reply data to a VL.GetEntryByXXX call
  */
-static int afs_rxvl_probe(struct afs_server *server, int alloc_flags)
+static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call,
+					   struct sk_buff *skb, bool last)
 {
-	struct rxrpc_connection *conn;
-	struct rxrpc_call *call;
-	struct kvec piov[1];
-	size_t sent;
-	int ret;
-	__be32 param[1];
-
-	DECLARE_WAITQUEUE(myself, current);
-
-	/* get hold of the vlserver connection */
-	ret = afs_server_get_vlconn(server, &conn);
-	if (ret < 0)
-		goto out;
-
-	/* create a call through that connection */
-	ret = rxrpc_create_call(conn, NULL, NULL, afs_rxvl_aemap, &call);
-	if (ret < 0) {
-		printk("kAFS: Unable to create call: %d\n", ret);
-		goto out_put_conn;
-	}
-	call->app_opcode = VLPROBE;
-
-	/* we want to get event notifications from the call */
-	add_wait_queue(&call->waitq, &myself);
-
-	/* marshall the parameters */
-	param[0] = htonl(VLPROBE);
-	piov[0].iov_len = sizeof(param);
-	piov[0].iov_base = param;
-
-	/* send the parameters to the server */
-	ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET,
-				    alloc_flags, 0, &sent);
-	if (ret < 0)
-		goto abort;
-
-	/* wait for the reply to completely arrive */
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (call->app_call_state != RXRPC_CSTATE_CLNT_RCV_REPLY ||
-		    signal_pending(current))
-			break;
-		schedule();
-	}
-	set_current_state(TASK_RUNNING);
-
-	ret = -EINTR;
-	if (signal_pending(current))
-		goto abort;
-
-	switch (call->app_call_state) {
-	case RXRPC_CSTATE_ERROR:
-		ret = call->app_errno;
-		goto out_unwait;
-
-	case RXRPC_CSTATE_CLNT_GOT_REPLY:
-		ret = 0;
-		goto out_unwait;
-
-	default:
-		BUG();
-	}
+	struct afs_cache_vlocation *entry;
+	__be32 *bp;
+	u32 tmp;
+	int loop;
 
-abort:
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	rxrpc_call_abort(call, ret);
-	schedule();
-
-out_unwait:
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&call->waitq, &myself);
-	rxrpc_put_call(call);
-out_put_conn:
-	rxrpc_put_connection(conn);
-out:
-	return ret;
-}
-#endif
+	_enter(",,%u", last);
 
-/*
- * look up a volume location database entry by name
- */
-int afs_rxvl_get_entry_by_name(struct afs_server *server,
-			       const char *volname,
-			       unsigned volnamesz,
-			       struct afs_cache_vlocation *entry)
-{
-	DECLARE_WAITQUEUE(myself, current);
-
-	struct rxrpc_connection *conn;
-	struct rxrpc_call *call;
-	struct kvec piov[3];
-	unsigned tmp;
-	size_t sent;
-	int ret, loop;
-	__be32 *bp, param[2], zero;
-
-	_enter(",%*.*s,%u,", volnamesz, volnamesz, volname, volnamesz);
-
-	memset(entry, 0, sizeof(*entry));
-
-	/* get hold of the vlserver connection */
-	ret = afs_server_get_vlconn(server, &conn);
-	if (ret < 0)
-		goto out;
-
-	/* create a call through that connection */
-	ret = rxrpc_create_call(conn, NULL, NULL, afs_rxvl_aemap, &call);
-	if (ret < 0) {
-		printk("kAFS: Unable to create call: %d\n", ret);
-		goto out_put_conn;
-	}
-	call->app_opcode = VLGETENTRYBYNAME;
+	afs_transfer_reply(call, skb);
+	if (!last)
+		return 0;
 
-	/* we want to get event notifications from the call */
-	add_wait_queue(&call->waitq, &myself);
+	if (call->reply_size != call->reply_max)
+		return -EBADMSG;
 
-	/* marshall the parameters */
-	piov[1].iov_len = volnamesz;
-	piov[1].iov_base = (char *) volname;
-
-	zero = 0;
-	piov[2].iov_len = (4 - (piov[1].iov_len & 3)) & 3;
-	piov[2].iov_base = &zero;
-
-	param[0] = htonl(VLGETENTRYBYNAME);
-	param[1] = htonl(piov[1].iov_len);
-
-	piov[0].iov_len = sizeof(param);
-	piov[0].iov_base = param;
-
-	/* send the parameters to the server */
-	ret = rxrpc_call_write_data(call, 3, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-				    0, &sent);
-	if (ret < 0)
-		goto abort;
-
-	/* wait for the reply to completely arrive */
-	bp = rxrpc_call_alloc_scratch(call, 384);
-
-	ret = rxrpc_call_read_data(call, bp, 384,
-				   RXRPC_CALL_READ_BLOCK |
-				   RXRPC_CALL_READ_ALL);
-	if (ret < 0) {
-		if (ret == -ECONNABORTED) {
-			ret = call->app_errno;
-			goto out_unwait;
-		}
-		goto abort;
-	}
+	/* unmarshall the reply once we've received all of it */
+	entry = call->reply;
+	bp = call->buffer;
 
-	/* unmarshall the reply */
 	for (loop = 0; loop < 64; loop++)
 		entry->name[loop] = ntohl(*bp++);
+	entry->name[loop] = 0;
 	bp++; /* final NUL */
 
 	bp++; /* type */
@@ -260,6 +93,7 @@ int afs_rxvl_get_entry_by_name(struct afs_server *server,
 
 	for (loop = 0; loop < 8; loop++) {
 		tmp = ntohl(*bp++);
+		entry->srvtmask[loop] = 0;
 		if (tmp & AFS_VLSF_RWVOL)
 			entry->srvtmask[loop] |= AFS_VOL_VTM_RW;
 		if (tmp & AFS_VLSF_ROVOL)
@@ -275,409 +109,104 @@ int afs_rxvl_get_entry_by_name(struct afs_server *server,
 	bp++; /* clone ID */
 
 	tmp = ntohl(*bp++); /* flags */
+	entry->vidmask = 0;
 	if (tmp & AFS_VLF_RWEXISTS)
 		entry->vidmask |= AFS_VOL_VTM_RW;
 	if (tmp & AFS_VLF_ROEXISTS)
 		entry->vidmask |= AFS_VOL_VTM_RO;
 	if (tmp & AFS_VLF_BACKEXISTS)
 		entry->vidmask |= AFS_VOL_VTM_BAK;
-
-	ret = -ENOMEDIUM;
 	if (!entry->vidmask)
-		goto abort;
-
-	/* success */
-	entry->rtime = get_seconds();
-	ret = 0;
-
-out_unwait:
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&call->waitq, &myself);
-	rxrpc_put_call(call);
-out_put_conn:
-	rxrpc_put_connection(conn);
-out:
-	_leave(" = %d", ret);
-	return ret;
-
-abort:
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	rxrpc_call_abort(call, ret);
-	schedule();
-	goto out_unwait;
+		return -EBADMSG;
+
+	_leave(" = 0 [done]");
+	return 0;
 }
 
 /*
- * look up a volume location database entry by ID
+ * VL.GetEntryByName operation type
  */
-int afs_rxvl_get_entry_by_id(struct afs_server *server,
-			     afs_volid_t volid,
-			     afs_voltype_t voltype,
-			     struct afs_cache_vlocation *entry)
-{
-	DECLARE_WAITQUEUE(myself, current);
-
-	struct rxrpc_connection *conn;
-	struct rxrpc_call *call;
-	struct kvec piov[1];
-	unsigned tmp;
-	size_t sent;
-	int ret, loop;
-	__be32 *bp, param[3];
-
-	_enter(",%x,%d,", volid, voltype);
-
-	memset(entry, 0, sizeof(*entry));
-
-	/* get hold of the vlserver connection */
-	ret = afs_server_get_vlconn(server, &conn);
-	if (ret < 0)
-		goto out;
-
-	/* create a call through that connection */
-	ret = rxrpc_create_call(conn, NULL, NULL, afs_rxvl_aemap, &call);
-	if (ret < 0) {
-		printk("kAFS: Unable to create call: %d\n", ret);
-		goto out_put_conn;
-	}
-	call->app_opcode = VLGETENTRYBYID;
-
-	/* we want to get event notifications from the call */
-	add_wait_queue(&call->waitq, &myself);
-
-	/* marshall the parameters */
-	param[0] = htonl(VLGETENTRYBYID);
-	param[1] = htonl(volid);
-	param[2] = htonl(voltype);
-
-	piov[0].iov_len = sizeof(param);
-	piov[0].iov_base = param;
-
-	/* send the parameters to the server */
-	ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-				    0, &sent);
-	if (ret < 0)
-		goto abort;
-
-	/* wait for the reply to completely arrive */
-	bp = rxrpc_call_alloc_scratch(call, 384);
-
-	ret = rxrpc_call_read_data(call, bp, 384,
-				   RXRPC_CALL_READ_BLOCK |
-				   RXRPC_CALL_READ_ALL);
-	if (ret < 0) {
-		if (ret == -ECONNABORTED) {
-			ret = call->app_errno;
-			goto out_unwait;
-		}
-		goto abort;
-	}
-
-	/* unmarshall the reply */
-	for (loop = 0; loop < 64; loop++)
-		entry->name[loop] = ntohl(*bp++);
-	bp++; /* final NUL */
-
-	bp++; /* type */
-	entry->nservers = ntohl(*bp++);
-
-	for (loop = 0; loop < 8; loop++)
-		entry->servers[loop].s_addr = *bp++;
-
-	bp += 8; /* partition IDs */
-
-	for (loop = 0; loop < 8; loop++) {
-		tmp = ntohl(*bp++);
-		if (tmp & AFS_VLSF_RWVOL)
-			entry->srvtmask[loop] |= AFS_VOL_VTM_RW;
-		if (tmp & AFS_VLSF_ROVOL)
-			entry->srvtmask[loop] |= AFS_VOL_VTM_RO;
-		if (tmp & AFS_VLSF_BACKVOL)
-			entry->srvtmask[loop] |= AFS_VOL_VTM_BAK;
-	}
-
-	entry->vid[0] = ntohl(*bp++);
-	entry->vid[1] = ntohl(*bp++);
-	entry->vid[2] = ntohl(*bp++);
-
-	bp++; /* clone ID */
-
-	tmp = ntohl(*bp++); /* flags */
-	if (tmp & AFS_VLF_RWEXISTS)
-		entry->vidmask |= AFS_VOL_VTM_RW;
-	if (tmp & AFS_VLF_ROEXISTS)
-		entry->vidmask |= AFS_VOL_VTM_RO;
-	if (tmp & AFS_VLF_BACKEXISTS)
-		entry->vidmask |= AFS_VOL_VTM_BAK;
+static const struct afs_call_type afs_RXVLGetEntryByName = {
+	.deliver	= afs_deliver_vl_get_entry_by_xxx,
+	.abort_to_error	= afs_vl_abort_to_error,
+	.destructor	= afs_flat_call_destructor,
+};
 
-	ret = -ENOMEDIUM;
-	if (!entry->vidmask)
-		goto abort;
-
-#if 0 /* TODO: remove */
-	entry->nservers = 3;
-	entry->servers[0].s_addr = htonl(0xac101249);
-	entry->servers[1].s_addr = htonl(0xac101243);
-	entry->servers[2].s_addr = htonl(0xac10125b /*0xac10125b*/);
-
-	entry->srvtmask[0] = AFS_VOL_VTM_RO;
-	entry->srvtmask[1] = AFS_VOL_VTM_RO;
-	entry->srvtmask[2] = AFS_VOL_VTM_RO | AFS_VOL_VTM_RW;
-#endif
-
-	/* success */
-	entry->rtime = get_seconds();
-	ret = 0;
-
-out_unwait:
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(&call->waitq, &myself);
-	rxrpc_put_call(call);
-out_put_conn:
-	rxrpc_put_connection(conn);
-out:
-	_leave(" = %d", ret);
-	return ret;
-
-abort:
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	rxrpc_call_abort(call, ret);
-	schedule();
-	goto out_unwait;
-}
+/*
+ * VL.GetEntryById operation type
+ */
+static const struct afs_call_type afs_RXVLGetEntryById = {
+	.deliver	= afs_deliver_vl_get_entry_by_xxx,
+	.abort_to_error	= afs_vl_abort_to_error,
+	.destructor	= afs_flat_call_destructor,
+};
 
 /*
- * look up a volume location database entry by ID asynchronously
+ * dispatch a get volume entry by name operation
  */
-int afs_rxvl_get_entry_by_id_async(struct afs_async_op *op,
-				   afs_volid_t volid,
-				   afs_voltype_t voltype)
+int afs_vl_get_entry_by_name(struct in_addr *addr,
+			     const char *volname,
+			     struct afs_cache_vlocation *entry,
+			     const struct afs_wait_mode *wait_mode)
 {
-	struct rxrpc_connection *conn;
-	struct rxrpc_call *call;
-	struct kvec piov[1];
-	size_t sent;
-	int ret;
-	__be32 param[3];
-
-	_enter(",%x,%d,", volid, voltype);
-
-	/* get hold of the vlserver connection */
-	ret = afs_server_get_vlconn(op->server, &conn);
-	if (ret < 0) {
-		_leave(" = %d", ret);
-		return ret;
-	}
+	struct afs_call *call;
+	size_t volnamesz, reqsz, padsz;
+	__be32 *bp;
 
-	/* create a call through that connection */
-	ret = rxrpc_create_call(conn,
-				afs_rxvl_get_entry_by_id_attn,
-				afs_rxvl_get_entry_by_id_error,
-				afs_rxvl_aemap,
-				&op->call);
-	rxrpc_put_connection(conn);
-
-	if (ret < 0) {
-		printk("kAFS: Unable to create call: %d\n", ret);
-		_leave(" = %d", ret);
-		return ret;
-	}
+	_enter("");
 
-	op->call->app_opcode = VLGETENTRYBYID;
-	op->call->app_user = op;
+	volnamesz = strlen(volname);
+	padsz = (4 - (volnamesz & 3)) & 3;
+	reqsz = 8 + volnamesz + padsz;
 
-	call = op->call;
-	rxrpc_get_call(call);
+	call = afs_alloc_flat_call(&afs_RXVLGetEntryByName, reqsz, 384);
+	if (!call)
+		return -ENOMEM;
 
-	/* send event notifications from the call to kafsasyncd */
-	afs_kafsasyncd_begin_op(op);
+	call->reply = entry;
+	call->service_id = VL_SERVICE;
+	call->port = htons(AFS_VL_PORT);
 
 	/* marshall the parameters */
-	param[0] = htonl(VLGETENTRYBYID);
-	param[1] = htonl(volid);
-	param[2] = htonl(voltype);
-
-	piov[0].iov_len = sizeof(param);
-	piov[0].iov_base = param;
-
-	/* allocate result read buffer in scratch space */
-	call->app_scr_ptr = rxrpc_call_alloc_scratch(op->call, 384);
-
-	/* send the parameters to the server */
-	ret = rxrpc_call_write_data(call, 1, piov, RXRPC_LAST_PACKET, GFP_NOFS,
-				    0, &sent);
-	if (ret < 0) {
-		rxrpc_call_abort(call, ret); /* handle from kafsasyncd */
-		ret = 0;
-		goto out;
-	}
-
-	/* wait for the reply to completely arrive */
-	ret = rxrpc_call_read_data(call, call->app_scr_ptr, 384, 0);
-	switch (ret) {
-	case 0:
-	case -EAGAIN:
-	case -ECONNABORTED:
-		ret = 0;
-		break;	/* all handled by kafsasyncd */
-
-	default:
-		rxrpc_call_abort(call, ret); /* make kafsasyncd handle it */
-		ret = 0;
-		break;
-	}
-
-out:
-	rxrpc_put_call(call);
-	_leave(" = %d", ret);
-	return ret;
+	bp = call->request;
+	*bp++ = htonl(VLGETENTRYBYNAME);
+	*bp++ = htonl(volnamesz);
+	memcpy(bp, volname, volnamesz);
+	if (padsz > 0)
+		memset((void *) bp + volnamesz, 0, padsz);
+
+	/* initiate the call */
+	return afs_make_call(addr, call, GFP_KERNEL, wait_mode);
 }
 
 /*
- * attend to the asynchronous get VLDB entry by ID
+ * dispatch a get volume entry by ID operation
  */
-int afs_rxvl_get_entry_by_id_async2(struct afs_async_op *op,
-				    struct afs_cache_vlocation *entry)
+int afs_vl_get_entry_by_id(struct in_addr *addr,
+			   afs_volid_t volid,
+			   afs_voltype_t voltype,
+			   struct afs_cache_vlocation *entry,
+			   const struct afs_wait_mode *wait_mode)
 {
+	struct afs_call *call;
 	__be32 *bp;
-	__u32 tmp;
-	int loop, ret;
-
-	_enter("{op=%p cst=%u}", op, op->call->app_call_state);
-
-	memset(entry, 0, sizeof(*entry));
-
-	if (op->call->app_call_state == RXRPC_CSTATE_COMPLETE) {
-		/* operation finished */
-		afs_kafsasyncd_terminate_op(op);
-
-		bp = op->call->app_scr_ptr;
-
-		/* unmarshall the reply */
-		for (loop = 0; loop < 64; loop++)
-			entry->name[loop] = ntohl(*bp++);
-		bp++; /* final NUL */
-
-		bp++; /* type */
-		entry->nservers = ntohl(*bp++);
-
-		for (loop = 0; loop < 8; loop++)
-			entry->servers[loop].s_addr = *bp++;
-
-		bp += 8; /* partition IDs */
-
-		for (loop = 0; loop < 8; loop++) {
-			tmp = ntohl(*bp++);
-			if (tmp & AFS_VLSF_RWVOL)
-				entry->srvtmask[loop] |= AFS_VOL_VTM_RW;
-			if (tmp & AFS_VLSF_ROVOL)
-				entry->srvtmask[loop] |= AFS_VOL_VTM_RO;
-			if (tmp & AFS_VLSF_BACKVOL)
-				entry->srvtmask[loop] |= AFS_VOL_VTM_BAK;
-		}
-
-		entry->vid[0] = ntohl(*bp++);
-		entry->vid[1] = ntohl(*bp++);
-		entry->vid[2] = ntohl(*bp++);
-
-		bp++; /* clone ID */
-
-		tmp = ntohl(*bp++); /* flags */
-		if (tmp & AFS_VLF_RWEXISTS)
-			entry->vidmask |= AFS_VOL_VTM_RW;
-		if (tmp & AFS_VLF_ROEXISTS)
-			entry->vidmask |= AFS_VOL_VTM_RO;
-		if (tmp & AFS_VLF_BACKEXISTS)
-			entry->vidmask |= AFS_VOL_VTM_BAK;
-
-		ret = -ENOMEDIUM;
-		if (!entry->vidmask) {
-			rxrpc_call_abort(op->call, ret);
-			goto done;
-		}
-
-#if 0 /* TODO: remove */
-		entry->nservers = 3;
-		entry->servers[0].s_addr = htonl(0xac101249);
-		entry->servers[1].s_addr = htonl(0xac101243);
-		entry->servers[2].s_addr = htonl(0xac10125b /*0xac10125b*/);
-
-		entry->srvtmask[0] = AFS_VOL_VTM_RO;
-		entry->srvtmask[1] = AFS_VOL_VTM_RO;
-		entry->srvtmask[2] = AFS_VOL_VTM_RO | AFS_VOL_VTM_RW;
-#endif
-
-		/* success */
-		entry->rtime = get_seconds();
-		ret = 0;
-		goto done;
-	}
-
-	if (op->call->app_call_state == RXRPC_CSTATE_ERROR) {
-		/* operation error */
-		ret = op->call->app_errno;
-		goto done;
-	}
-
-	_leave(" = -EAGAIN");
-	return -EAGAIN;
-
-done:
-	rxrpc_put_call(op->call);
-	op->call = NULL;
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
- * handle attention events on an async get-entry-by-ID op
- * - called from krxiod
- */
-static void afs_rxvl_get_entry_by_id_attn(struct rxrpc_call *call)
-{
-	struct afs_async_op *op = call->app_user;
-
-	_enter("{op=%p cst=%u}", op, call->app_call_state);
-
-	switch (call->app_call_state) {
-	case RXRPC_CSTATE_COMPLETE:
-		afs_kafsasyncd_attend_op(op);
-		break;
-	case RXRPC_CSTATE_CLNT_RCV_REPLY:
-		if (call->app_async_read)
-			break;
-	case RXRPC_CSTATE_CLNT_GOT_REPLY:
-		if (call->app_read_count == 0)
-			break;
-		printk("kAFS: Reply bigger than expected"
-		       " {cst=%u asyn=%d mark=%Zu rdy=%Zu pr=%u%s}",
-		       call->app_call_state,
-		       call->app_async_read,
-		       call->app_mark,
-		       call->app_ready_qty,
-		       call->pkt_rcv_count,
-		       call->app_last_rcv ? " last" : "");
-
-		rxrpc_call_abort(call, -EBADMSG);
-		break;
-	default:
-		BUG();
-	}
 
-	_leave("");
-}
+	_enter("");
 
-/*
- * handle error events on an async get-entry-by-ID op
- * - called from krxiod
- */
-static void afs_rxvl_get_entry_by_id_error(struct rxrpc_call *call)
-{
-	struct afs_async_op *op = call->app_user;
+	call = afs_alloc_flat_call(&afs_RXVLGetEntryById, 12, 384);
+	if (!call)
+		return -ENOMEM;
 
-	_enter("{op=%p cst=%u}", op, call->app_call_state);
+	call->reply = entry;
+	call->service_id = VL_SERVICE;
+	call->port = htons(AFS_VL_PORT);
 
-	afs_kafsasyncd_attend_op(op);
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(VLGETENTRYBYID);
+	*bp++ = htonl(volid);
+	*bp   = htonl(voltype);
 
-	_leave("");
+	/* initiate the call */
+	return afs_make_call(addr, call, GFP_KERNEL, wait_mode);
 }
diff --git a/fs/afs/vlclient.h b/fs/afs/vlclient.h
deleted file mode 100644
index 11dc10fe300..00000000000
--- a/fs/afs/vlclient.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/* Volume Location Service client interface
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef AFS_VLCLIENT_H
-#define AFS_VLCLIENT_H
-
-#include "types.h"
-
-enum AFSVL_Errors {
-	AFSVL_IDEXIST 		= 363520,	/* Volume Id entry exists in vl database */
-	AFSVL_IO 		= 363521,	/* I/O related error */
-	AFSVL_NAMEEXIST 	= 363522,	/* Volume name entry exists in vl database */
-	AFSVL_CREATEFAIL 	= 363523,	/* Internal creation failure */
-	AFSVL_NOENT 		= 363524,	/* No such entry */
-	AFSVL_EMPTY 		= 363525,	/* Vl database is empty */
-	AFSVL_ENTDELETED 	= 363526,	/* Entry is deleted (soft delete) */
-	AFSVL_BADNAME 		= 363527,	/* Volume name is illegal */
-	AFSVL_BADINDEX 		= 363528,	/* Index is out of range */
-	AFSVL_BADVOLTYPE 	= 363529,	/* Bad volume type */
-	AFSVL_BADSERVER 	= 363530,	/* Illegal server number (out of range) */
-	AFSVL_BADPARTITION 	= 363531,	/* Bad partition number */
-	AFSVL_REPSFULL 		= 363532,	/* Run out of space for Replication sites */
-	AFSVL_NOREPSERVER 	= 363533,	/* No such Replication server site exists */
-	AFSVL_DUPREPSERVER 	= 363534,	/* Replication site already exists */
-	AFSVL_RWNOTFOUND 	= 363535,	/* Parent R/W entry not found */
-	AFSVL_BADREFCOUNT 	= 363536,	/* Illegal Reference Count number */
-	AFSVL_SIZEEXCEEDED 	= 363537,	/* Vl size for attributes exceeded */
-	AFSVL_BADENTRY 		= 363538,	/* Bad incoming vl entry */
-	AFSVL_BADVOLIDBUMP 	= 363539,	/* Illegal max volid increment */
-	AFSVL_IDALREADYHASHED 	= 363540,	/* RO/BACK id already hashed */
-	AFSVL_ENTRYLOCKED 	= 363541,	/* Vl entry is already locked */
-	AFSVL_BADVOLOPER 	= 363542,	/* Bad volume operation code */
-	AFSVL_BADRELLOCKTYPE 	= 363543,	/* Bad release lock type */
-	AFSVL_RERELEASE 	= 363544,	/* Status report: last release was aborted */
-	AFSVL_BADSERVERFLAG 	= 363545,	/* Invalid replication site server �ag */
-	AFSVL_PERM 		= 363546,	/* No permission access */
-	AFSVL_NOMEM 		= 363547,	/* malloc/realloc failed to alloc enough memory */
-};
-
-/* maps to "struct vldbentry" in vvl-spec.pdf */
-struct afs_vldbentry {
-	char		name[65];		/* name of volume (including NUL char) */
-	afs_voltype_t	type;			/* volume type */
-	unsigned	num_servers;		/* num servers that hold instances of this vol */
-	unsigned	clone_id;		/* cloning ID */
-
-	unsigned	flags;
-#define AFS_VLF_RWEXISTS	0x1000		/* R/W volume exists */
-#define AFS_VLF_ROEXISTS	0x2000		/* R/O volume exists */
-#define AFS_VLF_BACKEXISTS	0x4000		/* backup volume exists */
-
-	afs_volid_t	volume_ids[3];		/* volume IDs */
-
-	struct {
-		struct in_addr	addr;		/* server address */
-		unsigned	partition;	/* partition ID on this server */
-		unsigned	flags;		/* server specific flags */
-#define AFS_VLSF_NEWREPSITE	0x0001	/* unused */
-#define AFS_VLSF_ROVOL		0x0002	/* this server holds a R/O instance of the volume */
-#define AFS_VLSF_RWVOL		0x0004	/* this server holds a R/W instance of the volume */
-#define AFS_VLSF_BACKVOL	0x0008	/* this server holds a backup instance of the volume */
-	} servers[8];
-};
-
-extern int afs_rxvl_get_entry_by_name(struct afs_server *, const char *,
-				      unsigned, struct afs_cache_vlocation *);
-extern int afs_rxvl_get_entry_by_id(struct afs_server *, afs_volid_t,
-				    afs_voltype_t,
-				    struct afs_cache_vlocation *);
-
-extern int afs_rxvl_get_entry_by_id_async(struct afs_async_op *,
-					  afs_volid_t, afs_voltype_t);
-
-extern int afs_rxvl_get_entry_by_id_async2(struct afs_async_op *,
-					   struct afs_cache_vlocation *);
-
-#endif /* AFS_VLCLIENT_H */
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index e48728c9217..60cb2f408c7 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -1,6 +1,6 @@
-/* volume location management
+/* AFS volume location management
  *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -12,130 +12,60 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-#include "volume.h"
-#include "cell.h"
-#include "cmservice.h"
-#include "fsclient.h"
-#include "vlclient.h"
-#include "kafstimod.h"
-#include <rxrpc/connection.h>
 #include "internal.h"
 
-#define AFS_VLDB_TIMEOUT HZ*1000
+unsigned afs_vlocation_timeout = 10;	/* volume location timeout in seconds */
+unsigned afs_vlocation_update_timeout = 10 * 60;
 
-static void afs_vlocation_update_timer(struct afs_timer *timer);
-static void afs_vlocation_update_attend(struct afs_async_op *op);
-static void afs_vlocation_update_discard(struct afs_async_op *op);
-static void __afs_put_vlocation(struct afs_vlocation *vlocation);
+static void afs_vlocation_reaper(struct work_struct *);
+static void afs_vlocation_updater(struct work_struct *);
 
-static void __afs_vlocation_timeout(struct afs_timer *timer)
-{
-	struct afs_vlocation *vlocation =
-		list_entry(timer, struct afs_vlocation, timeout);
-
-	_debug("VL TIMEOUT [%s{u=%d}]",
-	       vlocation->vldb.name, atomic_read(&vlocation->usage));
-
-	afs_vlocation_do_timeout(vlocation);
-}
-
-static const struct afs_timer_ops afs_vlocation_timer_ops = {
-	.timed_out	= __afs_vlocation_timeout,
-};
-
-static const struct afs_timer_ops afs_vlocation_update_timer_ops = {
-	.timed_out	= afs_vlocation_update_timer,
-};
-
-static const struct afs_async_op_ops afs_vlocation_update_op_ops = {
-	.attend		= afs_vlocation_update_attend,
-	.discard	= afs_vlocation_update_discard,
-};
-
-static LIST_HEAD(afs_vlocation_update_pendq);	/* queue of VLs awaiting update */
-static struct afs_vlocation *afs_vlocation_update;	/* VL currently being updated */
-static DEFINE_SPINLOCK(afs_vlocation_update_lock); /* lock guarding update queue */
-
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_vlocation_cache_match(void *target,
-						     const void *entry);
-static void afs_vlocation_cache_update(void *source, void *entry);
-
-struct cachefs_index_def afs_vlocation_cache_index_def = {
-	.name		= "vldb",
-	.data_size	= sizeof(struct afs_cache_vlocation),
-	.keys[0]	= { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
-	.match		= afs_vlocation_cache_match,
-	.update		= afs_vlocation_cache_update,
-};
-#endif
+static LIST_HEAD(afs_vlocation_updates);
+static LIST_HEAD(afs_vlocation_graveyard);
+static DEFINE_SPINLOCK(afs_vlocation_updates_lock);
+static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);
+static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);
+static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);
+static struct workqueue_struct *afs_vlocation_update_worker;
 
 /*
  * iterate through the VL servers in a cell until one of them admits knowing
  * about the volume in question
- * - caller must have cell->vl_sem write-locked
  */
-static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vlocation,
-					   const char *name,
-					   unsigned namesz,
+static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
 					   struct afs_cache_vlocation *vldb)
 {
-	struct afs_server *server = NULL;
-	struct afs_cell *cell = vlocation->cell;
+	struct afs_cell *cell = vl->cell;
+	struct in_addr addr;
 	int count, ret;
 
-	_enter("%s,%*.*s,%u", cell->name, namesz, namesz, name, namesz);
+	_enter("%s,%s", cell->name, vl->vldb.name);
 
+	down_write(&vl->cell->vl_sem);
 	ret = -ENOMEDIUM;
 	for (count = cell->vl_naddrs; count > 0; count--) {
-		_debug("CellServ[%hu]: %08x",
-		       cell->vl_curr_svix,
-		       cell->vl_addrs[cell->vl_curr_svix].s_addr);
-
-		/* try and create a server */
-		ret = afs_server_lookup(cell,
-					&cell->vl_addrs[cell->vl_curr_svix],
-					&server);
-		switch (ret) {
-		case 0:
-			break;
-		case -ENOMEM:
-		case -ENONET:
-			goto out;
-		default:
-			goto rotate;
-		}
+		addr = cell->vl_addrs[cell->vl_curr_svix];
+
+		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
 
 		/* attempt to access the VL server */
-		ret = afs_rxvl_get_entry_by_name(server, name, namesz, vldb);
+		ret = afs_vl_get_entry_by_name(&addr, vl->vldb.name, vldb,
+					       &afs_sync_call);
 		switch (ret) {
 		case 0:
-			afs_put_server(server);
 			goto out;
 		case -ENOMEM:
 		case -ENONET:
 		case -ENETUNREACH:
 		case -EHOSTUNREACH:
 		case -ECONNREFUSED:
-			down_write(&server->sem);
-			if (server->vlserver) {
-				rxrpc_put_connection(server->vlserver);
-				server->vlserver = NULL;
-			}
-			up_write(&server->sem);
-			afs_put_server(server);
 			if (ret == -ENOMEM || ret == -ENONET)
 				goto out;
 			goto rotate;
 		case -ENOMEDIUM:
-			afs_put_server(server);
 			goto out;
 		default:
-			afs_put_server(server);
-			ret = -ENOMEDIUM;
+			ret = -EIO;
 			goto rotate;
 		}
 
@@ -146,6 +76,7 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vlocation,
 	}
 
 out:
+	up_write(&vl->cell->vl_sem);
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -153,66 +84,56 @@ out:
 /*
  * iterate through the VL servers in a cell until one of them admits knowing
  * about the volume in question
- * - caller must have cell->vl_sem write-locked
  */
-static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vlocation,
+static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
 					 afs_volid_t volid,
 					 afs_voltype_t voltype,
 					 struct afs_cache_vlocation *vldb)
 {
-	struct afs_server *server = NULL;
-	struct afs_cell *cell = vlocation->cell;
+	struct afs_cell *cell = vl->cell;
+	struct in_addr addr;
 	int count, ret;
 
 	_enter("%s,%x,%d,", cell->name, volid, voltype);
 
+	down_write(&vl->cell->vl_sem);
 	ret = -ENOMEDIUM;
 	for (count = cell->vl_naddrs; count > 0; count--) {
-		_debug("CellServ[%hu]: %08x",
-		       cell->vl_curr_svix,
-		       cell->vl_addrs[cell->vl_curr_svix].s_addr);
-
-		/* try and create a server */
-		ret = afs_server_lookup(cell,
-					&cell->vl_addrs[cell->vl_curr_svix],
-					&server);
-		switch (ret) {
-		case 0:
-			break;
-		case -ENOMEM:
-		case -ENONET:
-			goto out;
-		default:
-			goto rotate;
-		}
+		addr = cell->vl_addrs[cell->vl_curr_svix];
+
+		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
 
 		/* attempt to access the VL server */
-		ret = afs_rxvl_get_entry_by_id(server, volid, voltype, vldb);
+		ret = afs_vl_get_entry_by_id(&addr, volid, voltype, vldb,
+					     &afs_sync_call);
 		switch (ret) {
 		case 0:
-			afs_put_server(server);
 			goto out;
 		case -ENOMEM:
 		case -ENONET:
 		case -ENETUNREACH:
 		case -EHOSTUNREACH:
 		case -ECONNREFUSED:
-			down_write(&server->sem);
-			if (server->vlserver) {
-				rxrpc_put_connection(server->vlserver);
-				server->vlserver = NULL;
-			}
-			up_write(&server->sem);
-			afs_put_server(server);
 			if (ret == -ENOMEM || ret == -ENONET)
 				goto out;
 			goto rotate;
+		case -EBUSY:
+			vl->upd_busy_cnt++;
+			if (vl->upd_busy_cnt <= 3) {
+				if (vl->upd_busy_cnt > 1) {
+					/* second+ BUSY - sleep a little bit */
+					set_current_state(TASK_UNINTERRUPTIBLE);
+					schedule_timeout(1);
+					__set_current_state(TASK_RUNNING);
+				}
+				continue;
+			}
+			break;
 		case -ENOMEDIUM:
-			afs_put_server(server);
-			goto out;
+			vl->upd_rej_cnt++;
+			goto rotate;
 		default:
-			afs_put_server(server);
-			ret = -ENOMEDIUM;
+			ret = -EIO;
 			goto rotate;
 		}
 
@@ -220,150 +141,83 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vlocation,
 	rotate:
 		cell->vl_curr_svix++;
 		cell->vl_curr_svix %= cell->vl_naddrs;
+		vl->upd_busy_cnt = 0;
 	}
 
 out:
+	if (ret < 0 && vl->upd_rej_cnt > 0) {
+		printk(KERN_NOTICE "kAFS:"
+		       " Active volume no longer valid '%s'\n",
+		       vl->vldb.name);
+		vl->valid = 0;
+		ret = -ENOMEDIUM;
+	}
+
+	up_write(&vl->cell->vl_sem);
 	_leave(" = %d", ret);
 	return ret;
 }
 
 /*
- * lookup volume location
- * - caller must have cell->vol_sem write-locked
- * - iterate through the VL servers in a cell until one of them admits knowing
- *   about the volume in question
- * - lookup in the local cache if not able to find on the VL server
- * - insert/update in the local cache if did get a VL response
+ * allocate a volume location record
  */
-int afs_vlocation_lookup(struct afs_cell *cell,
-			 const char *name,
-			 unsigned namesz,
-			 struct afs_vlocation **_vlocation)
+static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
+						 const char *name,
+						 size_t namesz)
 {
-	struct afs_cache_vlocation vldb;
-	struct afs_vlocation *vlocation;
-	afs_voltype_t voltype;
-	afs_volid_t vid;
-	int active = 0, ret;
-
-	_enter("{%s},%*.*s,%u,", cell->name, namesz, namesz, name, namesz);
-
-	if (namesz > sizeof(vlocation->vldb.name)) {
-		_leave(" = -ENAMETOOLONG");
-		return -ENAMETOOLONG;
-	}
-
-	/* search the cell's active list first */
-	list_for_each_entry(vlocation, &cell->vl_list, link) {
-		if (namesz < sizeof(vlocation->vldb.name) &&
-		    vlocation->vldb.name[namesz] != '\0')
-			continue;
-
-		if (memcmp(vlocation->vldb.name, name, namesz) == 0)
-			goto found_in_memory;
-	}
-
-	/* search the cell's graveyard list second */
-	spin_lock(&cell->vl_gylock);
-	list_for_each_entry(vlocation, &cell->vl_graveyard, link) {
-		if (namesz < sizeof(vlocation->vldb.name) &&
-		    vlocation->vldb.name[namesz] != '\0')
-			continue;
-
-		if (memcmp(vlocation->vldb.name, name, namesz) == 0)
-			goto found_in_graveyard;
-	}
-	spin_unlock(&cell->vl_gylock);
-
-	/* not in the cell's in-memory lists - create a new record */
-	vlocation = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
-	if (!vlocation)
-		return -ENOMEM;
-
-	atomic_set(&vlocation->usage, 1);
-	INIT_LIST_HEAD(&vlocation->link);
-	rwlock_init(&vlocation->lock);
-	memcpy(vlocation->vldb.name, name, namesz);
-
-	afs_timer_init(&vlocation->timeout, &afs_vlocation_timer_ops);
-	afs_timer_init(&vlocation->upd_timer, &afs_vlocation_update_timer_ops);
-	afs_async_op_init(&vlocation->upd_op, &afs_vlocation_update_op_ops);
-
-	afs_get_cell(cell);
-	vlocation->cell = cell;
-
-	list_add_tail(&vlocation->link, &cell->vl_list);
-
-#ifdef AFS_CACHING_SUPPORT
-	/* we want to store it in the cache, plus it might already be
-	 * encached */
-	cachefs_acquire_cookie(cell->cache,
-			       &afs_volume_cache_index_def,
-			       vlocation,
-			       &vlocation->cache);
-
-	if (vlocation->valid)
-		goto found_in_cache;
-#endif
-
-	/* try to look up an unknown volume in the cell VL databases by name */
-	ret = afs_vlocation_access_vl_by_name(vlocation, name, namesz, &vldb);
-	if (ret < 0) {
-		printk("kAFS: failed to locate '%*.*s' in cell '%s'\n",
-		       namesz, namesz, name, cell->name);
-		goto error;
+	struct afs_vlocation *vl;
+
+	vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
+	if (vl) {
+		vl->cell = cell;
+		vl->state = AFS_VL_NEW;
+		atomic_set(&vl->usage, 1);
+		INIT_LIST_HEAD(&vl->link);
+		INIT_LIST_HEAD(&vl->grave);
+		INIT_LIST_HEAD(&vl->update);
+		init_waitqueue_head(&vl->waitq);
+		rwlock_init(&vl->lock);
+		memcpy(vl->vldb.name, name, namesz);
 	}
 
-	goto found_on_vlserver;
-
-found_in_graveyard:
-	/* found in the graveyard - resurrect */
-	_debug("found in graveyard");
-	atomic_inc(&vlocation->usage);
-	list_move_tail(&vlocation->link, &cell->vl_list);
-	spin_unlock(&cell->vl_gylock);
-
-	afs_kafstimod_del_timer(&vlocation->timeout);
-	goto active;
-
-found_in_memory:
-	/* found in memory - check to see if it's active */
-	_debug("found in memory");
-	atomic_inc(&vlocation->usage);
+	_leave(" = %p", vl);
+	return vl;
+}
 
-active:
-	active = 1;
+/*
+ * update record if we found it in the cache
+ */
+static int afs_vlocation_update_record(struct afs_vlocation *vl,
+				       struct afs_cache_vlocation *vldb)
+{
+	afs_voltype_t voltype;
+	afs_volid_t vid;
+	int ret;
 
-#ifdef AFS_CACHING_SUPPORT
-found_in_cache:
-#endif
 	/* try to look up a cached volume in the cell VL databases by ID */
-	_debug("found in cache");
-
 	_debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
-	       vlocation->vldb.name,
-	       vlocation->vldb.vidmask,
-	       ntohl(vlocation->vldb.servers[0].s_addr),
-	       vlocation->vldb.srvtmask[0],
-	       ntohl(vlocation->vldb.servers[1].s_addr),
-	       vlocation->vldb.srvtmask[1],
-	       ntohl(vlocation->vldb.servers[2].s_addr),
-	       vlocation->vldb.srvtmask[2]
-	       );
+	       vl->vldb.name,
+	       vl->vldb.vidmask,
+	       ntohl(vl->vldb.servers[0].s_addr),
+	       vl->vldb.srvtmask[0],
+	       ntohl(vl->vldb.servers[1].s_addr),
+	       vl->vldb.srvtmask[1],
+	       ntohl(vl->vldb.servers[2].s_addr),
+	       vl->vldb.srvtmask[2]);
 
 	_debug("Vids: %08x %08x %08x",
-	       vlocation->vldb.vid[0],
-	       vlocation->vldb.vid[1],
-	       vlocation->vldb.vid[2]);
+	       vl->vldb.vid[0],
+	       vl->vldb.vid[1],
+	       vl->vldb.vid[2]);
 
-	if (vlocation->vldb.vidmask & AFS_VOL_VTM_RW) {
-		vid = vlocation->vldb.vid[0];
+	if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
+		vid = vl->vldb.vid[0];
 		voltype = AFSVL_RWVOL;
-	} else if (vlocation->vldb.vidmask & AFS_VOL_VTM_RO) {
-		vid = vlocation->vldb.vid[1];
+	} else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) {
+		vid = vl->vldb.vid[1];
 		voltype = AFSVL_ROVOL;
-	} else if (vlocation->vldb.vidmask & AFS_VOL_VTM_BAK) {
-		vid = vlocation->vldb.vid[2];
+	} else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) {
+		vid = vl->vldb.vid[2];
 		voltype = AFSVL_BACKVOL;
 	} else {
 		BUG();
@@ -371,551 +225,482 @@ found_in_cache:
 		voltype = 0;
 	}
 
-	ret = afs_vlocation_access_vl_by_id(vlocation, vid, voltype, &vldb);
+	/* contact the server to make sure the volume is still available
+	 * - TODO: need to handle disconnected operation here
+	 */
+	ret = afs_vlocation_access_vl_by_id(vl, vid, voltype, vldb);
 	switch (ret) {
 		/* net error */
 	default:
-		printk("kAFS: failed to volume '%*.*s' (%x) up in '%s': %d\n",
-		       namesz, namesz, name, vid, cell->name, ret);
-		goto error;
+		printk(KERN_WARNING "kAFS:"
+		       " failed to update volume '%s' (%x) up in '%s': %d\n",
+		       vl->vldb.name, vid, vl->cell->name, ret);
+		_leave(" = %d", ret);
+		return ret;
 
 		/* pulled from local cache into memory */
 	case 0:
-		goto found_on_vlserver;
+		_leave(" = 0");
+		return 0;
 
 		/* uh oh... looks like the volume got deleted */
 	case -ENOMEDIUM:
-		printk("kAFS: volume '%*.*s' (%x) does not exist '%s'\n",
-		       namesz, namesz, name, vid, cell->name);
+		printk(KERN_ERR "kAFS:"
+		       " volume '%s' (%x) does not exist '%s'\n",
+		       vl->vldb.name, vid, vl->cell->name);
 
 		/* TODO: make existing record unavailable */
-		goto error;
+		_leave(" = %d", ret);
+		return ret;
 	}
+}
 
-found_on_vlserver:
-	_debug("Done VL Lookup: %*.*s %02x { %08x(%x) %08x(%x) %08x(%x) }",
-	       namesz, namesz, name,
-	       vldb.vidmask,
-	       ntohl(vldb.servers[0].s_addr), vldb.srvtmask[0],
-	       ntohl(vldb.servers[1].s_addr), vldb.srvtmask[1],
-	       ntohl(vldb.servers[2].s_addr), vldb.srvtmask[2]
-	       );
-
-	_debug("Vids: %08x %08x %08x", vldb.vid[0], vldb.vid[1], vldb.vid[2]);
+/*
+ * apply the update to a VL record
+ */
+static void afs_vlocation_apply_update(struct afs_vlocation *vl,
+				       struct afs_cache_vlocation *vldb)
+{
+	_debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
+	       vldb->name, vldb->vidmask,
+	       ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0],
+	       ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1],
+	       ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]);
 
-	if ((namesz < sizeof(vlocation->vldb.name) &&
-	     vlocation->vldb.name[namesz] != '\0') ||
-	    memcmp(vldb.name, name, namesz) != 0)
-		printk("kAFS: name of volume '%*.*s' changed to '%s' on server\n",
-		       namesz, namesz, name, vldb.name);
+	_debug("Vids: %08x %08x %08x",
+	       vldb->vid[0], vldb->vid[1], vldb->vid[2]);
 
-	memcpy(&vlocation->vldb, &vldb, sizeof(vlocation->vldb));
+	if (strcmp(vldb->name, vl->vldb.name) != 0)
+		printk(KERN_NOTICE "kAFS:"
+		       " name of volume '%s' changed to '%s' on server\n",
+		       vl->vldb.name, vldb->name);
 
-	afs_kafstimod_add_timer(&vlocation->upd_timer, 10 * HZ);
+	vl->vldb = *vldb;
 
 #ifdef AFS_CACHING_SUPPORT
 	/* update volume entry in local cache */
-	cachefs_update_cookie(vlocation->cache);
-#endif
-
-	*_vlocation = vlocation;
-	_leave(" = 0 (%p)",vlocation);
-	return 0;
-
-error:
-	if (vlocation) {
-		if (active) {
-			__afs_put_vlocation(vlocation);
-		} else {
-			list_del(&vlocation->link);
-#ifdef AFS_CACHING_SUPPORT
-			cachefs_relinquish_cookie(vlocation->cache, 0);
+	cachefs_update_cookie(vl->cache);
 #endif
-			afs_put_cell(vlocation->cell);
-			kfree(vlocation);
-		}
-	}
-
-	_leave(" = %d", ret);
-	return ret;
 }
 
 /*
- * finish using a volume location record
- * - caller must have cell->vol_sem write-locked
+ * fill in a volume location record, consulting the cache and the VL server
+ * both
  */
-static void __afs_put_vlocation(struct afs_vlocation *vlocation)
+static int afs_vlocation_fill_in_record(struct afs_vlocation *vl)
 {
-	struct afs_cell *cell;
+	struct afs_cache_vlocation vldb;
+	int ret;
 
-	if (!vlocation)
-		return;
+	_enter("");
 
-	_enter("%s", vlocation->vldb.name);
+	ASSERTCMP(vl->valid, ==, 0);
 
-	cell = vlocation->cell;
+	memset(&vldb, 0, sizeof(vldb));
 
-	/* sanity check */
-	BUG_ON(atomic_read(&vlocation->usage) <= 0);
+	/* see if we have an in-cache copy (will set vl->valid if there is) */
+#ifdef AFS_CACHING_SUPPORT
+	cachefs_acquire_cookie(cell->cache,
+			       &afs_volume_cache_index_def,
+			       vlocation,
+			       &vl->cache);
+#endif
 
-	spin_lock(&cell->vl_gylock);
-	if (likely(!atomic_dec_and_test(&vlocation->usage))) {
-		spin_unlock(&cell->vl_gylock);
-		_leave("");
-		return;
+	if (vl->valid) {
+		/* try to update a known volume in the cell VL databases by
+		 * ID as the name may have changed */
+		_debug("found in cache");
+		ret = afs_vlocation_update_record(vl, &vldb);
+	} else {
+		/* try to look up an unknown volume in the cell VL databases by
+		 * name */
+		ret = afs_vlocation_access_vl_by_name(vl, &vldb);
+		if (ret < 0) {
+			printk("kAFS: failed to locate '%s' in cell '%s'\n",
+			       vl->vldb.name, vl->cell->name);
+			return ret;
+		}
 	}
 
-	/* move to graveyard queue */
-	list_move_tail(&vlocation->link,&cell->vl_graveyard);
-
-	/* remove from pending timeout queue (refcounted if actually being
-	 * updated) */
-	list_del_init(&vlocation->upd_op.link);
-
-	/* time out in 10 secs */
-	afs_kafstimod_del_timer(&vlocation->upd_timer);
-	afs_kafstimod_add_timer(&vlocation->timeout, 10 * HZ);
-
-	spin_unlock(&cell->vl_gylock);
-
-	_leave(" [killed]");
+	afs_vlocation_apply_update(vl, &vldb);
+	_leave(" = 0");
+	return 0;
 }
 
 /*
- * finish using a volume location record
+ * queue a vlocation record for updates
  */
-void afs_put_vlocation(struct afs_vlocation *vlocation)
+void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
 {
-	if (vlocation) {
-		struct afs_cell *cell = vlocation->cell;
+	struct afs_vlocation *xvl;
 
-		down_write(&cell->vl_sem);
-		__afs_put_vlocation(vlocation);
-		up_write(&cell->vl_sem);
+	/* wait at least 10 minutes before updating... */
+	vl->update_at = get_seconds() + afs_vlocation_update_timeout;
+
+	spin_lock(&afs_vlocation_updates_lock);
+
+	if (!list_empty(&afs_vlocation_updates)) {
+		/* ... but wait at least 1 second more than the newest record
+		 * already queued so that we don't spam the VL server suddenly
+		 * with lots of requests
+		 */
+		xvl = list_entry(afs_vlocation_updates.prev,
+				 struct afs_vlocation, update);
+		if (vl->update_at <= xvl->update_at)
+			vl->update_at = xvl->update_at + 1;
+	} else {
+		queue_delayed_work(afs_vlocation_update_worker,
+				   &afs_vlocation_update,
+				   afs_vlocation_update_timeout * HZ);
 	}
+
+	list_add_tail(&vl->update, &afs_vlocation_updates);
+	spin_unlock(&afs_vlocation_updates_lock);
 }
 
 /*
- * timeout vlocation record
- * - removes from the cell's graveyard if the usage count is zero
+ * lookup volume location
+ * - iterate through the VL servers in a cell until one of them admits knowing
+ *   about the volume in question
+ * - lookup in the local cache if not able to find on the VL server
+ * - insert/update in the local cache if did get a VL response
  */
-void afs_vlocation_do_timeout(struct afs_vlocation *vlocation)
+struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
+					   const char *name,
+					   size_t namesz)
 {
-	struct afs_cell *cell;
-
-	_enter("%s", vlocation->vldb.name);
-
-	cell = vlocation->cell;
+	struct afs_vlocation *vl;
+	int ret;
 
-	BUG_ON(atomic_read(&vlocation->usage) < 0);
+	_enter("{%s},%*.*s,%zu",
+	       cell->name, (int) namesz, (int) namesz, name, namesz);
 
-	/* remove from graveyard if still dead */
-	spin_lock(&cell->vl_gylock);
-	if (atomic_read(&vlocation->usage) == 0)
-		list_del_init(&vlocation->link);
-	else
-		vlocation = NULL;
-	spin_unlock(&cell->vl_gylock);
+	if (namesz > sizeof(vl->vldb.name)) {
+		_leave(" = -ENAMETOOLONG");
+		return ERR_PTR(-ENAMETOOLONG);
+	}
 
-	if (!vlocation) {
-		_leave("");
-		return; /* resurrected */
+	/* see if we have an in-memory copy first */
+	down_write(&cell->vl_sem);
+	spin_lock(&cell->vl_lock);
+	list_for_each_entry(vl, &cell->vl_list, link) {
+		if (vl->vldb.name[namesz] != '\0')
+			continue;
+		if (memcmp(vl->vldb.name, name, namesz) == 0)
+			goto found_in_memory;
 	}
+	spin_unlock(&cell->vl_lock);
 
-	/* we can now destroy it properly */
-#ifdef AFS_CACHING_SUPPORT
-	cachefs_relinquish_cookie(vlocation->cache, 0);
-#endif
-	afs_put_cell(cell);
+	/* not in the cell's in-memory lists - create a new record */
+	vl = afs_vlocation_alloc(cell, name, namesz);
+	if (!vl) {
+		up_write(&cell->vl_sem);
+		return ERR_PTR(-ENOMEM);
+	}
 
-	kfree(vlocation);
+	afs_get_cell(cell);
 
-	_leave(" [destroyed]");
-}
+	list_add_tail(&vl->link, &cell->vl_list);
+	vl->state = AFS_VL_CREATING;
+	up_write(&cell->vl_sem);
 
-/*
- * send an update operation to the currently selected server
- */
-static int afs_vlocation_update_begin(struct afs_vlocation *vlocation)
-{
-	afs_voltype_t voltype;
-	afs_volid_t vid;
-	int ret;
+fill_in_record:
+	ret = afs_vlocation_fill_in_record(vl);
+	if (ret < 0)
+		goto error_abandon;
+	vl->state = AFS_VL_VALID;
+	wake_up(&vl->waitq);
 
-	_enter("%s{ufs=%u ucs=%u}",
-	       vlocation->vldb.name,
-	       vlocation->upd_first_svix,
-	       vlocation->upd_curr_svix);
+	/* schedule for regular updates */
+	afs_vlocation_queue_for_updates(vl);
+	goto success;
 
-	/* try to look up a cached volume in the cell VL databases by ID */
-	if (vlocation->vldb.vidmask & AFS_VOL_VTM_RW) {
-		vid = vlocation->vldb.vid[0];
-		voltype = AFSVL_RWVOL;
-	} else if (vlocation->vldb.vidmask & AFS_VOL_VTM_RO) {
-		vid = vlocation->vldb.vid[1];
-		voltype = AFSVL_ROVOL;
-	} else if (vlocation->vldb.vidmask & AFS_VOL_VTM_BAK) {
-		vid = vlocation->vldb.vid[2];
-		voltype = AFSVL_BACKVOL;
-	} else {
-		BUG();
-		vid = 0;
-		voltype = 0;
+found_in_memory:
+	/* found in memory */
+	_debug("found in memory");
+	atomic_inc(&vl->usage);
+	spin_unlock(&cell->vl_lock);
+	if (!list_empty(&vl->grave)) {
+		spin_lock(&afs_vlocation_graveyard_lock);
+		list_del_init(&vl->grave);
+		spin_unlock(&afs_vlocation_graveyard_lock);
 	}
+	up_write(&cell->vl_sem);
 
-	/* contact the chosen server */
-	ret = afs_server_lookup(
-		vlocation->cell,
-		&vlocation->cell->vl_addrs[vlocation->upd_curr_svix],
-		&vlocation->upd_op.server);
+	/* see if it was an abandoned record that we might try filling in */
+	while (vl->state != AFS_VL_VALID) {
+		afs_vlocation_state_t state = vl->state;
 
-	switch (ret) {
-	case 0:
-		break;
-	case -ENOMEM:
-	case -ENONET:
-	default:
-		_leave(" = %d", ret);
-		return ret;
-	}
+		_debug("invalid [state %d]", state);
 
-	/* initiate the update operation */
-	ret = afs_rxvl_get_entry_by_id_async(&vlocation->upd_op, vid, voltype);
-	if (ret < 0) {
-		_leave(" = %d", ret);
-		return ret;
+		if ((state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME)) {
+			if (cmpxchg(&vl->state, state, AFS_VL_CREATING) ==
+			    state)
+				goto fill_in_record;
+			continue;
+		}
+
+		/* must now wait for creation or update by someone else to
+		 * complete */
+		_debug("wait");
+
+		ret = wait_event_interruptible(
+			vl->waitq,
+			vl->state == AFS_VL_NEW ||
+			vl->state == AFS_VL_VALID ||
+			vl->state == AFS_VL_NO_VOLUME);
+		if (ret < 0)
+			goto error;
 	}
 
+success:
+	_leave(" = %p",vl);
+	return vl;
+
+error_abandon:
+	vl->state = AFS_VL_NEW;
+	wake_up(&vl->waitq);
+error:
+	ASSERT(vl != NULL);
+	afs_put_vlocation(vl);
 	_leave(" = %d", ret);
-	return ret;
+	return ERR_PTR(ret);
 }
 
 /*
- * abandon updating a VL record
- * - does not restart the update timer
+ * finish using a volume location record
  */
-static void afs_vlocation_update_abandon(struct afs_vlocation *vlocation,
-					 afs_vlocation_upd_t state,
-					 int ret)
+void afs_put_vlocation(struct afs_vlocation *vl)
 {
-	_enter("%s,%u", vlocation->vldb.name, state);
-
-	if (ret < 0)
-		printk("kAFS: Abandoning VL update '%s': %d\n",
-		       vlocation->vldb.name, ret);
-
-	/* discard the server record */
-	afs_put_server(vlocation->upd_op.server);
-	vlocation->upd_op.server = NULL;
+	if (!vl)
+		return;
 
-	spin_lock(&afs_vlocation_update_lock);
-	afs_vlocation_update = NULL;
-	vlocation->upd_state = state;
+	_enter("%s", vl->vldb.name);
 
-	/* TODO: start updating next VL record on pending list */
+	ASSERTCMP(atomic_read(&vl->usage), >, 0);
 
-	spin_unlock(&afs_vlocation_update_lock);
+	if (likely(!atomic_dec_and_test(&vl->usage))) {
+		_leave("");
+		return;
+	}
 
-	_leave("");
+	spin_lock(&afs_vlocation_graveyard_lock);
+	if (atomic_read(&vl->usage) == 0) {
+		_debug("buried");
+		list_move_tail(&vl->grave, &afs_vlocation_graveyard);
+		vl->time_of_death = get_seconds();
+		schedule_delayed_work(&afs_vlocation_reap,
+				      afs_vlocation_timeout * HZ);
+
+		/* suspend updates on this record */
+		if (!list_empty(&vl->update)) {
+			spin_lock(&afs_vlocation_updates_lock);
+			list_del_init(&vl->update);
+			spin_unlock(&afs_vlocation_updates_lock);
+		}
+	}
+	spin_unlock(&afs_vlocation_graveyard_lock);
+	_leave(" [killed?]");
 }
 
 /*
- * handle periodic update timeouts and busy retry timeouts
- * - called from kafstimod
+ * destroy a dead volume location record
  */
-static void afs_vlocation_update_timer(struct afs_timer *timer)
+static void afs_vlocation_destroy(struct afs_vlocation *vl)
 {
-	struct afs_vlocation *vlocation =
-		list_entry(timer, struct afs_vlocation, upd_timer);
-	int ret;
+	_enter("%p", vl);
 
-	_enter("%s", vlocation->vldb.name);
+#ifdef AFS_CACHING_SUPPORT
+	cachefs_relinquish_cookie(vl->cache, 0);
+#endif
 
-	/* only update if not in the graveyard (defend against putting too) */
-	spin_lock(&vlocation->cell->vl_gylock);
+	afs_put_cell(vl->cell);
+	kfree(vl);
+}
+
+/*
+ * reap dead volume location records
+ */
+static void afs_vlocation_reaper(struct work_struct *work)
+{
+	LIST_HEAD(corpses);
+	struct afs_vlocation *vl;
+	unsigned long delay, expiry;
+	time_t now;
 
-	if (!atomic_read(&vlocation->usage))
-		goto out_unlock1;
+	_enter("");
 
-	spin_lock(&afs_vlocation_update_lock);
+	now = get_seconds();
+	spin_lock(&afs_vlocation_graveyard_lock);
+
+	while (!list_empty(&afs_vlocation_graveyard)) {
+		vl = list_entry(afs_vlocation_graveyard.next,
+				struct afs_vlocation, grave);
+
+		_debug("check %p", vl);
+
+		/* the queue is ordered most dead first */
+		expiry = vl->time_of_death + afs_vlocation_timeout;
+		if (expiry > now) {
+			delay = (expiry - now) * HZ;
+			_debug("delay %lu", delay);
+			if (!schedule_delayed_work(&afs_vlocation_reap,
+						   delay)) {
+				cancel_delayed_work(&afs_vlocation_reap);
+				schedule_delayed_work(&afs_vlocation_reap,
+						      delay);
+			}
+			break;
+		}
 
-	/* if we were woken up due to EBUSY sleep then restart immediately if
-	 * possible or else jump to front of pending queue */
-	if (vlocation->upd_state == AFS_VLUPD_BUSYSLEEP) {
-		if (afs_vlocation_update) {
-			list_add(&vlocation->upd_op.link,
-				 &afs_vlocation_update_pendq);
+		spin_lock(&vl->cell->vl_lock);
+		if (atomic_read(&vl->usage) > 0) {
+			_debug("no reap");
+			list_del_init(&vl->grave);
 		} else {
-			afs_get_vlocation(vlocation);
-			afs_vlocation_update = vlocation;
-			vlocation->upd_state = AFS_VLUPD_INPROGRESS;
+			_debug("reap");
+			list_move_tail(&vl->grave, &corpses);
+			list_del_init(&vl->link);
 		}
-		goto out_unlock2;
+		spin_unlock(&vl->cell->vl_lock);
 	}
 
-	/* put on pending queue if there's already another update in progress */
-	if (afs_vlocation_update) {
-		vlocation->upd_state = AFS_VLUPD_PENDING;
-		list_add_tail(&vlocation->upd_op.link,
-			      &afs_vlocation_update_pendq);
-		goto out_unlock2;
-	}
+	spin_unlock(&afs_vlocation_graveyard_lock);
 
-	/* hold a ref on it while actually updating */
-	afs_get_vlocation(vlocation);
-	afs_vlocation_update = vlocation;
-	vlocation->upd_state = AFS_VLUPD_INPROGRESS;
-
-	spin_unlock(&afs_vlocation_update_lock);
-	spin_unlock(&vlocation->cell->vl_gylock);
-
-	/* okay... we can start the update */
-	_debug("BEGIN VL UPDATE [%s]", vlocation->vldb.name);
-	vlocation->upd_first_svix = vlocation->cell->vl_curr_svix;
-	vlocation->upd_curr_svix = vlocation->upd_first_svix;
-	vlocation->upd_rej_cnt = 0;
-	vlocation->upd_busy_cnt = 0;
-
-	ret = afs_vlocation_update_begin(vlocation);
-	if (ret < 0) {
-		afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, ret);
-		afs_kafstimod_add_timer(&vlocation->upd_timer,
-					AFS_VLDB_TIMEOUT);
-		afs_put_vlocation(vlocation);
+	/* now reap the corpses we've extracted */
+	while (!list_empty(&corpses)) {
+		vl = list_entry(corpses.next, struct afs_vlocation, grave);
+		list_del(&vl->grave);
+		afs_vlocation_destroy(vl);
 	}
 
 	_leave("");
-	return;
+}
 
-out_unlock2:
-	spin_unlock(&afs_vlocation_update_lock);
-out_unlock1:
-	spin_unlock(&vlocation->cell->vl_gylock);
-	_leave("");
+/*
+ * initialise the VL update process
+ */
+int __init afs_vlocation_update_init(void)
+{
+	afs_vlocation_update_worker =
+		create_singlethread_workqueue("kafs_vlupdated");
+	return afs_vlocation_update_worker ? 0 : -ENOMEM;
+}
+
+/*
+ * discard all the volume location records for rmmod
+ */
+void __exit afs_vlocation_purge(void)
+{
+	afs_vlocation_timeout = 0;
+
+	spin_lock(&afs_vlocation_updates_lock);
+	list_del_init(&afs_vlocation_updates);
+	spin_unlock(&afs_vlocation_updates_lock);
+	cancel_delayed_work(&afs_vlocation_update);
+	queue_delayed_work(afs_vlocation_update_worker,
+			   &afs_vlocation_update, 0);
+	destroy_workqueue(afs_vlocation_update_worker);
+
+	cancel_delayed_work(&afs_vlocation_reap);
+	schedule_delayed_work(&afs_vlocation_reap, 0);
 }
 
 /*
- * attend to an update operation upon which an event happened
- * - called in kafsasyncd context
+ * update a volume location
  */
-static void afs_vlocation_update_attend(struct afs_async_op *op)
+static void afs_vlocation_updater(struct work_struct *work)
 {
 	struct afs_cache_vlocation vldb;
-	struct afs_vlocation *vlocation =
-		list_entry(op, struct afs_vlocation, upd_op);
-	unsigned tmp;
+	struct afs_vlocation *vl, *xvl;
+	time_t now;
+	long timeout;
 	int ret;
 
-	_enter("%s", vlocation->vldb.name);
-
-	ret = afs_rxvl_get_entry_by_id_async2(op, &vldb);
-	switch (ret) {
-	case -EAGAIN:
-		_leave(" [unfinished]");
-		return;
-
-	case 0:
-		_debug("END VL UPDATE: %d\n", ret);
-		vlocation->valid = 1;
-
-		_debug("Done VL Lookup: %02x { %08x(%x) %08x(%x) %08x(%x) }",
-		       vldb.vidmask,
-		       ntohl(vldb.servers[0].s_addr), vldb.srvtmask[0],
-		       ntohl(vldb.servers[1].s_addr), vldb.srvtmask[1],
-		       ntohl(vldb.servers[2].s_addr), vldb.srvtmask[2]
-		       );
-
-		_debug("Vids: %08x %08x %08x",
-		       vldb.vid[0], vldb.vid[1], vldb.vid[2]);
-
-		afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, 0);
-
-		down_write(&vlocation->cell->vl_sem);
-
-		/* actually update the cache */
-		if (strncmp(vldb.name, vlocation->vldb.name,
-			    sizeof(vlocation->vldb.name)) != 0)
-			printk("kAFS: name of volume '%s'"
-			       " changed to '%s' on server\n",
-			       vlocation->vldb.name, vldb.name);
-
-		memcpy(&vlocation->vldb, &vldb, sizeof(vlocation->vldb));
-
-#if 0
-		/* TODO update volume entry in local cache */
-#endif
-
-		up_write(&vlocation->cell->vl_sem);
-
-		if (ret < 0)
-			printk("kAFS: failed to update local cache: %d\n", ret);
-
-		afs_kafstimod_add_timer(&vlocation->upd_timer,
-					AFS_VLDB_TIMEOUT);
-		afs_put_vlocation(vlocation);
-		_leave(" [found]");
-		return;
-
-	case -ENOMEDIUM:
-		vlocation->upd_rej_cnt++;
-		goto try_next;
-
-		/* the server is locked - retry in a very short while */
-	case -EBUSY:
-		vlocation->upd_busy_cnt++;
-		if (vlocation->upd_busy_cnt > 3)
-			goto try_next; /* too many retries */
-
-		afs_vlocation_update_abandon(vlocation,
-					     AFS_VLUPD_BUSYSLEEP, 0);
-		afs_kafstimod_add_timer(&vlocation->upd_timer, HZ / 2);
-		afs_put_vlocation(vlocation);
-		_leave(" [busy]");
-		return;
-
-	case -ENETUNREACH:
-	case -EHOSTUNREACH:
-	case -ECONNREFUSED:
-	case -EREMOTEIO:
-		/* record bad vlserver info in the cell too
-		 * - TODO: use down_write_trylock() if available
-		 */
-		if (vlocation->upd_curr_svix == vlocation->cell->vl_curr_svix)
-			vlocation->cell->vl_curr_svix =
-				vlocation->cell->vl_curr_svix %
-				vlocation->cell->vl_naddrs;
-
-	case -EBADRQC:
-	case -EINVAL:
-	case -EACCES:
-	case -EBADMSG:
-		goto try_next;
-
-	default:
-		goto abandon;
-	}
-
-	/* try contacting the next server */
-try_next:
-	vlocation->upd_busy_cnt = 0;
-
-	/* discard the server record */
-	afs_put_server(vlocation->upd_op.server);
-	vlocation->upd_op.server = NULL;
+	_enter("");
 
-	tmp = vlocation->cell->vl_naddrs;
-	if (tmp == 0)
-		goto abandon;
+	now = get_seconds();
 
-	vlocation->upd_curr_svix++;
-	if (vlocation->upd_curr_svix >= tmp)
-		vlocation->upd_curr_svix = 0;
-	if (vlocation->upd_first_svix >= tmp)
-		vlocation->upd_first_svix = tmp - 1;
+	/* find a record to update */
+	spin_lock(&afs_vlocation_updates_lock);
+	for (;;) {
+		if (list_empty(&afs_vlocation_updates)) {
+			spin_unlock(&afs_vlocation_updates_lock);
+			_leave(" [nothing]");
+			return;
+		}
 
-	/* move to the next server */
-	if (vlocation->upd_curr_svix != vlocation->upd_first_svix) {
-		afs_vlocation_update_begin(vlocation);
-		_leave(" [next]");
-		return;
+		vl = list_entry(afs_vlocation_updates.next,
+				struct afs_vlocation, update);
+		if (atomic_read(&vl->usage) > 0)
+			break;
+		list_del_init(&vl->update);
 	}
 
-	/* run out of servers to try - was the volume rejected? */
-	if (vlocation->upd_rej_cnt > 0) {
-		printk("kAFS: Active volume no longer valid '%s'\n",
-		       vlocation->vldb.name);
-		vlocation->valid = 0;
-		afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, 0);
-		afs_kafstimod_add_timer(&vlocation->upd_timer,
-					AFS_VLDB_TIMEOUT);
-		afs_put_vlocation(vlocation);
-		_leave(" [invalidated]");
+	timeout = vl->update_at - now;
+	if (timeout > 0) {
+		queue_delayed_work(afs_vlocation_update_worker,
+				   &afs_vlocation_update, timeout * HZ);
+		spin_unlock(&afs_vlocation_updates_lock);
+		_leave(" [nothing]");
 		return;
 	}
 
-	/* abandon the update */
-abandon:
-	afs_vlocation_update_abandon(vlocation, AFS_VLUPD_SLEEP, ret);
-	afs_kafstimod_add_timer(&vlocation->upd_timer, HZ * 10);
-	afs_put_vlocation(vlocation);
-	_leave(" [abandoned]");
-}
+	list_del_init(&vl->update);
+	atomic_inc(&vl->usage);
+	spin_unlock(&afs_vlocation_updates_lock);
 
-/*
- * deal with an update operation being discarded
- * - called in kafsasyncd context when it's dying due to rmmod
- * - the call has already been aborted and put()'d
- */
-static void afs_vlocation_update_discard(struct afs_async_op *op)
-{
-	struct afs_vlocation *vlocation =
-		list_entry(op, struct afs_vlocation, upd_op);
-
-	_enter("%s", vlocation->vldb.name);
+	/* we can now perform the update */
+	_debug("update %s", vl->vldb.name);
+	vl->state = AFS_VL_UPDATING;
+	vl->upd_rej_cnt = 0;
+	vl->upd_busy_cnt = 0;
 
-	afs_put_server(op->server);
-	op->server = NULL;
-
-	afs_put_vlocation(vlocation);
+	ret = afs_vlocation_update_record(vl, &vldb);
+	switch (ret) {
+	case 0:
+		afs_vlocation_apply_update(vl, &vldb);
+		vl->state = AFS_VL_VALID;
+		break;
+	case -ENOMEDIUM:
+		vl->state = AFS_VL_VOLUME_DELETED;
+		break;
+	default:
+		vl->state = AFS_VL_UNCERTAIN;
+		break;
+	}
 
-	_leave("");
-}
+	/* and then reschedule */
+	_debug("reschedule");
+	vl->update_at = get_seconds() + afs_vlocation_update_timeout;
 
-/*
- * match a VLDB record stored in the cache
- * - may also load target from entry
- */
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_vlocation_cache_match(void *target,
-						     const void *entry)
-{
-	const struct afs_cache_vlocation *vldb = entry;
-	struct afs_vlocation *vlocation = target;
-
-	_enter("{%s},{%s}", vlocation->vldb.name, vldb->name);
-
-	if (strncmp(vlocation->vldb.name, vldb->name, sizeof(vldb->name)) == 0
-	    ) {
-		if (!vlocation->valid ||
-		    vlocation->vldb.rtime == vldb->rtime
-		    ) {
-			vlocation->vldb = *vldb;
-			vlocation->valid = 1;
-			_leave(" = SUCCESS [c->m]");
-			return CACHEFS_MATCH_SUCCESS;
-		} else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) {
-			/* delete if VIDs for this name differ */
-			if (memcmp(&vlocation->vldb.vid,
-				   &vldb->vid,
-				   sizeof(vldb->vid)) != 0) {
-				_leave(" = DELETE");
-				return CACHEFS_MATCH_SUCCESS_DELETE;
-			}
+	spin_lock(&afs_vlocation_updates_lock);
 
-			_leave(" = UPDATE");
-			return CACHEFS_MATCH_SUCCESS_UPDATE;
-		} else {
-			_leave(" = SUCCESS");
-			return CACHEFS_MATCH_SUCCESS;
-		}
+	if (!list_empty(&afs_vlocation_updates)) {
+		/* next update in 10 minutes, but wait at least 1 second more
+		 * than the newest record already queued so that we don't spam
+		 * the VL server suddenly with lots of requests
+		 */
+		xvl = list_entry(afs_vlocation_updates.prev,
+				 struct afs_vlocation, update);
+		if (vl->update_at <= xvl->update_at)
+			vl->update_at = xvl->update_at + 1;
+		xvl = list_entry(afs_vlocation_updates.next,
+				 struct afs_vlocation, update);
+		timeout = xvl->update_at - now;
+		if (timeout < 0)
+			timeout = 0;
+	} else {
+		timeout = afs_vlocation_update_timeout;
 	}
 
-	_leave(" = FAILED");
-	return CACHEFS_MATCH_FAILED;
-}
-#endif
+	ASSERT(list_empty(&vl->update));
 
-/*
- * update a VLDB record stored in the cache
- */
-#ifdef AFS_CACHING_SUPPORT
-static void afs_vlocation_cache_update(void *source, void *entry)
-{
-	struct afs_cache_vlocation *vldb = entry;
-	struct afs_vlocation *vlocation = source;
-
-	_enter("");
+	list_add_tail(&vl->update, &afs_vlocation_updates);
 
-	*vldb = vlocation->vldb;
+	_debug("timeout %ld", timeout);
+	queue_delayed_work(afs_vlocation_update_worker,
+			   &afs_vlocation_update, timeout * HZ);
+	spin_unlock(&afs_vlocation_updates_lock);
+	afs_put_vlocation(vl);
 }
-#endif
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index 4ab1ed71028..d2ca1398474 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -1,6 +1,6 @@
 /* AFS vnode management
  *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -14,71 +14,182 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
-#include <linux/pagemap.h>
-#include "volume.h"
-#include "cell.h"
-#include "cmservice.h"
-#include "fsclient.h"
-#include "vlclient.h"
-#include "vnode.h"
 #include "internal.h"
 
-static void afs_vnode_cb_timed_out(struct afs_timer *timer);
+#if 0
+static noinline bool dump_tree_aux(struct rb_node *node, struct rb_node *parent,
+				   int depth, char lr)
+{
+	struct afs_vnode *vnode;
+	bool bad = false;
+
+	if (!node)
+		return false;
+
+	if (node->rb_left)
+		bad = dump_tree_aux(node->rb_left, node, depth + 2, '/');
+
+	vnode = rb_entry(node, struct afs_vnode, cb_promise);
+	kdebug("%c %*.*s%c%p {%d}",
+	       rb_is_red(node) ? 'R' : 'B',
+	       depth, depth, "", lr,
+	       vnode, vnode->cb_expires_at);
+	if (rb_parent(node) != parent) {
+		printk("BAD: %p != %p\n", rb_parent(node), parent);
+		bad = true;
+	}
 
-struct afs_timer_ops afs_vnode_cb_timed_out_ops = {
-	.timed_out	= afs_vnode_cb_timed_out,
-};
+	if (node->rb_right)
+		bad |= dump_tree_aux(node->rb_right, node, depth + 2, '\\');
 
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_vnode_cache_match(void *target,
-						 const void *entry);
-static void afs_vnode_cache_update(void *source, void *entry);
+	return bad;
+}
 
-struct cachefs_index_def afs_vnode_cache_index_def = {
-	.name		= "vnode",
-	.data_size	= sizeof(struct afs_cache_vnode),
-	.keys[0]	= { CACHEFS_INDEX_KEYS_BIN, 4 },
-	.match		= afs_vnode_cache_match,
-	.update		= afs_vnode_cache_update,
-};
+static noinline void dump_tree(const char *name, struct afs_server *server)
+{
+	kenter("%s", name);
+	if (dump_tree_aux(server->cb_promises.rb_node, NULL, 0, '-'))
+		BUG();
+}
 #endif
 
 /*
- * handle a callback timing out
- * TODO: retain a ref to vnode struct for an outstanding callback timeout
+ * insert a vnode into the backing server's vnode tree
  */
-static void afs_vnode_cb_timed_out(struct afs_timer *timer)
+static void afs_install_vnode(struct afs_vnode *vnode,
+			      struct afs_server *server)
 {
-	struct afs_server *oldserver;
-	struct afs_vnode *vnode;
+	struct afs_server *old_server = vnode->server;
+	struct afs_vnode *xvnode;
+	struct rb_node *parent, **p;
 
-	vnode = list_entry(timer, struct afs_vnode, cb_timeout);
+	_enter("%p,%p", vnode, server);
 
-	_enter("%p", vnode);
+	if (old_server) {
+		spin_lock(&old_server->fs_lock);
+		rb_erase(&vnode->server_rb, &old_server->fs_vnodes);
+		spin_unlock(&old_server->fs_lock);
+	}
 
-	/* set the changed flag in the vnode and release the server */
-	spin_lock(&vnode->lock);
+	afs_get_server(server);
+	vnode->server = server;
+	afs_put_server(old_server);
+
+	/* insert into the server's vnode tree in FID order */
+	spin_lock(&server->fs_lock);
+
+	parent = NULL;
+	p = &server->fs_vnodes.rb_node;
+	while (*p) {
+		parent = *p;
+		xvnode = rb_entry(parent, struct afs_vnode, server_rb);
+		if (vnode->fid.vid < xvnode->fid.vid)
+			p = &(*p)->rb_left;
+		else if (vnode->fid.vid > xvnode->fid.vid)
+			p = &(*p)->rb_right;
+		else if (vnode->fid.vnode < xvnode->fid.vnode)
+			p = &(*p)->rb_left;
+		else if (vnode->fid.vnode > xvnode->fid.vnode)
+			p = &(*p)->rb_right;
+		else if (vnode->fid.unique < xvnode->fid.unique)
+			p = &(*p)->rb_left;
+		else if (vnode->fid.unique > xvnode->fid.unique)
+			p = &(*p)->rb_right;
+		else
+			BUG(); /* can't happen unless afs_iget() malfunctions */
+	}
 
-	oldserver = xchg(&vnode->cb_server, NULL);
-	if (oldserver) {
-		vnode->flags |= AFS_VNODE_CHANGED;
+	rb_link_node(&vnode->server_rb, parent, p);
+	rb_insert_color(&vnode->server_rb, &server->fs_vnodes);
 
-		spin_lock(&afs_cb_hash_lock);
-		list_del_init(&vnode->cb_hash_link);
-		spin_unlock(&afs_cb_hash_lock);
+	spin_unlock(&server->fs_lock);
+	_leave("");
+}
 
-		spin_lock(&oldserver->cb_lock);
-		list_del_init(&vnode->cb_link);
-		spin_unlock(&oldserver->cb_lock);
+/*
+ * insert a vnode into the promising server's update/expiration tree
+ * - caller must hold vnode->lock
+ */
+static void afs_vnode_note_promise(struct afs_vnode *vnode,
+				   struct afs_server *server)
+{
+	struct afs_server *old_server;
+	struct afs_vnode *xvnode;
+	struct rb_node *parent, **p;
+
+	_enter("%p,%p", vnode, server);
+
+	ASSERT(server != NULL);
+
+	old_server = vnode->server;
+	if (vnode->cb_promised) {
+		if (server == old_server &&
+		    vnode->cb_expires == vnode->cb_expires_at) {
+			_leave(" [no change]");
+			return;
+		}
+
+		spin_lock(&old_server->cb_lock);
+		if (vnode->cb_promised) {
+			_debug("delete");
+			rb_erase(&vnode->cb_promise, &old_server->cb_promises);
+			vnode->cb_promised = false;
+		}
+		spin_unlock(&old_server->cb_lock);
 	}
 
-	spin_unlock(&vnode->lock);
+	if (vnode->server != server)
+		afs_install_vnode(vnode, server);
+
+	vnode->cb_expires_at = vnode->cb_expires;
+	_debug("PROMISE on %p {%lu}",
+	       vnode, (unsigned long) vnode->cb_expires_at);
+
+	/* abuse an RB-tree to hold the expiration order (we may have multiple
+	 * items with the same expiration time) */
+	spin_lock(&server->cb_lock);
+
+	parent = NULL;
+	p = &server->cb_promises.rb_node;
+	while (*p) {
+		parent = *p;
+		xvnode = rb_entry(parent, struct afs_vnode, cb_promise);
+		if (vnode->cb_expires_at < xvnode->cb_expires_at)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
 
-	afs_put_server(oldserver);
+	rb_link_node(&vnode->cb_promise, parent, p);
+	rb_insert_color(&vnode->cb_promise, &server->cb_promises);
+	vnode->cb_promised = true;
 
+	spin_unlock(&server->cb_lock);
 	_leave("");
 }
 
+/*
+ * handle remote file deletion by discarding the callback promise
+ */
+static void afs_vnode_deleted_remotely(struct afs_vnode *vnode)
+{
+	struct afs_server *server;
+
+	set_bit(AFS_VNODE_DELETED, &vnode->flags);
+
+	server = vnode->server;
+	if (vnode->cb_promised) {
+		spin_lock(&server->cb_lock);
+		if (vnode->cb_promised) {
+			rb_erase(&vnode->cb_promise, &server->cb_promises);
+			vnode->cb_promised = false;
+		}
+		spin_unlock(&server->cb_lock);
+	}
+
+	afs_put_server(server);
+}
+
 /*
  * finish off updating the recorded status of a file
  * - starts callback expiry timer
@@ -94,43 +205,19 @@ static void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
 
 	spin_lock(&vnode->lock);
 
-	vnode->flags &= ~AFS_VNODE_CHANGED;
-
-	if (ret == 0) {
-		/* adjust the callback timeout appropriately */
-		afs_kafstimod_add_timer(&vnode->cb_timeout,
-					vnode->cb_expiry * HZ);
-
-		spin_lock(&afs_cb_hash_lock);
-		list_move_tail(&vnode->cb_hash_link,
-			       &afs_cb_hash(server, &vnode->fid));
-		spin_unlock(&afs_cb_hash_lock);
-
-		/* swap ref to old callback server with that for new callback
-		 * server */
-		oldserver = xchg(&vnode->cb_server, server);
-		if (oldserver != server) {
-			if (oldserver) {
-				spin_lock(&oldserver->cb_lock);
-				list_del_init(&vnode->cb_link);
-				spin_unlock(&oldserver->cb_lock);
-			}
-
-			afs_get_server(server);
-			spin_lock(&server->cb_lock);
-			list_add_tail(&vnode->cb_link, &server->cb_promises);
-			spin_unlock(&server->cb_lock);
-		} else {
-			/* same server */
-			oldserver = NULL;
-		}
-	} else if (ret == -ENOENT) {
-		/* the file was deleted - clear the callback timeout */
-		oldserver = xchg(&vnode->cb_server, NULL);
-		afs_kafstimod_del_timer(&vnode->cb_timeout);
+	clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
 
+	switch (ret) {
+	case 0:
+		afs_vnode_note_promise(vnode, server);
+		break;
+	case -ENOENT:
+		/* the file was deleted on the server */
 		_debug("got NOENT from server - marking file deleted");
-		vnode->flags |= AFS_VNODE_DELETED;
+		afs_vnode_deleted_remotely(vnode);
+		break;
+	default:
+		break;
 	}
 
 	vnode->update_cnt--;
@@ -162,19 +249,21 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode)
 	       vnode->volume->vlocation->vldb.name,
 	       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
 
-	if (!(vnode->flags & AFS_VNODE_CHANGED) && vnode->cb_server) {
+	if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
+	    vnode->cb_promised) {
 		_leave(" [unchanged]");
 		return 0;
 	}
 
-	if (vnode->flags & AFS_VNODE_DELETED) {
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
 		_leave(" [deleted]");
 		return -ENOENT;
 	}
 
 	spin_lock(&vnode->lock);
 
-	if (!(vnode->flags & AFS_VNODE_CHANGED)) {
+	if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
+	    vnode->cb_promised) {
 		spin_unlock(&vnode->lock);
 		_leave(" [unchanged]");
 		return 0;
@@ -183,17 +272,18 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode)
 	if (vnode->update_cnt > 0) {
 		/* someone else started a fetch */
 		set_current_state(TASK_UNINTERRUPTIBLE);
+		ASSERT(myself.func != NULL);
 		add_wait_queue(&vnode->update_waitq, &myself);
 
 		/* wait for the status to be updated */
 		for (;;) {
-			if (!(vnode->flags & AFS_VNODE_CHANGED))
+			if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
 				break;
-			if (vnode->flags & AFS_VNODE_DELETED)
+			if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
 				break;
 
-			/* it got updated and invalidated all before we saw
-			 * it */
+			/* check to see if it got updated and invalidated all
+			 * before we saw it */
 			if (vnode->update_cnt == 0) {
 				remove_wait_queue(&vnode->update_waitq,
 						  &myself);
@@ -213,7 +303,8 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode)
 		spin_unlock(&vnode->lock);
 		set_current_state(TASK_RUNNING);
 
-		return vnode->flags & AFS_VNODE_DELETED ? -ENOENT : 0;
+		return test_bit(AFS_VNODE_DELETED, &vnode->flags) ?
+			-ENOENT : 0;
 	}
 
 get_anyway:
@@ -226,15 +317,17 @@ get_anyway:
 	 * vnode */
 	do {
 		/* pick a server to query */
-		ret = afs_volume_pick_fileserver(vnode->volume, &server);
-		if (ret<0)
-			return ret;
+		server = afs_volume_pick_fileserver(vnode);
+		if (IS_ERR(server))
+			return PTR_ERR(server);
 
-		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+		_debug("USING SERVER: %p{%08x}",
+		       server, ntohl(server->addr.s_addr));
 
-		ret = afs_rxfs_fetch_file_status(server, vnode, NULL);
+		ret = afs_fs_fetch_file_status(server, vnode, NULL,
+					       &afs_sync_call);
 
-	} while (!afs_volume_release_fileserver(vnode->volume, server, ret));
+	} while (!afs_volume_release_fileserver(vnode, server, ret));
 
 	/* adjust the flags */
 	afs_vnode_finalise_status_update(vnode, server, ret);
@@ -247,8 +340,8 @@ get_anyway:
  * fetch file data from the volume
  * - TODO implement caching and server failover
  */
-int afs_vnode_fetch_data(struct afs_vnode *vnode,
-			 struct afs_rxfs_fetch_descriptor *desc)
+int afs_vnode_fetch_data(struct afs_vnode *vnode, off_t offset, size_t length,
+			 struct page *page)
 {
 	struct afs_server *server;
 	int ret;
@@ -268,15 +361,16 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode,
 	 * vnode */
 	do {
 		/* pick a server to query */
-		ret = afs_volume_pick_fileserver(vnode->volume, &server);
-		if (ret < 0)
-			return ret;
+		server = afs_volume_pick_fileserver(vnode);
+		if (IS_ERR(server))
+			return PTR_ERR(server);
 
 		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
 
-		ret = afs_rxfs_fetch_file_data(server, vnode, desc, NULL);
+		ret = afs_fs_fetch_data(server, vnode, offset, length, page,
+					NULL, &afs_sync_call);
 
-	} while (!afs_volume_release_fileserver(vnode->volume, server, ret));
+	} while (!afs_volume_release_fileserver(vnode, server, ret));
 
 	/* adjust the flags */
 	afs_vnode_finalise_status_update(vnode, server, ret);
@@ -284,99 +378,3 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode,
 	_leave(" = %d", ret);
 	return ret;
 }
-
-/*
- * break any outstanding callback on a vnode
- * - only relevent to server that issued it
- */
-int afs_vnode_give_up_callback(struct afs_vnode *vnode)
-{
-	struct afs_server *server;
-	int ret;
-
-	_enter("%s,{%u,%u,%u}",
-	       vnode->volume->vlocation->vldb.name,
-	       vnode->fid.vid,
-	       vnode->fid.vnode,
-	       vnode->fid.unique);
-
-	spin_lock(&afs_cb_hash_lock);
-	list_del_init(&vnode->cb_hash_link);
-	spin_unlock(&afs_cb_hash_lock);
-
-	/* set the changed flag in the vnode and release the server */
-	spin_lock(&vnode->lock);
-
-	afs_kafstimod_del_timer(&vnode->cb_timeout);
-
-	server = xchg(&vnode->cb_server, NULL);
-	if (server) {
-		vnode->flags |= AFS_VNODE_CHANGED;
-
-		spin_lock(&server->cb_lock);
-		list_del_init(&vnode->cb_link);
-		spin_unlock(&server->cb_lock);
-	}
-
-	spin_unlock(&vnode->lock);
-
-	ret = 0;
-	if (server) {
-		ret = afs_rxfs_give_up_callback(server, vnode);
-		afs_put_server(server);
-	}
-
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
- * match a vnode record stored in the cache
- */
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_vnode_cache_match(void *target,
-						 const void *entry)
-{
-	const struct afs_cache_vnode *cvnode = entry;
-	struct afs_vnode *vnode = target;
-
-	_enter("{%x,%x,%Lx},{%x,%x,%Lx}",
-	       vnode->fid.vnode,
-	       vnode->fid.unique,
-	       vnode->status.version,
-	       cvnode->vnode_id,
-	       cvnode->vnode_unique,
-	       cvnode->data_version);
-
-	if (vnode->fid.vnode != cvnode->vnode_id) {
-		_leave(" = FAILED");
-		return CACHEFS_MATCH_FAILED;
-	}
-
-	if (vnode->fid.unique != cvnode->vnode_unique ||
-	    vnode->status.version != cvnode->data_version) {
-		_leave(" = DELETE");
-		return CACHEFS_MATCH_SUCCESS_DELETE;
-	}
-
-	_leave(" = SUCCESS");
-	return CACHEFS_MATCH_SUCCESS;
-}
-#endif
-
-/*
- * update a vnode record stored in the cache
- */
-#ifdef AFS_CACHING_SUPPORT
-static void afs_vnode_cache_update(void *source, void *entry)
-{
-	struct afs_cache_vnode *cvnode = entry;
-	struct afs_vnode *vnode = source;
-
-	_enter("");
-
-	cvnode->vnode_id	= vnode->fid.vnode;
-	cvnode->vnode_unique	= vnode->fid.unique;
-	cvnode->data_version	= vnode->status.version;
-}
-#endif
diff --git a/fs/afs/vnode.h b/fs/afs/vnode.h
deleted file mode 100644
index 7f6d05b197a..00000000000
--- a/fs/afs/vnode.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/* AFS vnode record
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef AFS_VNODE_H
-#define AFS_VNODE_H
-
-#include <linux/fs.h>
-#include "server.h"
-#include "kafstimod.h"
-#include "cache.h"
-
-struct afs_rxfs_fetch_descriptor;
-
-extern struct afs_timer_ops afs_vnode_cb_timed_out_ops;
-
-/*
- * vnode catalogue entry
- */
-struct afs_cache_vnode {
-	afs_vnodeid_t		vnode_id;	/* vnode ID */
-	unsigned		vnode_unique;	/* vnode ID uniquifier */
-	afs_dataversion_t	data_version;	/* data version */
-};
-
-#ifdef AFS_CACHING_SUPPORT
-extern struct cachefs_index_def afs_vnode_cache_index_def;
-#endif
-
-/*
- * AFS inode private data
- */
-struct afs_vnode {
-	struct inode		vfs_inode;	/* the VFS's inode record */
-
-	struct afs_volume	*volume;	/* volume on which vnode resides */
-	struct afs_fid		fid;		/* the file identifier for this inode */
-	struct afs_file_status	status;		/* AFS status info for this file */
-#ifdef AFS_CACHING_SUPPORT
-	struct cachefs_cookie	*cache;		/* caching cookie */
-#endif
-
-	wait_queue_head_t	update_waitq;	/* status fetch waitqueue */
-	unsigned		update_cnt;	/* number of outstanding ops that will update the
-						 * status */
-	spinlock_t		lock;		/* waitqueue/flags lock */
-	unsigned		flags;
-#define AFS_VNODE_CHANGED	0x00000001	/* set if vnode reported changed by callback */
-#define AFS_VNODE_DELETED	0x00000002	/* set if vnode deleted on server */
-#define AFS_VNODE_MOUNTPOINT	0x00000004	/* set if vnode is a mountpoint symlink */
-
-	/* outstanding callback notification on this file */
-	struct afs_server	*cb_server;	/* server that made the current promise */
-	struct list_head	cb_link;	/* link in server's promises list */
-	struct list_head	cb_hash_link;	/* link in master callback hash */
-	struct afs_timer	cb_timeout;	/* timeout on promise */
-	unsigned		cb_version;	/* callback version */
-	unsigned		cb_expiry;	/* callback expiry time */
-	afs_callback_type_t	cb_type;	/* type of callback */
-};
-
-static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
-{
-	return container_of(inode, struct afs_vnode, vfs_inode);
-}
-
-static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
-{
-	return &vnode->vfs_inode;
-}
-
-extern int afs_vnode_fetch_status(struct afs_vnode *);
-extern int afs_vnode_fetch_data(struct afs_vnode *,
-				struct afs_rxfs_fetch_descriptor *);
-extern int afs_vnode_give_up_callback(struct afs_vnode *);
-
-#endif /* AFS_VNODE_H */
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index c82e1bb4f2d..45491cfd4f4 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -1,6 +1,6 @@
 /* AFS volume management
  *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -15,33 +15,9 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include "volume.h"
-#include "vnode.h"
-#include "cell.h"
-#include "cache.h"
-#include "cmservice.h"
-#include "fsclient.h"
-#include "vlclient.h"
 #include "internal.h"
 
-#ifdef __KDEBUG
 static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
-#endif
-
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_volume_cache_match(void *target,
-						  const void *entry);
-static void afs_volume_cache_update(void *source, void *entry);
-
-struct cachefs_index_def afs_volume_cache_index_def = {
-	.name		= "volume",
-	.data_size	= sizeof(struct afs_cache_vhash),
-	.keys[0]	= { CACHEFS_INDEX_KEYS_BIN, 1 },
-	.keys[1]	= { CACHEFS_INDEX_KEYS_BIN, 1 },
-	.match		= afs_volume_cache_match,
-	.update		= afs_volume_cache_update,
-};
-#endif
 
 /*
  * lookup a volume by name
@@ -65,11 +41,12 @@ struct cachefs_index_def afs_volume_cache_index_def = {
  * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
  *           explicitly told otherwise
  */
-int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
-		      struct afs_volume **_volume)
+struct afs_volume *afs_volume_lookup(const char *name, struct afs_cell *cell,
+				     int rwpath)
 {
 	struct afs_vlocation *vlocation = NULL;
 	struct afs_volume *volume = NULL;
+	struct afs_server *server = NULL;
 	afs_voltype_t type;
 	const char *cellname, *volname, *suffix;
 	char srvtmask;
@@ -79,7 +56,7 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
 
 	if (!name || (name[0] != '%' && name[0] != '#') || !name[1]) {
 		printk("kAFS: unparsable volume name\n");
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 	}
 
 	/* determine the type of volume we're looking for */
@@ -128,8 +105,9 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
 
 	/* lookup the cell record */
 	if (cellname || !cell) {
-		ret = afs_cell_lookup(cellname, cellnamesz, &cell);
-		if (ret<0) {
+		cell = afs_cell_lookup(cellname, cellnamesz);
+		if (IS_ERR(cell)) {
+			ret = PTR_ERR(cell);
 			printk("kAFS: unable to lookup cell '%s'\n",
 			       cellname ?: "");
 			goto error;
@@ -139,9 +117,12 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
 	}
 
 	/* lookup the volume location record */
-	ret = afs_vlocation_lookup(cell, volname, volnamesz, &vlocation);
-	if (ret < 0)
+	vlocation = afs_vlocation_lookup(cell, volname, volnamesz);
+	if (IS_ERR(vlocation)) {
+		ret = PTR_ERR(vlocation);
+		vlocation = NULL;
 		goto error;
+	}
 
 	/* make the final decision on the type we want */
 	ret = -ENOMEDIUM;
@@ -192,13 +173,14 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
 	/* look up all the applicable server records */
 	for (loop = 0; loop < 8; loop++) {
 		if (vlocation->vldb.srvtmask[loop] & (1 << volume->type)) {
-			ret = afs_server_lookup(
-				volume->cell,
-				&vlocation->vldb.servers[loop],
-				&volume->servers[volume->nservers]);
-			if (ret < 0)
+			server = afs_lookup_server(
+			       volume->cell, &vlocation->vldb.servers[loop]);
+			if (IS_ERR(server)) {
+				ret = PTR_ERR(server);
 				goto error_discard;
+			}
 
+			volume->servers[volume->nservers] = server;
 			volume->nservers++;
 		}
 	}
@@ -219,8 +201,11 @@ int afs_volume_lookup(const char *name, struct afs_cell *cell, int rwpath,
 success:
 	_debug("kAFS selected %s volume %08x",
 	       afs_voltypes[volume->type], volume->vid);
-	*_volume = volume;
-	ret = 0;
+	up_write(&cell->vl_sem);
+	afs_put_vlocation(vlocation);
+	afs_put_cell(cell);
+	_leave(" = %p", volume);
+	return volume;
 
 	/* clean up */
 error_up:
@@ -228,9 +213,8 @@ error_up:
 error:
 	afs_put_vlocation(vlocation);
 	afs_put_cell(cell);
-
-	_leave(" = %d (%p)", ret, volume);
-	return ret;
+	_leave(" = %d", ret);
+	return ERR_PTR(ret);
 
 error_discard:
 	up_write(&cell->vl_sem);
@@ -255,10 +239,9 @@ void afs_put_volume(struct afs_volume *volume)
 
 	_enter("%p", volume);
 
-	vlocation = volume->vlocation;
+	ASSERTCMP(atomic_read(&volume->usage), >, 0);
 
-	/* sanity check */
-	BUG_ON(atomic_read(&volume->usage) <= 0);
+	vlocation = volume->vlocation;
 
 	/* to prevent a race, the decrement and the dequeue must be effectively
 	 * atomic */
@@ -292,14 +275,21 @@ void afs_put_volume(struct afs_volume *volume)
  * pick a server to use to try accessing this volume
  * - returns with an elevated usage count on the server chosen
  */
-int afs_volume_pick_fileserver(struct afs_volume *volume,
-			       struct afs_server **_server)
+struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
 {
+	struct afs_volume *volume = vnode->volume;
 	struct afs_server *server;
 	int ret, state, loop;
 
 	_enter("%s", volume->vlocation->vldb.name);
 
+	/* stick with the server we're already using if we can */
+	if (vnode->server && vnode->server->fs_state == 0) {
+		afs_get_server(vnode->server);
+		_leave(" = %p [current]", vnode->server);
+		return vnode->server;
+	}
+
 	down_read(&volume->server_sem);
 
 	/* handle the no-server case */
@@ -307,7 +297,7 @@ int afs_volume_pick_fileserver(struct afs_volume *volume,
 		ret = volume->rjservers ? -ENOMEDIUM : -ESTALE;
 		up_read(&volume->server_sem);
 		_leave(" = %d [no servers]", ret);
-		return ret;
+		return ERR_PTR(ret);
 	}
 
 	/* basically, just search the list for the first live server and use
@@ -317,15 +307,16 @@ int afs_volume_pick_fileserver(struct afs_volume *volume,
 		server = volume->servers[loop];
 		state = server->fs_state;
 
+		_debug("consider %d [%d]", loop, state);
+
 		switch (state) {
 			/* found an apparently healthy server */
 		case 0:
 			afs_get_server(server);
 			up_read(&volume->server_sem);
-			*_server = server;
-			_leave(" = 0 (picked %08x)",
-			       ntohl(server->addr.s_addr));
-			return 0;
+			_leave(" = %p (picked %08x)",
+			       server, ntohl(server->addr.s_addr));
+			return server;
 
 		case -ENETUNREACH:
 			if (ret == 0)
@@ -361,7 +352,7 @@ int afs_volume_pick_fileserver(struct afs_volume *volume,
 	 */
 	up_read(&volume->server_sem);
 	_leave(" = %d", ret);
-	return ret;
+	return ERR_PTR(ret);
 }
 
 /*
@@ -370,10 +361,11 @@ int afs_volume_pick_fileserver(struct afs_volume *volume,
  * - records result of using a particular server to access a volume
  * - return 0 to try again, 1 if okay or to issue error
  */
-int afs_volume_release_fileserver(struct afs_volume *volume,
+int afs_volume_release_fileserver(struct afs_vnode *vnode,
 				  struct afs_server *server,
 				  int result)
 {
+	struct afs_volume *volume = vnode->volume;
 	unsigned loop;
 
 	_enter("%s,%08x,%d",
@@ -384,6 +376,7 @@ int afs_volume_release_fileserver(struct afs_volume *volume,
 		/* success */
 	case 0:
 		server->fs_act_jif = jiffies;
+		server->fs_state = 0;
 		break;
 
 		/* the fileserver denied all knowledge of the volume */
@@ -391,7 +384,7 @@ int afs_volume_release_fileserver(struct afs_volume *volume,
 		server->fs_act_jif = jiffies;
 		down_write(&volume->server_sem);
 
-		/* first, find where the server is in the active list (if it
+		/* firstly, find where the server is in the active list (if it
 		 * is) */
 		for (loop = 0; loop < volume->nservers; loop++)
 			if (volume->servers[loop] == server)
@@ -429,6 +422,7 @@ int afs_volume_release_fileserver(struct afs_volume *volume,
 	case -ENETUNREACH:
 	case -EHOSTUNREACH:
 	case -ECONNREFUSED:
+	case -ETIME:
 	case -ETIMEDOUT:
 	case -EREMOTEIO:
 		/* mark the server as dead
@@ -464,40 +458,3 @@ try_next_server:
 	_leave(" [try next server]");
 	return 0;
 }
-
-/*
- * match a volume hash record stored in the cache
- */
-#ifdef AFS_CACHING_SUPPORT
-static cachefs_match_val_t afs_volume_cache_match(void *target,
-						  const void *entry)
-{
-	const struct afs_cache_vhash *vhash = entry;
-	struct afs_volume *volume = target;
-
-	_enter("{%u},{%u}", volume->type, vhash->vtype);
-
-	if (volume->type == vhash->vtype) {
-		_leave(" = SUCCESS");
-		return CACHEFS_MATCH_SUCCESS;
-	}
-
-	_leave(" = FAILED");
-	return CACHEFS_MATCH_FAILED;
-}
-#endif
-
-/*
- * update a volume hash record stored in the cache
- */
-#ifdef AFS_CACHING_SUPPORT
-static void afs_volume_cache_update(void *source, void *entry)
-{
-	struct afs_cache_vhash *vhash = entry;
-	struct afs_volume *volume = source;
-
-	_enter("");
-
-	vhash->vtype = volume->type;
-}
-#endif
diff --git a/fs/afs/volume.h b/fs/afs/volume.h
deleted file mode 100644
index a605bea2e3a..00000000000
--- a/fs/afs/volume.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/* AFS volume management
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef AFS_VOLUME_H
-#define AFS_VOLUME_H
-
-#include "types.h"
-#include "fsclient.h"
-#include "kafstimod.h"
-#include "kafsasyncd.h"
-#include "cache.h"
-
-typedef enum {
-	AFS_VLUPD_SLEEP,		/* sleeping waiting for update timer to fire */
-	AFS_VLUPD_PENDING,		/* on pending queue */
-	AFS_VLUPD_INPROGRESS,		/* op in progress */
-	AFS_VLUPD_BUSYSLEEP,		/* sleeping because server returned EBUSY */
-} __attribute__((packed)) afs_vlocation_upd_t;
-
-/*
- * entry in the cached volume location catalogue
- */
-struct afs_cache_vlocation {
-	uint8_t			name[64];	/* volume name (lowercase, padded with NULs) */
-	uint8_t			nservers;	/* number of entries used in servers[] */
-	uint8_t			vidmask;	/* voltype mask for vid[] */
-	uint8_t			srvtmask[8];	/* voltype masks for servers[] */
-#define AFS_VOL_VTM_RW	0x01 /* R/W version of the volume is available (on this server) */
-#define AFS_VOL_VTM_RO	0x02 /* R/O version of the volume is available (on this server) */
-#define AFS_VOL_VTM_BAK	0x04 /* backup version of the volume is available (on this server) */
-
-	afs_volid_t		vid[3];		/* volume IDs for R/W, R/O and Bak volumes */
-	struct in_addr		servers[8];	/* fileserver addresses */
-	time_t			rtime;		/* last retrieval time */
-};
-
-#ifdef AFS_CACHING_SUPPORT
-extern struct cachefs_index_def afs_vlocation_cache_index_def;
-#endif
-
-/*
- * volume -> vnode hash table entry
- */
-struct afs_cache_vhash {
-	afs_voltype_t		vtype;		/* which volume variation */
-	uint8_t			hash_bucket;	/* which hash bucket this represents */
-} __attribute__((packed));
-
-#ifdef AFS_CACHING_SUPPORT
-extern struct cachefs_index_def afs_volume_cache_index_def;
-#endif
-
-/*
- * AFS volume location record
- */
-struct afs_vlocation {
-	atomic_t		usage;
-	struct list_head	link;		/* link in cell volume location list */
-	struct afs_timer	timeout;	/* decaching timer */
-	struct afs_cell		*cell;		/* cell to which volume belongs */
-#ifdef AFS_CACHING_SUPPORT
-	struct cachefs_cookie	*cache;		/* caching cookie */
-#endif
-	struct afs_cache_vlocation vldb;	/* volume information DB record */
-	struct afs_volume	*vols[3];	/* volume access record pointer (index by type) */
-	rwlock_t		lock;		/* access lock */
-	unsigned long		read_jif;	/* time at which last read from vlserver */
-	struct afs_timer	upd_timer;	/* update timer */
-	struct afs_async_op	upd_op;		/* update operation */
-	afs_vlocation_upd_t	upd_state;	/* update state */
-	unsigned short		upd_first_svix;	/* first server index during update */
-	unsigned short		upd_curr_svix;	/* current server index during update */
-	unsigned short		upd_rej_cnt;	/* ENOMEDIUM count during update */
-	unsigned short		upd_busy_cnt;	/* EBUSY count during update */
-	unsigned short		valid;		/* T if valid */
-};
-
-extern int afs_vlocation_lookup(struct afs_cell *, const char *, unsigned,
-				struct afs_vlocation **);
-
-#define afs_get_vlocation(V) do { atomic_inc(&(V)->usage); } while(0)
-
-extern void afs_put_vlocation(struct afs_vlocation *);
-extern void afs_vlocation_do_timeout(struct afs_vlocation *);
-
-/*
- * AFS volume access record
- */
-struct afs_volume {
-	atomic_t		usage;
-	struct afs_cell		*cell;		/* cell to which belongs (unrefd ptr) */
-	struct afs_vlocation	*vlocation;	/* volume location */
-#ifdef AFS_CACHING_SUPPORT
-	struct cachefs_cookie	*cache;		/* caching cookie */
-#endif
-	afs_volid_t		vid;		/* volume ID */
-	afs_voltype_t		type;		/* type of volume */
-	char			type_force;	/* force volume type (suppress R/O -> R/W) */
-	unsigned short		nservers;	/* number of server slots filled */
-	unsigned short		rjservers;	/* number of servers discarded due to -ENOMEDIUM */
-	struct afs_server	*servers[8];	/* servers on which volume resides (ordered) */
-	struct rw_semaphore	server_sem;	/* lock for accessing current server */
-};
-
-extern int afs_volume_lookup(const char *, struct afs_cell *, int,
-			     struct afs_volume **);
-
-#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0)
-
-extern void afs_put_volume(struct afs_volume *);
-
-extern int afs_volume_pick_fileserver(struct afs_volume *,
-				      struct afs_server **);
-
-extern int afs_volume_release_fileserver(struct afs_volume *,
-					 struct afs_server *, int);
-
-#endif /* AFS_VOLUME_H */
-- 
cgit v1.2.3


From 63b6be55e8b51cb718468794d343058e96c7462c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 26 Apr 2007 15:55:48 -0700
Subject: [AF_RXRPC]: Delete the old RxRPC code.

Delete the old RxRPC code as it's now no longer used.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/Kconfig                 |    4 -
 include/rxrpc/call.h       |  212 -----
 include/rxrpc/connection.h |   83 --
 include/rxrpc/krxiod.h     |   27 -
 include/rxrpc/krxsecd.h    |   22 -
 include/rxrpc/krxtimod.h   |   45 -
 include/rxrpc/message.h    |   71 --
 include/rxrpc/packet.h     |   22 +-
 include/rxrpc/peer.h       |   82 --
 include/rxrpc/rxrpc.h      |   36 -
 include/rxrpc/transport.h  |  106 ---
 net/rxrpc/Makefile         |   27 +-
 net/rxrpc/call.c           | 2277 --------------------------------------------
 net/rxrpc/connection.c     |  777 ---------------
 net/rxrpc/internal.h       |  106 ---
 net/rxrpc/krxiod.c         |  262 -----
 net/rxrpc/krxsecd.c        |  270 ------
 net/rxrpc/krxtimod.c       |  204 ----
 net/rxrpc/main.c           |  180 ----
 net/rxrpc/peer.c           |  398 --------
 net/rxrpc/proc.c           |  617 ------------
 net/rxrpc/rxrpc_syms.c     |   34 -
 net/rxrpc/sysctl.c         |  121 ---
 net/rxrpc/transport.c      |  846 ----------------
 24 files changed, 9 insertions(+), 6820 deletions(-)
 delete mode 100644 include/rxrpc/call.h
 delete mode 100644 include/rxrpc/connection.h
 delete mode 100644 include/rxrpc/krxiod.h
 delete mode 100644 include/rxrpc/krxsecd.h
 delete mode 100644 include/rxrpc/krxtimod.h
 delete mode 100644 include/rxrpc/message.h
 delete mode 100644 include/rxrpc/peer.h
 delete mode 100644 include/rxrpc/rxrpc.h
 delete mode 100644 include/rxrpc/transport.h
 delete mode 100644 net/rxrpc/call.c
 delete mode 100644 net/rxrpc/connection.c
 delete mode 100644 net/rxrpc/internal.h
 delete mode 100644 net/rxrpc/krxiod.c
 delete mode 100644 net/rxrpc/krxsecd.c
 delete mode 100644 net/rxrpc/krxtimod.c
 delete mode 100644 net/rxrpc/main.c
 delete mode 100644 net/rxrpc/peer.c
 delete mode 100644 net/rxrpc/proc.c
 delete mode 100644 net/rxrpc/rxrpc_syms.c
 delete mode 100644 net/rxrpc/sysctl.c
 delete mode 100644 net/rxrpc/transport.c

diff --git a/fs/Kconfig b/fs/Kconfig
index 075c9997ddc..e33c0892457 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -2038,10 +2038,6 @@ config AFS_DEBUG
 
 	  If unsure, say N.
 
-
-config RXRPC
-	tristate
-
 config 9P_FS
 	tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
 	depends on INET && EXPERIMENTAL
diff --git a/include/rxrpc/call.h b/include/rxrpc/call.h
deleted file mode 100644
index b86f8374351..00000000000
--- a/include/rxrpc/call.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/* call.h: Rx call record
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _LINUX_RXRPC_CALL_H
-#define _LINUX_RXRPC_CALL_H
-
-#include <rxrpc/types.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/packet.h>
-#include <linux/timer.h>
-
-#define RXRPC_CALL_ACK_WINDOW_SIZE	16
-
-extern unsigned rxrpc_call_rcv_timeout;		/* receive activity timeout (secs) */
-
-/* application call state
- * - only state 0 and ffff are reserved, the state is set to 1 after an opid is received
- */
-enum rxrpc_app_cstate {
-	RXRPC_CSTATE_COMPLETE		= 0,	/* operation complete */
-	RXRPC_CSTATE_ERROR,			/* operation ICMP error or aborted */
-	RXRPC_CSTATE_SRVR_RCV_OPID,		/* [SERVER] receiving operation ID */
-	RXRPC_CSTATE_SRVR_RCV_ARGS,		/* [SERVER] receiving operation data */
-	RXRPC_CSTATE_SRVR_GOT_ARGS,		/* [SERVER] completely received operation data */
-	RXRPC_CSTATE_SRVR_SND_REPLY,		/* [SERVER] sending operation reply */
-	RXRPC_CSTATE_SRVR_RCV_FINAL_ACK,	/* [SERVER] receiving final ACK */
-	RXRPC_CSTATE_CLNT_SND_ARGS,		/* [CLIENT] sending operation args */
-	RXRPC_CSTATE_CLNT_RCV_REPLY,		/* [CLIENT] receiving operation reply */
-	RXRPC_CSTATE_CLNT_GOT_REPLY,		/* [CLIENT] completely received operation reply */
-} __attribute__((packed));
-
-extern const char *rxrpc_call_states[];
-
-enum rxrpc_app_estate {
-	RXRPC_ESTATE_NO_ERROR		= 0,	/* no error */
-	RXRPC_ESTATE_LOCAL_ABORT,		/* aborted locally by application layer */
-	RXRPC_ESTATE_PEER_ABORT,		/* aborted remotely by peer */
-	RXRPC_ESTATE_LOCAL_ERROR,		/* local ICMP network error */
-	RXRPC_ESTATE_REMOTE_ERROR,		/* remote ICMP network error */
-} __attribute__((packed));
-
-extern const char *rxrpc_call_error_states[];
-
-/*****************************************************************************/
-/*
- * Rx call record and application scratch buffer
- * - the call record occupies the bottom of a complete page
- * - the application scratch buffer occupies the rest
- */
-struct rxrpc_call
-{
-	atomic_t		usage;
-	struct rxrpc_connection	*conn;		/* connection upon which active */
-	spinlock_t		lock;		/* access lock */
-	struct module		*owner;		/* owner module */
-	wait_queue_head_t	waitq;		/* wait queue for events to happen */
-	struct list_head	link;		/* general internal list link */
-	struct list_head	call_link;	/* master call list link */
-	__be32			chan_ix;	/* connection channel index  */
-	__be32			call_id;	/* call ID on connection  */
-	unsigned long		cjif;		/* jiffies at call creation */
-	unsigned long		flags;		/* control flags */
-#define RXRPC_CALL_ACKS_TIMO	0x00000001	/* ACKS timeout reached */
-#define RXRPC_CALL_ACKR_TIMO	0x00000002	/* ACKR timeout reached */
-#define RXRPC_CALL_RCV_TIMO	0x00000004	/* RCV timeout reached */
-#define RXRPC_CALL_RCV_PKT	0x00000008	/* received packet */
-
-	/* transmission */
-	rxrpc_seq_t		snd_seq_count;	/* outgoing packet sequence number counter */
-	struct rxrpc_message	*snd_nextmsg;	/* next message being constructed for sending */
-	struct rxrpc_message	*snd_ping;	/* last ping message sent */
-	unsigned short		snd_resend_cnt;	/* count of resends since last ACK */
-
-	/* transmission ACK tracking */
-	struct list_head	acks_pendq;	/* messages pending ACK (ordered by seq) */
-	unsigned		acks_pend_cnt;	/* number of un-ACK'd packets */
-	rxrpc_seq_t		acks_dftv_seq;	/* highest definitively ACK'd msg seq */
-	struct timer_list	acks_timeout;	/* timeout on expected ACK */
-
-	/* reception */
-	struct list_head	rcv_receiveq;	/* messages pending reception (ordered by seq) */
-	struct list_head	rcv_krxiodq_lk;	/* krxiod queue for new inbound packets */
-	struct timer_list	rcv_timeout;	/* call receive activity timeout */
-
-	/* reception ACK'ing */
-	rxrpc_seq_t		ackr_win_bot;	/* bottom of ACK window */
-	rxrpc_seq_t		ackr_win_top;	/* top of ACK window */
-	rxrpc_seq_t		ackr_high_seq;	/* highest seqno yet received */
-	rxrpc_seq_net_t		ackr_prev_seq;	/* previous seqno received */
-	unsigned		ackr_pend_cnt;	/* number of pending ACKs */
-	struct timer_list	ackr_dfr_timo;	/* timeout on deferred ACK */
-	char			ackr_dfr_perm;	/* request for deferred ACKs permitted */
-	rxrpc_seq_t		ackr_dfr_seq;	/* seqno for deferred ACK */
-	struct rxrpc_ackpacket	ackr;		/* pending normal ACK packet */
-	uint8_t			ackr_array[RXRPC_CALL_ACK_WINDOW_SIZE];	/* ACK records */
-
-	/* presentation layer */
-	char			app_last_rcv;	/* T if received last packet from remote end */
-	enum rxrpc_app_cstate	app_call_state;	/* call state */
-	enum rxrpc_app_estate	app_err_state;	/* abort/error state */
-	struct list_head	app_readyq;	/* ordered ready received packet queue */
-	struct list_head	app_unreadyq;	/* ordered post-hole recv'd packet queue */
-	rxrpc_seq_t		app_ready_seq;	/* last seq number dropped into readyq */
-	size_t			app_ready_qty;	/* amount of data ready in readyq */
-	unsigned		app_opcode;	/* operation ID */
-	unsigned		app_abort_code;	/* abort code (when aborted) */
-	int			app_errno;	/* error number (when ICMP error received) */
-
-	/* statisics */
-	unsigned		pkt_rcv_count;	/* count of received packets on this call */
-	unsigned		pkt_snd_count;	/* count of sent packets on this call */
-	unsigned		app_read_count;	/* number of reads issued */
-
-	/* bits for the application to use */
-	rxrpc_call_attn_func_t	app_attn_func;	/* callback when attention required */
-	rxrpc_call_error_func_t	app_error_func;	/* callback when abort sent (cleanup and put) */
-	rxrpc_call_aemap_func_t	app_aemap_func;	/* callback to map abort code to/from errno */
-	void			*app_user;	/* application data */
-	struct list_head	app_link;	/* application list linkage */
-	struct list_head	app_attn_link;	/* application attention list linkage */
-	size_t			app_mark;	/* trigger callback when app_ready_qty>=app_mark */
-	char			app_async_read;	/* T if in async-read mode */
-	uint8_t			*app_read_buf;	/* application async read buffer (app_mark size) */
-	uint8_t			*app_scr_alloc;	/* application scratch allocation pointer */
-	void			*app_scr_ptr;	/* application pointer into scratch buffer */
-
-#define RXRPC_APP_MARK_EOF 0xFFFFFFFFU	/* mark at end of input */
-
-	/* application scratch buffer */
-	uint8_t			app_scratch[0] __attribute__((aligned(sizeof(long))));
-};
-
-#define RXRPC_CALL_SCRATCH_SIZE (PAGE_SIZE - sizeof(struct rxrpc_call))
-
-#define rxrpc_call_reset_scratch(CALL) \
-do { (CALL)->app_scr_alloc = (CALL)->app_scratch; } while(0)
-
-#define rxrpc_call_alloc_scratch(CALL,SIZE)						\
-({											\
-	void *ptr;									\
-	ptr = (CALL)->app_scr_alloc;							\
-	(CALL)->app_scr_alloc += (SIZE);						\
-	if ((SIZE)>RXRPC_CALL_SCRATCH_SIZE ||						\
-	    (size_t)((CALL)->app_scr_alloc - (u8*)(CALL)) > RXRPC_CALL_SCRATCH_SIZE) {	\
-		printk("rxrpc_call_alloc_scratch(%p,%Zu)\n",(CALL),(size_t)(SIZE));	\
-		BUG();									\
-	}										\
-	ptr;										\
-})
-
-#define rxrpc_call_alloc_scratch_s(CALL,TYPE)						\
-({											\
-	size_t size = sizeof(TYPE);							\
-	TYPE *ptr;									\
-	ptr = (TYPE*)(CALL)->app_scr_alloc;						\
-	(CALL)->app_scr_alloc += size;							\
-	if (size>RXRPC_CALL_SCRATCH_SIZE ||						\
-	    (size_t)((CALL)->app_scr_alloc - (u8*)(CALL)) > RXRPC_CALL_SCRATCH_SIZE) {	\
-		printk("rxrpc_call_alloc_scratch(%p,%Zu)\n",(CALL),size);		\
-		BUG();									\
-	}										\
-	ptr;										\
-})
-
-#define rxrpc_call_is_ack_pending(CALL) ((CALL)->ackr.reason != 0)
-
-extern int rxrpc_create_call(struct rxrpc_connection *conn,
-			     rxrpc_call_attn_func_t attn,
-			     rxrpc_call_error_func_t error,
-			     rxrpc_call_aemap_func_t aemap,
-			     struct rxrpc_call **_call);
-
-extern int rxrpc_incoming_call(struct rxrpc_connection *conn,
-			       struct rxrpc_message *msg,
-			       struct rxrpc_call **_call);
-
-static inline void rxrpc_get_call(struct rxrpc_call *call)
-{
-	BUG_ON(atomic_read(&call->usage)<=0);
-	atomic_inc(&call->usage);
-	/*printk("rxrpc_get_call(%p{u=%d})\n",(C),atomic_read(&(C)->usage));*/
-}
-
-extern void rxrpc_put_call(struct rxrpc_call *call);
-
-extern void rxrpc_call_do_stuff(struct rxrpc_call *call);
-
-extern int rxrpc_call_abort(struct rxrpc_call *call, int error);
-
-#define RXRPC_CALL_READ_BLOCK	0x0001	/* block if not enough data and not yet EOF */
-#define RXRPC_CALL_READ_ALL	0x0002	/* error if insufficient data received */
-extern int rxrpc_call_read_data(struct rxrpc_call *call, void *buffer, size_t size, int flags);
-
-extern int rxrpc_call_write_data(struct rxrpc_call *call,
-				 size_t sioc,
-				 struct kvec *siov,
-				 uint8_t rxhdr_flags,
-				 gfp_t alloc_flags,
-				 int dup_data,
-				 size_t *size_sent);
-
-extern void rxrpc_call_handle_error(struct rxrpc_call *conn, int local, int errno);
-
-#endif /* _LINUX_RXRPC_CALL_H */
diff --git a/include/rxrpc/connection.h b/include/rxrpc/connection.h
deleted file mode 100644
index 41e6781ad06..00000000000
--- a/include/rxrpc/connection.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* connection.h: Rx connection record
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _LINUX_RXRPC_CONNECTION_H
-#define _LINUX_RXRPC_CONNECTION_H
-
-#include <rxrpc/types.h>
-#include <rxrpc/krxtimod.h>
-
-struct sk_buff;
-
-/*****************************************************************************/
-/*
- * Rx connection
- * - connections are matched by (rmt_port,rmt_addr,service_id,conn_id,clientflag)
- * - connections only retain a refcount on the peer when they are active
- * - connections with refcount==0 are inactive and reside in the peer's graveyard
- */
-struct rxrpc_connection
-{
-	atomic_t		usage;
-	struct rxrpc_transport	*trans;		/* transport endpoint */
-	struct rxrpc_peer	*peer;		/* peer from/to which connected */
-	struct rxrpc_service	*service;	/* responsible service (inbound conns) */
-	struct rxrpc_timer	timeout;	/* decaching timer */
-	struct list_head	link;		/* link in peer's list */
-	struct list_head	proc_link;	/* link in proc list */
-	struct list_head	err_link;	/* link in ICMP error processing list */
-	struct list_head	id_link;	/* link in ID grant list */
-	struct sockaddr_in	addr;		/* remote address */
-	struct rxrpc_call	*channels[4];	/* channels (active calls) */
-	wait_queue_head_t	chanwait;	/* wait for channel to become available */
-	spinlock_t		lock;		/* access lock */
-	struct timeval		atime;		/* last access time */
-	size_t			mtu_size;	/* MTU size for outbound messages */
-	unsigned		call_counter;	/* call ID counter */
-	rxrpc_serial_t		serial_counter;	/* packet serial number counter */
-
-	/* the following should all be in net order */
-	__be32			in_epoch;	/* peer's epoch */
-	__be32			out_epoch;	/* my epoch */
-	__be32			conn_id;	/* connection ID, appropriately shifted */
-	__be16			service_id;	/* service ID */
-	uint8_t			security_ix;	/* security ID */
-	uint8_t			in_clientflag;	/* RXRPC_CLIENT_INITIATED if we are server */
-	uint8_t			out_clientflag;	/* RXRPC_CLIENT_INITIATED if we are client */
-};
-
-extern int rxrpc_create_connection(struct rxrpc_transport *trans,
-				   __be16 port,
-				   __be32 addr,
-				   uint16_t service_id,
-				   void *security,
-				   struct rxrpc_connection **_conn);
-
-extern int rxrpc_connection_lookup(struct rxrpc_peer *peer,
-				   struct rxrpc_message *msg,
-				   struct rxrpc_connection **_conn);
-
-static inline void rxrpc_get_connection(struct rxrpc_connection *conn)
-{
-	BUG_ON(atomic_read(&conn->usage)<0);
-	atomic_inc(&conn->usage);
-	//printk("rxrpc_get_conn(%p{u=%d})\n",conn,atomic_read(&conn->usage));
-}
-
-extern void rxrpc_put_connection(struct rxrpc_connection *conn);
-
-extern int rxrpc_conn_receive_call_packet(struct rxrpc_connection *conn,
-					  struct rxrpc_call *call,
-					  struct rxrpc_message *msg);
-
-extern void rxrpc_conn_handle_error(struct rxrpc_connection *conn, int local, int errno);
-
-#endif /* _LINUX_RXRPC_CONNECTION_H */
diff --git a/include/rxrpc/krxiod.h b/include/rxrpc/krxiod.h
deleted file mode 100644
index c0e0e82e4df..00000000000
--- a/include/rxrpc/krxiod.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* krxiod.h: Rx RPC I/O kernel thread interface
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _LINUX_RXRPC_KRXIOD_H
-#define _LINUX_RXRPC_KRXIOD_H
-
-#include <rxrpc/types.h>
-
-extern int rxrpc_krxiod_init(void);
-extern void rxrpc_krxiod_kill(void);
-extern void rxrpc_krxiod_queue_transport(struct rxrpc_transport *trans);
-extern void rxrpc_krxiod_dequeue_transport(struct rxrpc_transport *trans);
-extern void rxrpc_krxiod_queue_peer(struct rxrpc_peer *peer);
-extern void rxrpc_krxiod_dequeue_peer(struct rxrpc_peer *peer);
-extern void rxrpc_krxiod_clear_peers(struct rxrpc_transport *trans);
-extern void rxrpc_krxiod_queue_call(struct rxrpc_call *call);
-extern void rxrpc_krxiod_dequeue_call(struct rxrpc_call *call);
-
-#endif /* _LINUX_RXRPC_KRXIOD_H */
diff --git a/include/rxrpc/krxsecd.h b/include/rxrpc/krxsecd.h
deleted file mode 100644
index 55ce43a25b3..00000000000
--- a/include/rxrpc/krxsecd.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* krxsecd.h: Rx RPC security kernel thread interface
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _LINUX_RXRPC_KRXSECD_H
-#define _LINUX_RXRPC_KRXSECD_H
-
-#include <rxrpc/types.h>
-
-extern int rxrpc_krxsecd_init(void);
-extern void rxrpc_krxsecd_kill(void);
-extern void rxrpc_krxsecd_clear_transport(struct rxrpc_transport *trans);
-extern void rxrpc_krxsecd_queue_incoming_call(struct rxrpc_message *msg);
-
-#endif /* _LINUX_RXRPC_KRXSECD_H */
diff --git a/include/rxrpc/krxtimod.h b/include/rxrpc/krxtimod.h
deleted file mode 100644
index b3d298b612f..00000000000
--- a/include/rxrpc/krxtimod.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* krxtimod.h: RxRPC timeout daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _LINUX_RXRPC_KRXTIMOD_H
-#define _LINUX_RXRPC_KRXTIMOD_H
-
-#include <rxrpc/types.h>
-
-struct rxrpc_timer_ops {
-	/* called when the front of the timer queue has timed out */
-	void (*timed_out)(struct rxrpc_timer *timer);
-};
-
-/*****************************************************************************/
-/*
- * RXRPC timer/timeout record
- */
-struct rxrpc_timer
-{
-	struct list_head		link;		/* link in timer queue */
-	unsigned long			timo_jif;	/* timeout time */
-	const struct rxrpc_timer_ops	*ops;		/* timeout expiry function */
-};
-
-static inline void rxrpc_timer_init(rxrpc_timer_t *timer, const struct rxrpc_timer_ops *ops)
-{
-	INIT_LIST_HEAD(&timer->link);
-	timer->ops = ops;
-}
-
-extern int rxrpc_krxtimod_start(void);
-extern void rxrpc_krxtimod_kill(void);
-
-extern void rxrpc_krxtimod_add_timer(rxrpc_timer_t *timer, unsigned long timeout);
-extern int rxrpc_krxtimod_del_timer(rxrpc_timer_t *timer);
-
-#endif /* _LINUX_RXRPC_KRXTIMOD_H */
diff --git a/include/rxrpc/message.h b/include/rxrpc/message.h
deleted file mode 100644
index b318f273d4f..00000000000
--- a/include/rxrpc/message.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/* message.h: Rx message caching
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _LINUX_RXRPC_MESSAGE_H
-#define _LINUX_RXRPC_MESSAGE_H
-
-#include <rxrpc/packet.h>
-
-/*****************************************************************************/
-/*
- * Rx message record
- */
-struct rxrpc_message
-{
-	atomic_t		usage;
-	struct list_head	link;		/* list link */
-	struct timeval		stamp;		/* time received or last sent */
-	rxrpc_seq_t		seq;		/* message sequence number */
-
-	int			state;		/* the state the message is currently in */
-#define RXRPC_MSG_PREPARED	0
-#define RXRPC_MSG_SENT		1
-#define RXRPC_MSG_ACKED		2		/* provisionally ACK'd */
-#define RXRPC_MSG_DONE		3		/* definitively ACK'd (msg->seq<ack.firstPacket) */
-#define RXRPC_MSG_RECEIVED	4
-#define RXRPC_MSG_ERROR		-1
-	char			rttdone;	/* used for RTT */
-
-	struct rxrpc_transport	*trans;		/* transport received through */
-	struct rxrpc_connection	*conn;		/* connection received over */
-	struct sk_buff		*pkt;		/* received packet */
-	off_t			offset;		/* offset into pkt of next byte of data */
-
-	struct rxrpc_header	hdr;		/* message header */
-
-	int			dcount;		/* data part count */
-	size_t			dsize;		/* data size */
-#define RXRPC_MSG_MAX_IOCS 8
-	struct kvec		data[RXRPC_MSG_MAX_IOCS]; /* message data */
-	unsigned long		dfree;		/* bit mask indicating kfree(data[x]) if T */
-};
-
-#define rxrpc_get_message(M) do { atomic_inc(&(M)->usage); } while(0)
-
-extern void __rxrpc_put_message(struct rxrpc_message *msg);
-static inline void rxrpc_put_message(struct rxrpc_message *msg)
-{
-	BUG_ON(atomic_read(&msg->usage)<=0);
-	if (atomic_dec_and_test(&msg->usage))
-		__rxrpc_put_message(msg);
-}
-
-extern int rxrpc_conn_newmsg(struct rxrpc_connection *conn,
-			     struct rxrpc_call *call,
-			     uint8_t type,
-			     int count,
-			     struct kvec *diov,
-			     gfp_t alloc_flags,
-			     struct rxrpc_message **_msg);
-
-extern int rxrpc_conn_sendmsg(struct rxrpc_connection *conn, struct rxrpc_message *msg);
-
-#endif /* _LINUX_RXRPC_MESSAGE_H */
diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h
index 09b11a1e8d4..b69e6e173ea 100644
--- a/include/rxrpc/packet.h
+++ b/include/rxrpc/packet.h
@@ -1,6 +1,6 @@
 /* packet.h: Rx packet layout and definitions
  *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -12,21 +12,17 @@
 #ifndef _LINUX_RXRPC_PACKET_H
 #define _LINUX_RXRPC_PACKET_H
 
-#include <rxrpc/types.h>
-
-#define RXRPC_IPUDP_SIZE		28
-extern size_t RXRPC_MAX_PACKET_SIZE;
-#define RXRPC_MAX_PACKET_DATA_SIZE	(RXRPC_MAX_PACKET_SIZE - sizeof(struct rxrpc_header))
-#define RXRPC_LOCAL_PACKET_SIZE		RXRPC_MAX_PACKET_SIZE
-#define RXRPC_REMOTE_PACKET_SIZE	(576 - RXRPC_IPUDP_SIZE)
+typedef u32	rxrpc_seq_t;	/* Rx message sequence number */
+typedef u32	rxrpc_serial_t;	/* Rx message serial number */
+typedef __be32	rxrpc_seq_net_t; /* on-the-wire Rx message sequence number */
+typedef __be32	rxrpc_serial_net_t; /* on-the-wire Rx message serial number */
 
 /*****************************************************************************/
 /*
  * on-the-wire Rx packet header
  * - all multibyte fields should be in network byte order
  */
-struct rxrpc_header
-{
+struct rxrpc_header {
 	__be32		epoch;		/* client boot timestamp */
 
 	__be32		cid;		/* connection and channel ID */
@@ -85,8 +81,7 @@ extern const char *rxrpc_pkts[];
  *   - new__rsvd = j__rsvd
  *   - duplicating all other fields
  */
-struct rxrpc_jumbo_header
-{
+struct rxrpc_jumbo_header {
 	uint8_t		flags;		/* packet flags (as per rxrpc_header) */
 	uint8_t		pad;
 	__be16		_rsvd;		/* reserved (used by kerberos security as cksum) */
@@ -99,8 +94,7 @@ struct rxrpc_jumbo_header
  * on-the-wire Rx ACK packet data payload
  * - all multibyte fields should be in network byte order
  */
-struct rxrpc_ackpacket
-{
+struct rxrpc_ackpacket {
 	__be16		bufferSpace;	/* number of packet buffers available */
 	__be16		maxSkew;	/* diff between serno being ACK'd and highest serial no
 					 * received */
diff --git a/include/rxrpc/peer.h b/include/rxrpc/peer.h
deleted file mode 100644
index 8b8fe97cbbc..00000000000
--- a/include/rxrpc/peer.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* peer.h: Rx RPC per-transport peer record
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _LINUX_RXRPC_PEER_H
-#define _LINUX_RXRPC_PEER_H
-
-#include <linux/wait.h>
-#include <rxrpc/types.h>
-#include <rxrpc/krxtimod.h>
-
-struct rxrpc_peer_ops
-{
-	/* peer record being added */
-	int (*adding)(struct rxrpc_peer *peer);
-
-	/* peer record being discarded from graveyard */
-	void (*discarding)(struct rxrpc_peer *peer);
-
-	/* change of epoch detected on connection */
-	void (*change_of_epoch)(struct rxrpc_connection *conn);
-};
-
-/*****************************************************************************/
-/*
- * Rx RPC per-transport peer record
- * - peers only retain a refcount on the transport when they are active
- * - peers with refcount==0 are inactive and reside in the transport's graveyard
- */
-struct rxrpc_peer
-{
-	atomic_t		usage;
-	struct rxrpc_peer_ops	*ops;		/* operations on this peer */
-	struct rxrpc_transport	*trans;		/* owner transport */
-	struct rxrpc_timer	timeout;	/* timeout for grave destruction */
-	struct list_head	link;		/* link in transport's peer list */
-	struct list_head	proc_link;	/* link in /proc list */
-	rwlock_t		conn_idlock;	/* lock for connection IDs */
-	struct list_head	conn_idlist;	/* list of connections granted IDs */
-	uint32_t		conn_idcounter;	/* connection ID counter */
-	rwlock_t		conn_lock;	/* lock for active/dead connections */
-	struct list_head	conn_active;	/* active connections to/from this peer */
-	struct list_head	conn_graveyard;	/* graveyard for inactive connections */
-	spinlock_t		conn_gylock;	/* lock for conn_graveyard */
-	wait_queue_head_t	conn_gy_waitq;	/* wait queue hit when graveyard is empty */
-	atomic_t		conn_count;	/* number of attached connections */
-	struct in_addr		addr;		/* remote address */
-	size_t			if_mtu;		/* interface MTU for this peer */
-	spinlock_t		lock;		/* access lock */
-
-	void			*user;		/* application layer data */
-
-	/* calculated RTT cache */
-#define RXRPC_RTT_CACHE_SIZE 32
-	suseconds_t		rtt;		/* current RTT estimate (in uS) */
-	unsigned		rtt_point;	/* next entry at which to insert */
-	unsigned		rtt_usage;	/* amount of cache actually used */
-	suseconds_t		rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* calculated RTT cache */
-};
-
-
-extern int rxrpc_peer_lookup(struct rxrpc_transport *trans,
-			     __be32 addr,
-			     struct rxrpc_peer **_peer);
-
-static inline void rxrpc_get_peer(struct rxrpc_peer *peer)
-{
-	BUG_ON(atomic_read(&peer->usage)<0);
-	atomic_inc(&peer->usage);
-	//printk("rxrpc_get_peer(%p{u=%d})\n",peer,atomic_read(&peer->usage));
-}
-
-extern void rxrpc_put_peer(struct rxrpc_peer *peer);
-
-#endif /* _LINUX_RXRPC_PEER_H */
diff --git a/include/rxrpc/rxrpc.h b/include/rxrpc/rxrpc.h
deleted file mode 100644
index 8d9874cef99..00000000000
--- a/include/rxrpc/rxrpc.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* rx.h: Rx RPC interface
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _LINUX_RXRPC_RXRPC_H
-#define _LINUX_RXRPC_RXRPC_H
-
-#ifdef __KERNEL__
-
-extern __be32 rxrpc_epoch;
-
-#ifdef CONFIG_SYSCTL
-extern int rxrpc_ktrace;
-extern int rxrpc_kdebug;
-extern int rxrpc_kproto;
-extern int rxrpc_knet;
-#else
-#define rxrpc_ktrace	0
-#define rxrpc_kdebug	0
-#define rxrpc_kproto	0
-#define rxrpc_knet	0
-#endif
-
-extern int rxrpc_sysctl_init(void);
-extern void rxrpc_sysctl_cleanup(void);
-
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_RXRPC_RXRPC_H */
diff --git a/include/rxrpc/transport.h b/include/rxrpc/transport.h
deleted file mode 100644
index 7c7b9683fa3..00000000000
--- a/include/rxrpc/transport.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/* transport.h: Rx transport management
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _LINUX_RXRPC_TRANSPORT_H
-#define _LINUX_RXRPC_TRANSPORT_H
-
-#include <rxrpc/types.h>
-#include <rxrpc/krxiod.h>
-#include <rxrpc/rxrpc.h>
-#include <linux/skbuff.h>
-#include <linux/rwsem.h>
-
-typedef int (*rxrpc_newcall_fnx_t)(struct rxrpc_call *call);
-
-extern wait_queue_head_t rxrpc_krxiod_wq;
-
-/*****************************************************************************/
-/*
- * Rx operation specification
- * - tables of these must be sorted by op ID so that they can be binary-chop searched
- */
-struct rxrpc_operation
-{
-	unsigned		id;		/* operation ID */
-	size_t			asize;		/* minimum size of argument block */
-	const char		*name;		/* name of operation */
-	void			*user;		/* initial user data */
-};
-
-/*****************************************************************************/
-/*
- * Rx transport service record
- */
-struct rxrpc_service
-{
-	struct list_head	link;		/* link in services list on transport */
-	struct module		*owner;		/* owner module */
-	rxrpc_newcall_fnx_t	new_call;	/* new call handler function */
-	const char		*name;		/* name of service */
-	unsigned short		service_id;	/* Rx service ID */
-	rxrpc_call_attn_func_t	attn_func;	/* call requires attention callback */
-	rxrpc_call_error_func_t	error_func;	/* call error callback */
-	rxrpc_call_aemap_func_t	aemap_func;	/* abort -> errno mapping callback */
-
-	const struct rxrpc_operation	*ops_begin;	/* beginning of operations table */
-	const struct rxrpc_operation	*ops_end;	/* end of operations table */
-};
-
-/*****************************************************************************/
-/*
- * Rx transport endpoint record
- */
-struct rxrpc_transport
-{
-	atomic_t		usage;
-	struct socket		*socket;	/* my UDP socket */
-	struct list_head	services;	/* services listening on this socket */
-	struct list_head	link;		/* link in transport list */
-	struct list_head	proc_link;	/* link in transport proc list */
-	struct list_head	krxiodq_link;	/* krxiod attention queue link */
-	spinlock_t		lock;		/* access lock */
-	struct list_head	peer_active;	/* active peers connected to over this socket */
-	struct list_head	peer_graveyard;	/* inactive peer list */
-	spinlock_t		peer_gylock;	/* peer graveyard lock */
-	wait_queue_head_t	peer_gy_waitq;	/* wait queue hit when peer graveyard is empty */
-	rwlock_t		peer_lock;	/* peer list access lock */
-	atomic_t		peer_count;	/* number of peers */
-	struct rxrpc_peer_ops	*peer_ops;	/* default peer operations */
-	unsigned short		port;		/* port upon which listening */
-	volatile char		error_rcvd;	/* T if received ICMP error outstanding */
-};
-
-extern int rxrpc_create_transport(unsigned short port,
-				  struct rxrpc_transport **_trans);
-
-static inline void rxrpc_get_transport(struct rxrpc_transport *trans)
-{
-	BUG_ON(atomic_read(&trans->usage) <= 0);
-	atomic_inc(&trans->usage);
-	//printk("rxrpc_get_transport(%p{u=%d})\n",
-	//       trans, atomic_read(&trans->usage));
-}
-
-extern void rxrpc_put_transport(struct rxrpc_transport *trans);
-
-extern int rxrpc_add_service(struct rxrpc_transport *trans,
-			     struct rxrpc_service *srv);
-
-extern void rxrpc_del_service(struct rxrpc_transport *trans,
-			      struct rxrpc_service *srv);
-
-extern void rxrpc_trans_receive_packet(struct rxrpc_transport *trans);
-
-extern int rxrpc_trans_immediate_abort(struct rxrpc_transport *trans,
-				       struct rxrpc_message *msg,
-				       int error);
-
-#endif /* _LINUX_RXRPC_TRANSPORT_H */
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 07bf82ffec6..c46867c61c9 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -1,9 +1,7 @@
 #
-# Makefile for Linux kernel Rx RPC
+# Makefile for Linux kernel RxRPC
 #
 
-#CFLAGS += -finstrument-functions
-
 af-rxrpc-objs := \
 	af_rxrpc.o \
 	ar-accept.o \
@@ -29,26 +27,3 @@ endif
 obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o
 
 obj-$(CONFIG_RXKAD) += rxkad.o
-
-#
-# obsolete RxRPC interface, still used by fs/afs/
-#
-rxrpc-objs := \
-	call.o \
-	connection.o \
-	krxiod.o \
-	krxsecd.o \
-	krxtimod.o \
-	main.o \
-	peer.o \
-	rxrpc_syms.o \
-	transport.o
-
-ifeq ($(CONFIG_PROC_FS),y)
-rxrpc-objs += proc.o
-endif
-ifeq ($(CONFIG_SYSCTL),y)
-rxrpc-objs += sysctl.o
-endif
-
-obj-$(CONFIG_RXRPC) += rxrpc.o
diff --git a/net/rxrpc/call.c b/net/rxrpc/call.c
deleted file mode 100644
index d07122b57e0..00000000000
--- a/net/rxrpc/call.c
+++ /dev/null
@@ -1,2277 +0,0 @@
-/* call.c: Rx call routines
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include "internal.h"
-
-__RXACCT_DECL(atomic_t rxrpc_call_count);
-__RXACCT_DECL(atomic_t rxrpc_message_count);
-
-LIST_HEAD(rxrpc_calls);
-DECLARE_RWSEM(rxrpc_calls_sem);
-
-unsigned rxrpc_call_rcv_timeout			= HZ/3;
-static unsigned rxrpc_call_acks_timeout		= HZ/3;
-static unsigned rxrpc_call_dfr_ack_timeout	= HZ/20;
-static unsigned short rxrpc_call_max_resend	= HZ/10;
-
-const char *rxrpc_call_states[] = {
-	"COMPLETE",
-	"ERROR",
-	"SRVR_RCV_OPID",
-	"SRVR_RCV_ARGS",
-	"SRVR_GOT_ARGS",
-	"SRVR_SND_REPLY",
-	"SRVR_RCV_FINAL_ACK",
-	"CLNT_SND_ARGS",
-	"CLNT_RCV_REPLY",
-	"CLNT_GOT_REPLY"
-};
-
-const char *rxrpc_call_error_states[] = {
-	"NO_ERROR",
-	"LOCAL_ABORT",
-	"PEER_ABORT",
-	"LOCAL_ERROR",
-	"REMOTE_ERROR"
-};
-
-const char *rxrpc_pkts[] = {
-	"?00",
-	"data", "ack", "busy", "abort", "ackall", "chall", "resp", "debug",
-	"?09", "?10", "?11", "?12", "?13", "?14", "?15"
-};
-
-static const char *rxrpc_acks[] = {
-	"---", "REQ", "DUP", "SEQ", "WIN", "MEM", "PNG", "PNR", "DLY", "IDL",
-	"-?-"
-};
-
-static const char _acktype[] = "NA-";
-
-static void rxrpc_call_receive_packet(struct rxrpc_call *call);
-static void rxrpc_call_receive_data_packet(struct rxrpc_call *call,
-					   struct rxrpc_message *msg);
-static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
-					  struct rxrpc_message *msg);
-static void rxrpc_call_definitively_ACK(struct rxrpc_call *call,
-					rxrpc_seq_t higest);
-static void rxrpc_call_resend(struct rxrpc_call *call, rxrpc_seq_t highest);
-static int __rxrpc_call_read_data(struct rxrpc_call *call);
-
-static int rxrpc_call_record_ACK(struct rxrpc_call *call,
-				 struct rxrpc_message *msg,
-				 rxrpc_seq_t seq,
-				 size_t count);
-
-static int rxrpc_call_flush(struct rxrpc_call *call);
-
-#define _state(call) \
-	_debug("[[[ state %s ]]]", rxrpc_call_states[call->app_call_state]);
-
-static void rxrpc_call_default_attn_func(struct rxrpc_call *call)
-{
-	wake_up(&call->waitq);
-}
-
-static void rxrpc_call_default_error_func(struct rxrpc_call *call)
-{
-	wake_up(&call->waitq);
-}
-
-static void rxrpc_call_default_aemap_func(struct rxrpc_call *call)
-{
-	switch (call->app_err_state) {
-	case RXRPC_ESTATE_LOCAL_ABORT:
-		call->app_abort_code = -call->app_errno;
-	case RXRPC_ESTATE_PEER_ABORT:
-		call->app_errno = -ECONNABORTED;
-	default:
-		break;
-	}
-}
-
-static void __rxrpc_call_acks_timeout(unsigned long _call)
-{
-	struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
-	_debug("ACKS TIMEOUT %05lu", jiffies - call->cjif);
-
-	call->flags |= RXRPC_CALL_ACKS_TIMO;
-	rxrpc_krxiod_queue_call(call);
-}
-
-static void __rxrpc_call_rcv_timeout(unsigned long _call)
-{
-	struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
-	_debug("RCV TIMEOUT %05lu", jiffies - call->cjif);
-
-	call->flags |= RXRPC_CALL_RCV_TIMO;
-	rxrpc_krxiod_queue_call(call);
-}
-
-static void __rxrpc_call_ackr_timeout(unsigned long _call)
-{
-	struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
-	_debug("ACKR TIMEOUT %05lu",jiffies - call->cjif);
-
-	call->flags |= RXRPC_CALL_ACKR_TIMO;
-	rxrpc_krxiod_queue_call(call);
-}
-
-/*****************************************************************************/
-/*
- * calculate a timeout based on an RTT value
- */
-static inline unsigned long __rxrpc_rtt_based_timeout(struct rxrpc_call *call,
-						      unsigned long val)
-{
-	unsigned long expiry = call->conn->peer->rtt / (1000000 / HZ);
-
-	expiry += 10;
-	if (expiry < HZ / 25)
-		expiry = HZ / 25;
-	if (expiry > HZ)
-		expiry = HZ;
-
-	_leave(" = %lu jiffies", expiry);
-	return jiffies + expiry;
-} /* end __rxrpc_rtt_based_timeout() */
-
-/*****************************************************************************/
-/*
- * create a new call record
- */
-static inline int __rxrpc_create_call(struct rxrpc_connection *conn,
-				      struct rxrpc_call **_call)
-{
-	struct rxrpc_call *call;
-
-	_enter("%p", conn);
-
-	/* allocate and initialise a call record */
-	call = (struct rxrpc_call *) get_zeroed_page(GFP_KERNEL);
-	if (!call) {
-		_leave(" ENOMEM");
-		return -ENOMEM;
-	}
-
-	atomic_set(&call->usage, 1);
-
-	init_waitqueue_head(&call->waitq);
-	spin_lock_init(&call->lock);
-	INIT_LIST_HEAD(&call->link);
-	INIT_LIST_HEAD(&call->acks_pendq);
-	INIT_LIST_HEAD(&call->rcv_receiveq);
-	INIT_LIST_HEAD(&call->rcv_krxiodq_lk);
-	INIT_LIST_HEAD(&call->app_readyq);
-	INIT_LIST_HEAD(&call->app_unreadyq);
-	INIT_LIST_HEAD(&call->app_link);
-	INIT_LIST_HEAD(&call->app_attn_link);
-
-	init_timer(&call->acks_timeout);
-	call->acks_timeout.data = (unsigned long) call;
-	call->acks_timeout.function = __rxrpc_call_acks_timeout;
-
-	init_timer(&call->rcv_timeout);
-	call->rcv_timeout.data = (unsigned long) call;
-	call->rcv_timeout.function = __rxrpc_call_rcv_timeout;
-
-	init_timer(&call->ackr_dfr_timo);
-	call->ackr_dfr_timo.data = (unsigned long) call;
-	call->ackr_dfr_timo.function = __rxrpc_call_ackr_timeout;
-
-	call->conn = conn;
-	call->ackr_win_bot = 1;
-	call->ackr_win_top = call->ackr_win_bot + RXRPC_CALL_ACK_WINDOW_SIZE - 1;
-	call->ackr_prev_seq = 0;
-	call->app_mark = RXRPC_APP_MARK_EOF;
-	call->app_attn_func = rxrpc_call_default_attn_func;
-	call->app_error_func = rxrpc_call_default_error_func;
-	call->app_aemap_func = rxrpc_call_default_aemap_func;
-	call->app_scr_alloc = call->app_scratch;
-
-	call->cjif = jiffies;
-
-	_leave(" = 0 (%p)", call);
-
-	*_call = call;
-
-	return 0;
-} /* end __rxrpc_create_call() */
-
-/*****************************************************************************/
-/*
- * create a new call record for outgoing calls
- */
-int rxrpc_create_call(struct rxrpc_connection *conn,
-		      rxrpc_call_attn_func_t attn,
-		      rxrpc_call_error_func_t error,
-		      rxrpc_call_aemap_func_t aemap,
-		      struct rxrpc_call **_call)
-{
-	DECLARE_WAITQUEUE(myself, current);
-
-	struct rxrpc_call *call;
-	int ret, cix, loop;
-
-	_enter("%p", conn);
-
-	/* allocate and initialise a call record */
-	ret = __rxrpc_create_call(conn, &call);
-	if (ret < 0) {
-		_leave(" = %d", ret);
-		return ret;
-	}
-
-	call->app_call_state = RXRPC_CSTATE_CLNT_SND_ARGS;
-	if (attn)
-		call->app_attn_func = attn;
-	if (error)
-		call->app_error_func = error;
-	if (aemap)
-		call->app_aemap_func = aemap;
-
-	_state(call);
-
-	spin_lock(&conn->lock);
-	set_current_state(TASK_INTERRUPTIBLE);
-	add_wait_queue(&conn->chanwait, &myself);
-
- try_again:
-	/* try to find an unused channel */
-	for (cix = 0; cix < 4; cix++)
-		if (!conn->channels[cix])
-			goto obtained_chan;
-
-	/* no free channels - wait for one to become available */
-	ret = -EINTR;
-	if (signal_pending(current))
-		goto error_unwait;
-
-	spin_unlock(&conn->lock);
-
-	schedule();
-	set_current_state(TASK_INTERRUPTIBLE);
-
-	spin_lock(&conn->lock);
-	goto try_again;
-
-	/* got a channel - now attach to the connection */
- obtained_chan:
-	remove_wait_queue(&conn->chanwait, &myself);
-	set_current_state(TASK_RUNNING);
-
-	/* concoct a unique call number */
- next_callid:
-	call->call_id = htonl(++conn->call_counter);
-	for (loop = 0; loop < 4; loop++)
-		if (conn->channels[loop] &&
-		    conn->channels[loop]->call_id == call->call_id)
-			goto next_callid;
-
-	rxrpc_get_connection(conn);
-	conn->channels[cix] = call; /* assign _after_ done callid check loop */
-	do_gettimeofday(&conn->atime);
-	call->chan_ix = htonl(cix);
-
-	spin_unlock(&conn->lock);
-
-	down_write(&rxrpc_calls_sem);
-	list_add_tail(&call->call_link, &rxrpc_calls);
-	up_write(&rxrpc_calls_sem);
-
-	__RXACCT(atomic_inc(&rxrpc_call_count));
-	*_call = call;
-
-	_leave(" = 0 (call=%p cix=%u)", call, cix);
-	return 0;
-
- error_unwait:
-	remove_wait_queue(&conn->chanwait, &myself);
-	set_current_state(TASK_RUNNING);
-	spin_unlock(&conn->lock);
-
-	free_page((unsigned long) call);
-	_leave(" = %d", ret);
-	return ret;
-} /* end rxrpc_create_call() */
-
-/*****************************************************************************/
-/*
- * create a new call record for incoming calls
- */
-int rxrpc_incoming_call(struct rxrpc_connection *conn,
-			struct rxrpc_message *msg,
-			struct rxrpc_call **_call)
-{
-	struct rxrpc_call *call;
-	unsigned cix;
-	int ret;
-
-	cix = ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK;
-
-	_enter("%p,%u,%u", conn, ntohl(msg->hdr.callNumber), cix);
-
-	/* allocate and initialise a call record */
-	ret = __rxrpc_create_call(conn, &call);
-	if (ret < 0) {
-		_leave(" = %d", ret);
-		return ret;
-	}
-
-	call->pkt_rcv_count = 1;
-	call->app_call_state = RXRPC_CSTATE_SRVR_RCV_OPID;
-	call->app_mark = sizeof(uint32_t);
-
-	_state(call);
-
-	/* attach to the connection */
-	ret = -EBUSY;
-	call->chan_ix = htonl(cix);
-	call->call_id = msg->hdr.callNumber;
-
-	spin_lock(&conn->lock);
-
-	if (!conn->channels[cix] ||
-	    conn->channels[cix]->app_call_state == RXRPC_CSTATE_COMPLETE ||
-	    conn->channels[cix]->app_call_state == RXRPC_CSTATE_ERROR
-	    ) {
-		conn->channels[cix] = call;
-		rxrpc_get_connection(conn);
-		ret = 0;
-	}
-
-	spin_unlock(&conn->lock);
-
-	if (ret < 0) {
-		free_page((unsigned long) call);
-		call = NULL;
-	}
-
-	if (ret == 0) {
-		down_write(&rxrpc_calls_sem);
-		list_add_tail(&call->call_link, &rxrpc_calls);
-		up_write(&rxrpc_calls_sem);
-		__RXACCT(atomic_inc(&rxrpc_call_count));
-		*_call = call;
-	}
-
-	_leave(" = %d [%p]", ret, call);
-	return ret;
-} /* end rxrpc_incoming_call() */
-
-/*****************************************************************************/
-/*
- * free a call record
- */
-void rxrpc_put_call(struct rxrpc_call *call)
-{
-	struct rxrpc_connection *conn = call->conn;
-	struct rxrpc_message *msg;
-
-	_enter("%p{u=%d}",call,atomic_read(&call->usage));
-
-	/* sanity check */
-	if (atomic_read(&call->usage) <= 0)
-		BUG();
-
-	/* to prevent a race, the decrement and the de-list must be effectively
-	 * atomic */
-	spin_lock(&conn->lock);
-	if (likely(!atomic_dec_and_test(&call->usage))) {
-		spin_unlock(&conn->lock);
-		_leave("");
-		return;
-	}
-
-	if (conn->channels[ntohl(call->chan_ix)] == call)
-		conn->channels[ntohl(call->chan_ix)] = NULL;
-
-	spin_unlock(&conn->lock);
-
-	wake_up(&conn->chanwait);
-
-	rxrpc_put_connection(conn);
-
-	/* clear the timers and dequeue from krxiod */
-	del_timer_sync(&call->acks_timeout);
-	del_timer_sync(&call->rcv_timeout);
-	del_timer_sync(&call->ackr_dfr_timo);
-
-	rxrpc_krxiod_dequeue_call(call);
-
-	/* clean up the contents of the struct */
-	if (call->snd_nextmsg)
-		rxrpc_put_message(call->snd_nextmsg);
-
-	if (call->snd_ping)
-		rxrpc_put_message(call->snd_ping);
-
-	while (!list_empty(&call->acks_pendq)) {
-		msg = list_entry(call->acks_pendq.next,
-				 struct rxrpc_message, link);
-		list_del(&msg->link);
-		rxrpc_put_message(msg);
-	}
-
-	while (!list_empty(&call->rcv_receiveq)) {
-		msg = list_entry(call->rcv_receiveq.next,
-				 struct rxrpc_message, link);
-		list_del(&msg->link);
-		rxrpc_put_message(msg);
-	}
-
-	while (!list_empty(&call->app_readyq)) {
-		msg = list_entry(call->app_readyq.next,
-				 struct rxrpc_message, link);
-		list_del(&msg->link);
-		rxrpc_put_message(msg);
-	}
-
-	while (!list_empty(&call->app_unreadyq)) {
-		msg = list_entry(call->app_unreadyq.next,
-				 struct rxrpc_message, link);
-		list_del(&msg->link);
-		rxrpc_put_message(msg);
-	}
-
-	module_put(call->owner);
-
-	down_write(&rxrpc_calls_sem);
-	list_del(&call->call_link);
-	up_write(&rxrpc_calls_sem);
-
-	__RXACCT(atomic_dec(&rxrpc_call_count));
-	free_page((unsigned long) call);
-
-	_leave(" [destroyed]");
-} /* end rxrpc_put_call() */
-
-/*****************************************************************************/
-/*
- * actually generate a normal ACK
- */
-static inline int __rxrpc_call_gen_normal_ACK(struct rxrpc_call *call,
-					      rxrpc_seq_t seq)
-{
-	struct rxrpc_message *msg;
-	struct kvec diov[3];
-	__be32 aux[4];
-	int delta, ret;
-
-	/* ACKs default to DELAY */
-	if (!call->ackr.reason)
-		call->ackr.reason = RXRPC_ACK_DELAY;
-
-	_proto("Rx %05lu Sending ACK { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
-	       jiffies - call->cjif,
-	       ntohs(call->ackr.maxSkew),
-	       ntohl(call->ackr.firstPacket),
-	       ntohl(call->ackr.previousPacket),
-	       ntohl(call->ackr.serial),
-	       rxrpc_acks[call->ackr.reason],
-	       call->ackr.nAcks);
-
-	aux[0] = htonl(call->conn->peer->if_mtu);	/* interface MTU */
-	aux[1] = htonl(1444);				/* max MTU */
-	aux[2] = htonl(16);				/* rwind */
-	aux[3] = htonl(4);				/* max packets */
-
-	diov[0].iov_len  = sizeof(struct rxrpc_ackpacket);
-	diov[0].iov_base = &call->ackr;
-	diov[1].iov_len  = call->ackr_pend_cnt + 3;
-	diov[1].iov_base = call->ackr_array;
-	diov[2].iov_len  = sizeof(aux);
-	diov[2].iov_base = &aux;
-
-	/* build and send the message */
-	ret = rxrpc_conn_newmsg(call->conn,call, RXRPC_PACKET_TYPE_ACK,
-				3, diov, GFP_KERNEL, &msg);
-	if (ret < 0)
-		goto out;
-
-	msg->seq = seq;
-	msg->hdr.seq = htonl(seq);
-	msg->hdr.flags |= RXRPC_SLOW_START_OK;
-
-	ret = rxrpc_conn_sendmsg(call->conn, msg);
-	rxrpc_put_message(msg);
-	if (ret < 0)
-		goto out;
-	call->pkt_snd_count++;
-
-	/* count how many actual ACKs there were at the front */
-	for (delta = 0; delta < call->ackr_pend_cnt; delta++)
-		if (call->ackr_array[delta] != RXRPC_ACK_TYPE_ACK)
-			break;
-
-	call->ackr_pend_cnt -= delta; /* all ACK'd to this point */
-
-	/* crank the ACK window around */
-	if (delta == 0) {
-		/* un-ACK'd window */
-	}
-	else if (delta < RXRPC_CALL_ACK_WINDOW_SIZE) {
-		/* partially ACK'd window
-		 * - shuffle down to avoid losing out-of-sequence packets
-		 */
-		call->ackr_win_bot += delta;
-		call->ackr_win_top += delta;
-
-		memmove(&call->ackr_array[0],
-			&call->ackr_array[delta],
-			call->ackr_pend_cnt);
-
-		memset(&call->ackr_array[call->ackr_pend_cnt],
-		       RXRPC_ACK_TYPE_NACK,
-		       sizeof(call->ackr_array) - call->ackr_pend_cnt);
-	}
-	else {
-		/* fully ACK'd window
-		 * - just clear the whole thing
-		 */
-		memset(&call->ackr_array,
-		       RXRPC_ACK_TYPE_NACK,
-		       sizeof(call->ackr_array));
-	}
-
-	/* clear this ACK */
-	memset(&call->ackr, 0, sizeof(call->ackr));
-
- out:
-	if (!call->app_call_state)
-		printk("___ STATE 0 ___\n");
-	return ret;
-} /* end __rxrpc_call_gen_normal_ACK() */
-
-/*****************************************************************************/
-/*
- * note the reception of a packet in the call's ACK records and generate an
- * appropriate ACK packet if necessary
- * - returns 0 if packet should be processed, 1 if packet should be ignored
- *   and -ve on an error
- */
-static int rxrpc_call_generate_ACK(struct rxrpc_call *call,
-				   struct rxrpc_header *hdr,
-				   struct rxrpc_ackpacket *ack)
-{
-	struct rxrpc_message *msg;
-	rxrpc_seq_t seq;
-	unsigned offset;
-	int ret = 0, err;
-	u8 special_ACK, do_ACK, force;
-
-	_enter("%p,%p { seq=%d tp=%d fl=%02x }",
-	       call, hdr, ntohl(hdr->seq), hdr->type, hdr->flags);
-
-	seq = ntohl(hdr->seq);
-	offset = seq - call->ackr_win_bot;
-	do_ACK = RXRPC_ACK_DELAY;
-	special_ACK = 0;
-	force = (seq == 1);
-
-	if (call->ackr_high_seq < seq)
-		call->ackr_high_seq = seq;
-
-	/* deal with generation of obvious special ACKs first */
-	if (ack && ack->reason == RXRPC_ACK_PING) {
-		special_ACK = RXRPC_ACK_PING_RESPONSE;
-		ret = 1;
-		goto gen_ACK;
-	}
-
-	if (seq < call->ackr_win_bot) {
-		special_ACK = RXRPC_ACK_DUPLICATE;
-		ret = 1;
-		goto gen_ACK;
-	}
-
-	if (seq >= call->ackr_win_top) {
-		special_ACK = RXRPC_ACK_EXCEEDS_WINDOW;
-		ret = 1;
-		goto gen_ACK;
-	}
-
-	if (call->ackr_array[offset] != RXRPC_ACK_TYPE_NACK) {
-		special_ACK = RXRPC_ACK_DUPLICATE;
-		ret = 1;
-		goto gen_ACK;
-	}
-
-	/* okay... it's a normal data packet inside the ACK window */
-	call->ackr_array[offset] = RXRPC_ACK_TYPE_ACK;
-
-	if (offset < call->ackr_pend_cnt) {
-	}
-	else if (offset > call->ackr_pend_cnt) {
-		do_ACK = RXRPC_ACK_OUT_OF_SEQUENCE;
-		call->ackr_pend_cnt = offset;
-		goto gen_ACK;
-	}
-
-	if (hdr->flags & RXRPC_REQUEST_ACK) {
-		do_ACK = RXRPC_ACK_REQUESTED;
-	}
-
-	/* generate an ACK on the final packet of a reply just received */
-	if (hdr->flags & RXRPC_LAST_PACKET) {
-		if (call->conn->out_clientflag)
-			force = 1;
-	}
-	else if (!(hdr->flags & RXRPC_MORE_PACKETS)) {
-		do_ACK = RXRPC_ACK_REQUESTED;
-	}
-
-	/* re-ACK packets previously received out-of-order */
-	for (offset++; offset < RXRPC_CALL_ACK_WINDOW_SIZE; offset++)
-		if (call->ackr_array[offset] != RXRPC_ACK_TYPE_ACK)
-			break;
-
-	call->ackr_pend_cnt = offset;
-
-	/* generate an ACK if we fill up the window */
-	if (call->ackr_pend_cnt >= RXRPC_CALL_ACK_WINDOW_SIZE)
-		force = 1;
-
- gen_ACK:
-	_debug("%05lu ACKs pend=%u norm=%s special=%s%s",
-	       jiffies - call->cjif,
-	       call->ackr_pend_cnt,
-	       rxrpc_acks[do_ACK],
-	       rxrpc_acks[special_ACK],
-	       force ? " immediate" :
-	       do_ACK == RXRPC_ACK_REQUESTED ? " merge-req" :
-	       hdr->flags & RXRPC_LAST_PACKET ? " finalise" :
-	       " defer"
-	       );
-
-	/* send any pending normal ACKs if need be */
-	if (call->ackr_pend_cnt > 0) {
-		/* fill out the appropriate form */
-		call->ackr.bufferSpace	= htons(RXRPC_CALL_ACK_WINDOW_SIZE);
-		call->ackr.maxSkew	= htons(min(call->ackr_high_seq - seq,
-						    65535U));
-		call->ackr.firstPacket	= htonl(call->ackr_win_bot);
-		call->ackr.previousPacket = call->ackr_prev_seq;
-		call->ackr.serial	= hdr->serial;
-		call->ackr.nAcks	= call->ackr_pend_cnt;
-
-		if (do_ACK == RXRPC_ACK_REQUESTED)
-			call->ackr.reason = do_ACK;
-
-		/* generate the ACK immediately if necessary */
-		if (special_ACK || force) {
-			err = __rxrpc_call_gen_normal_ACK(
-				call, do_ACK == RXRPC_ACK_DELAY ? 0 : seq);
-			if (err < 0) {
-				ret = err;
-				goto out;
-			}
-		}
-	}
-
-	if (call->ackr.reason == RXRPC_ACK_REQUESTED)
-		call->ackr_dfr_seq = seq;
-
-	/* start the ACK timer if not running if there are any pending deferred
-	 * ACKs */
-	if (call->ackr_pend_cnt > 0 &&
-	    call->ackr.reason != RXRPC_ACK_REQUESTED &&
-	    !timer_pending(&call->ackr_dfr_timo)
-	    ) {
-		unsigned long timo;
-
-		timo = rxrpc_call_dfr_ack_timeout + jiffies;
-
-		_debug("START ACKR TIMER for cj=%lu", timo - call->cjif);
-
-		spin_lock(&call->lock);
-		mod_timer(&call->ackr_dfr_timo, timo);
-		spin_unlock(&call->lock);
-	}
-	else if ((call->ackr_pend_cnt == 0 ||
-		  call->ackr.reason == RXRPC_ACK_REQUESTED) &&
-		 timer_pending(&call->ackr_dfr_timo)
-		 ) {
-		/* stop timer if no pending ACKs */
-		_debug("CLEAR ACKR TIMER");
-		del_timer_sync(&call->ackr_dfr_timo);
-	}
-
-	/* send a special ACK if one is required */
-	if (special_ACK) {
-		struct rxrpc_ackpacket ack;
-		struct kvec diov[2];
-		uint8_t acks[1] = { RXRPC_ACK_TYPE_ACK };
-
-		/* fill out the appropriate form */
-		ack.bufferSpace	= htons(RXRPC_CALL_ACK_WINDOW_SIZE);
-		ack.maxSkew	= htons(min(call->ackr_high_seq - seq,
-					    65535U));
-		ack.firstPacket	= htonl(call->ackr_win_bot);
-		ack.previousPacket = call->ackr_prev_seq;
-		ack.serial	= hdr->serial;
-		ack.reason	= special_ACK;
-		ack.nAcks	= 0;
-
-		_proto("Rx Sending s-ACK"
-		       " { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
-		       ntohs(ack.maxSkew),
-		       ntohl(ack.firstPacket),
-		       ntohl(ack.previousPacket),
-		       ntohl(ack.serial),
-		       rxrpc_acks[ack.reason],
-		       ack.nAcks);
-
-		diov[0].iov_len  = sizeof(struct rxrpc_ackpacket);
-		diov[0].iov_base = &ack;
-		diov[1].iov_len  = sizeof(acks);
-		diov[1].iov_base = acks;
-
-		/* build and send the message */
-		err = rxrpc_conn_newmsg(call->conn,call, RXRPC_PACKET_TYPE_ACK,
-					hdr->seq ? 2 : 1, diov,
-					GFP_KERNEL,
-					&msg);
-		if (err < 0) {
-			ret = err;
-			goto out;
-		}
-
-		msg->seq = seq;
-		msg->hdr.seq = htonl(seq);
-		msg->hdr.flags |= RXRPC_SLOW_START_OK;
-
-		err = rxrpc_conn_sendmsg(call->conn, msg);
-		rxrpc_put_message(msg);
-		if (err < 0) {
-			ret = err;
-			goto out;
-		}
-		call->pkt_snd_count++;
-	}
-
- out:
-	if (hdr->seq)
-		call->ackr_prev_seq = hdr->seq;
-
-	_leave(" = %d", ret);
-	return ret;
-} /* end rxrpc_call_generate_ACK() */
-
-/*****************************************************************************/
-/*
- * handle work to be done on a call
- * - includes packet reception and timeout processing
- */
-void rxrpc_call_do_stuff(struct rxrpc_call *call)
-{
-	_enter("%p{flags=%lx}", call, call->flags);
-
-	/* handle packet reception */
-	if (call->flags & RXRPC_CALL_RCV_PKT) {
-		_debug("- receive packet");
-		call->flags &= ~RXRPC_CALL_RCV_PKT;
-		rxrpc_call_receive_packet(call);
-	}
-
-	/* handle overdue ACKs */
-	if (call->flags & RXRPC_CALL_ACKS_TIMO) {
-		_debug("- overdue ACK timeout");
-		call->flags &= ~RXRPC_CALL_ACKS_TIMO;
-		rxrpc_call_resend(call, call->snd_seq_count);
-	}
-
-	/* handle lack of reception */
-	if (call->flags & RXRPC_CALL_RCV_TIMO) {
-		_debug("- reception timeout");
-		call->flags &= ~RXRPC_CALL_RCV_TIMO;
-		rxrpc_call_abort(call, -EIO);
-	}
-
-	/* handle deferred ACKs */
-	if (call->flags & RXRPC_CALL_ACKR_TIMO ||
-	    (call->ackr.nAcks > 0 && call->ackr.reason == RXRPC_ACK_REQUESTED)
-	    ) {
-		_debug("- deferred ACK timeout: cj=%05lu r=%s n=%u",
-		       jiffies - call->cjif,
-		       rxrpc_acks[call->ackr.reason],
-		       call->ackr.nAcks);
-
-		call->flags &= ~RXRPC_CALL_ACKR_TIMO;
-
-		if (call->ackr.nAcks > 0 &&
-		    call->app_call_state != RXRPC_CSTATE_ERROR) {
-			/* generate ACK */
-			__rxrpc_call_gen_normal_ACK(call, call->ackr_dfr_seq);
-			call->ackr_dfr_seq = 0;
-		}
-	}
-
-	_leave("");
-
-} /* end rxrpc_call_do_stuff() */
-
-/*****************************************************************************/
-/*
- * send an abort message at call or connection level
- * - must be called with call->lock held
- * - the supplied error code is sent as the packet data
- */
-static int __rxrpc_call_abort(struct rxrpc_call *call, int errno)
-{
-	struct rxrpc_connection *conn = call->conn;
-	struct rxrpc_message *msg;
-	struct kvec diov[1];
-	int ret;
-	__be32 _error;
-
-	_enter("%p{%08x},%p{%d},%d",
-	       conn, ntohl(conn->conn_id), call, ntohl(call->call_id), errno);
-
-	/* if this call is already aborted, then just wake up any waiters */
-	if (call->app_call_state == RXRPC_CSTATE_ERROR) {
-		spin_unlock(&call->lock);
-		call->app_error_func(call);
-		_leave(" = 0");
-		return 0;
-	}
-
-	rxrpc_get_call(call);
-
-	/* change the state _with_ the lock still held */
-	call->app_call_state	= RXRPC_CSTATE_ERROR;
-	call->app_err_state	= RXRPC_ESTATE_LOCAL_ABORT;
-	call->app_errno		= errno;
-	call->app_mark		= RXRPC_APP_MARK_EOF;
-	call->app_read_buf	= NULL;
-	call->app_async_read	= 0;
-
-	_state(call);
-
-	/* ask the app to translate the error code */
-	call->app_aemap_func(call);
-
-	spin_unlock(&call->lock);
-
-	/* flush any outstanding ACKs */
-	del_timer_sync(&call->acks_timeout);
-	del_timer_sync(&call->rcv_timeout);
-	del_timer_sync(&call->ackr_dfr_timo);
-
-	if (rxrpc_call_is_ack_pending(call))
-		__rxrpc_call_gen_normal_ACK(call, 0);
-
-	/* send the abort packet only if we actually traded some other
-	 * packets */
-	ret = 0;
-	if (call->pkt_snd_count || call->pkt_rcv_count) {
-		/* actually send the abort */
-		_proto("Rx Sending Call ABORT { data=%d }",
-		       call->app_abort_code);
-
-		_error = htonl(call->app_abort_code);
-
-		diov[0].iov_len  = sizeof(_error);
-		diov[0].iov_base = &_error;
-
-		ret = rxrpc_conn_newmsg(conn, call, RXRPC_PACKET_TYPE_ABORT,
-					1, diov, GFP_KERNEL, &msg);
-		if (ret == 0) {
-			ret = rxrpc_conn_sendmsg(conn, msg);
-			rxrpc_put_message(msg);
-		}
-	}
-
-	/* tell the app layer to let go */
-	call->app_error_func(call);
-
-	rxrpc_put_call(call);
-
-	_leave(" = %d", ret);
-	return ret;
-} /* end __rxrpc_call_abort() */
-
-/*****************************************************************************/
-/*
- * send an abort message at call or connection level
- * - the supplied error code is sent as the packet data
- */
-int rxrpc_call_abort(struct rxrpc_call *call, int error)
-{
-	spin_lock(&call->lock);
-
-	return __rxrpc_call_abort(call, error);
-
-} /* end rxrpc_call_abort() */
-
-/*****************************************************************************/
-/*
- * process packets waiting for this call
- */
-static void rxrpc_call_receive_packet(struct rxrpc_call *call)
-{
-	struct rxrpc_message *msg;
-	struct list_head *_p;
-
-	_enter("%p", call);
-
-	rxrpc_get_call(call); /* must not go away too soon if aborted by
-			       * app-layer */
-
-	while (!list_empty(&call->rcv_receiveq)) {
-		/* try to get next packet */
-		_p = NULL;
-		spin_lock(&call->lock);
-		if (!list_empty(&call->rcv_receiveq)) {
-			_p = call->rcv_receiveq.next;
-			list_del_init(_p);
-		}
-		spin_unlock(&call->lock);
-
-		if (!_p)
-			break;
-
-		msg = list_entry(_p, struct rxrpc_message, link);
-
-		_proto("Rx %05lu Received %s packet (%%%u,#%u,%c%c%c%c%c)",
-		       jiffies - call->cjif,
-		       rxrpc_pkts[msg->hdr.type],
-		       ntohl(msg->hdr.serial),
-		       msg->seq,
-		       msg->hdr.flags & RXRPC_JUMBO_PACKET	? 'j' : '-',
-		       msg->hdr.flags & RXRPC_MORE_PACKETS	? 'm' : '-',
-		       msg->hdr.flags & RXRPC_LAST_PACKET	? 'l' : '-',
-		       msg->hdr.flags & RXRPC_REQUEST_ACK	? 'r' : '-',
-		       msg->hdr.flags & RXRPC_CLIENT_INITIATED	? 'C' : 'S'
-		       );
-
-		switch (msg->hdr.type) {
-			/* deal with data packets */
-		case RXRPC_PACKET_TYPE_DATA:
-			/* ACK the packet if necessary */
-			switch (rxrpc_call_generate_ACK(call, &msg->hdr,
-							NULL)) {
-			case 0: /* useful packet */
-				rxrpc_call_receive_data_packet(call, msg);
-				break;
-			case 1: /* duplicate or out-of-window packet */
-				break;
-			default:
-				rxrpc_put_message(msg);
-				goto out;
-			}
-			break;
-
-			/* deal with ACK packets */
-		case RXRPC_PACKET_TYPE_ACK:
-			rxrpc_call_receive_ack_packet(call, msg);
-			break;
-
-			/* deal with abort packets */
-		case RXRPC_PACKET_TYPE_ABORT: {
-			__be32 _dbuf, *dp;
-
-			dp = skb_header_pointer(msg->pkt, msg->offset,
-						sizeof(_dbuf), &_dbuf);
-			if (dp == NULL)
-				printk("Rx Received short ABORT packet\n");
-
-			_proto("Rx Received Call ABORT { data=%d }",
-			       (dp ? ntohl(*dp) : 0));
-
-			spin_lock(&call->lock);
-			call->app_call_state	= RXRPC_CSTATE_ERROR;
-			call->app_err_state	= RXRPC_ESTATE_PEER_ABORT;
-			call->app_abort_code	= (dp ? ntohl(*dp) : 0);
-			call->app_errno		= -ECONNABORTED;
-			call->app_mark		= RXRPC_APP_MARK_EOF;
-			call->app_read_buf	= NULL;
-			call->app_async_read	= 0;
-
-			/* ask the app to translate the error code */
-			call->app_aemap_func(call);
-			_state(call);
-			spin_unlock(&call->lock);
-			call->app_error_func(call);
-			break;
-		}
-		default:
-			/* deal with other packet types */
-			_proto("Rx Unsupported packet type %u (#%u)",
-			       msg->hdr.type, msg->seq);
-			break;
-		}
-
-		rxrpc_put_message(msg);
-	}
-
- out:
-	rxrpc_put_call(call);
-	_leave("");
-} /* end rxrpc_call_receive_packet() */
-
-/*****************************************************************************/
-/*
- * process next data packet
- * - as the next data packet arrives:
- *   - it is queued on app_readyq _if_ it is the next one expected
- *     (app_ready_seq+1)
- *   - it is queued on app_unreadyq _if_ it is not the next one expected
- *   - if a packet placed on app_readyq completely fills a hole leading up to
- *     the first packet on app_unreadyq, then packets now in sequence are
- *     tranferred to app_readyq
- * - the application layer can only see packets on app_readyq
- *   (app_ready_qty bytes)
- * - the application layer is prodded every time a new packet arrives
- */
-static void rxrpc_call_receive_data_packet(struct rxrpc_call *call,
-					   struct rxrpc_message *msg)
-{
-	const struct rxrpc_operation *optbl, *op;
-	struct rxrpc_message *pmsg;
-	struct list_head *_p;
-	int ret, lo, hi, rmtimo;
-	__be32 opid;
-
-	_enter("%p{%u},%p{%u}", call, ntohl(call->call_id), msg, msg->seq);
-
-	rxrpc_get_message(msg);
-
-	/* add to the unready queue if we'd have to create a hole in the ready
-	 * queue otherwise */
-	if (msg->seq != call->app_ready_seq + 1) {
-		_debug("Call add packet %d to unreadyq", msg->seq);
-
-		/* insert in seq order */
-		list_for_each(_p, &call->app_unreadyq) {
-			pmsg = list_entry(_p, struct rxrpc_message, link);
-			if (pmsg->seq > msg->seq)
-				break;
-		}
-
-		list_add_tail(&msg->link, _p);
-
-		_leave(" [unreadyq]");
-		return;
-	}
-
-	/* next in sequence - simply append into the call's ready queue */
-	_debug("Call add packet %d to readyq (+%Zd => %Zd bytes)",
-	       msg->seq, msg->dsize, call->app_ready_qty);
-
-	spin_lock(&call->lock);
-	call->app_ready_seq = msg->seq;
-	call->app_ready_qty += msg->dsize;
-	list_add_tail(&msg->link, &call->app_readyq);
-
-	/* move unready packets to the readyq if we got rid of a hole */
-	while (!list_empty(&call->app_unreadyq)) {
-		pmsg = list_entry(call->app_unreadyq.next,
-				  struct rxrpc_message, link);
-
-		if (pmsg->seq != call->app_ready_seq + 1)
-			break;
-
-		/* next in sequence - just move list-to-list */
-		_debug("Call transfer packet %d to readyq (+%Zd => %Zd bytes)",
-		       pmsg->seq, pmsg->dsize, call->app_ready_qty);
-
-		call->app_ready_seq = pmsg->seq;
-		call->app_ready_qty += pmsg->dsize;
-		list_move_tail(&pmsg->link, &call->app_readyq);
-	}
-
-	/* see if we've got the last packet yet */
-	if (!list_empty(&call->app_readyq)) {
-		pmsg = list_entry(call->app_readyq.prev,
-				  struct rxrpc_message, link);
-		if (pmsg->hdr.flags & RXRPC_LAST_PACKET) {
-			call->app_last_rcv = 1;
-			_debug("Last packet on readyq");
-		}
-	}
-
-	switch (call->app_call_state) {
-		/* do nothing if call already aborted */
-	case RXRPC_CSTATE_ERROR:
-		spin_unlock(&call->lock);
-		_leave(" [error]");
-		return;
-
-		/* extract the operation ID from an incoming call if that's not
-		 * yet been done */
-	case RXRPC_CSTATE_SRVR_RCV_OPID:
-		spin_unlock(&call->lock);
-
-		/* handle as yet insufficient data for the operation ID */
-		if (call->app_ready_qty < 4) {
-			if (call->app_last_rcv)
-				/* trouble - last packet seen */
-				rxrpc_call_abort(call, -EINVAL);
-
-			_leave("");
-			return;
-		}
-
-		/* pull the operation ID out of the buffer */
-		ret = rxrpc_call_read_data(call, &opid, sizeof(opid), 0);
-		if (ret < 0) {
-			printk("Unexpected error from read-data: %d\n", ret);
-			if (call->app_call_state != RXRPC_CSTATE_ERROR)
-				rxrpc_call_abort(call, ret);
-			_leave("");
-			return;
-		}
-		call->app_opcode = ntohl(opid);
-
-		/* locate the operation in the available ops table */
-		optbl = call->conn->service->ops_begin;
-		lo = 0;
-		hi = call->conn->service->ops_end - optbl;
-
-		while (lo < hi) {
-			int mid = (hi + lo) / 2;
-			op = &optbl[mid];
-			if (call->app_opcode == op->id)
-				goto found_op;
-			if (call->app_opcode > op->id)
-				lo = mid + 1;
-			else
-				hi = mid;
-		}
-
-		/* search failed */
-		kproto("Rx Client requested operation %d from %s service",
-		       call->app_opcode, call->conn->service->name);
-		rxrpc_call_abort(call, -EINVAL);
-		_leave(" [inval]");
-		return;
-
-	found_op:
-		_proto("Rx Client requested operation %s from %s service",
-		       op->name, call->conn->service->name);
-
-		/* we're now waiting for the argument block (unless the call
-		 * was aborted) */
-		spin_lock(&call->lock);
-		if (call->app_call_state == RXRPC_CSTATE_SRVR_RCV_OPID ||
-		    call->app_call_state == RXRPC_CSTATE_SRVR_SND_REPLY) {
-			if (!call->app_last_rcv)
-				call->app_call_state =
-					RXRPC_CSTATE_SRVR_RCV_ARGS;
-			else if (call->app_ready_qty > 0)
-				call->app_call_state =
-					RXRPC_CSTATE_SRVR_GOT_ARGS;
-			else
-				call->app_call_state =
-					RXRPC_CSTATE_SRVR_SND_REPLY;
-			call->app_mark = op->asize;
-			call->app_user = op->user;
-		}
-		spin_unlock(&call->lock);
-
-		_state(call);
-		break;
-
-	case RXRPC_CSTATE_SRVR_RCV_ARGS:
-		/* change state if just received last packet of arg block */
-		if (call->app_last_rcv)
-			call->app_call_state = RXRPC_CSTATE_SRVR_GOT_ARGS;
-		spin_unlock(&call->lock);
-
-		_state(call);
-		break;
-
-	case RXRPC_CSTATE_CLNT_RCV_REPLY:
-		/* change state if just received last packet of reply block */
-		rmtimo = 0;
-		if (call->app_last_rcv) {
-			call->app_call_state = RXRPC_CSTATE_CLNT_GOT_REPLY;
-			rmtimo = 1;
-		}
-		spin_unlock(&call->lock);
-
-		if (rmtimo) {
-			del_timer_sync(&call->acks_timeout);
-			del_timer_sync(&call->rcv_timeout);
-			del_timer_sync(&call->ackr_dfr_timo);
-		}
-
-		_state(call);
-		break;
-
-	default:
-		/* deal with data reception in an unexpected state */
-		printk("Unexpected state [[[ %u ]]]\n", call->app_call_state);
-		__rxrpc_call_abort(call, -EBADMSG);
-		_leave("");
-		return;
-	}
-
-	if (call->app_call_state == RXRPC_CSTATE_CLNT_RCV_REPLY &&
-	    call->app_last_rcv)
-		BUG();
-
-	/* otherwise just invoke the data function whenever we can satisfy its desire for more
-	 * data
-	 */
-	_proto("Rx Received Op Data: st=%u qty=%Zu mk=%Zu%s",
-	       call->app_call_state, call->app_ready_qty, call->app_mark,
-	       call->app_last_rcv ? " last-rcvd" : "");
-
-	spin_lock(&call->lock);
-
-	ret = __rxrpc_call_read_data(call);
-	switch (ret) {
-	case 0:
-		spin_unlock(&call->lock);
-		call->app_attn_func(call);
-		break;
-	case -EAGAIN:
-		spin_unlock(&call->lock);
-		break;
-	case -ECONNABORTED:
-		spin_unlock(&call->lock);
-		break;
-	default:
-		__rxrpc_call_abort(call, ret);
-		break;
-	}
-
-	_state(call);
-
-	_leave("");
-
-} /* end rxrpc_call_receive_data_packet() */
-
-/*****************************************************************************/
-/*
- * received an ACK packet
- */
-static void rxrpc_call_receive_ack_packet(struct rxrpc_call *call,
-					  struct rxrpc_message *msg)
-{
-	struct rxrpc_ackpacket _ack, *ap;
-	rxrpc_serial_net_t serial;
-	rxrpc_seq_t seq;
-	int ret;
-
-	_enter("%p{%u},%p{%u}", call, ntohl(call->call_id), msg, msg->seq);
-
-	/* extract the basic ACK record */
-	ap = skb_header_pointer(msg->pkt, msg->offset, sizeof(_ack), &_ack);
-	if (ap == NULL) {
-		printk("Rx Received short ACK packet\n");
-		return;
-	}
-	msg->offset += sizeof(_ack);
-
-	serial = ap->serial;
-	seq = ntohl(ap->firstPacket);
-
-	_proto("Rx Received ACK %%%d { b=%hu m=%hu f=%u p=%u s=%u r=%s n=%u }",
-	       ntohl(msg->hdr.serial),
-	       ntohs(ap->bufferSpace),
-	       ntohs(ap->maxSkew),
-	       seq,
-	       ntohl(ap->previousPacket),
-	       ntohl(serial),
-	       rxrpc_acks[ap->reason],
-	       call->ackr.nAcks
-	       );
-
-	/* check the other side isn't ACK'ing a sequence number I haven't sent
-	 * yet */
-	if (ap->nAcks > 0 &&
-	    (seq > call->snd_seq_count ||
-	     seq + ap->nAcks - 1 > call->snd_seq_count)) {
-		printk("Received ACK (#%u-#%u) for unsent packet\n",
-		       seq, seq + ap->nAcks - 1);
-		rxrpc_call_abort(call, -EINVAL);
-		_leave("");
-		return;
-	}
-
-	/* deal with RTT calculation */
-	if (serial) {
-		struct rxrpc_message *rttmsg;
-
-		/* find the prompting packet */
-		spin_lock(&call->lock);
-		if (call->snd_ping && call->snd_ping->hdr.serial == serial) {
-			/* it was a ping packet */
-			rttmsg = call->snd_ping;
-			call->snd_ping = NULL;
-			spin_unlock(&call->lock);
-
-			if (rttmsg) {
-				rttmsg->rttdone = 1;
-				rxrpc_peer_calculate_rtt(call->conn->peer,
-							 rttmsg, msg);
-				rxrpc_put_message(rttmsg);
-			}
-		}
-		else {
-			struct list_head *_p;
-
-			/* it ought to be a data packet - look in the pending
-			 * ACK list */
-			list_for_each(_p, &call->acks_pendq) {
-				rttmsg = list_entry(_p, struct rxrpc_message,
-						    link);
-				if (rttmsg->hdr.serial == serial) {
-					if (rttmsg->rttdone)
-						/* never do RTT twice without
-						 * resending */
-						break;
-
-					rttmsg->rttdone = 1;
-					rxrpc_peer_calculate_rtt(
-						call->conn->peer, rttmsg, msg);
-					break;
-				}
-			}
-			spin_unlock(&call->lock);
-		}
-	}
-
-	switch (ap->reason) {
-		/* deal with negative/positive acknowledgement of data
-		 * packets */
-	case RXRPC_ACK_REQUESTED:
-	case RXRPC_ACK_DELAY:
-	case RXRPC_ACK_IDLE:
-		rxrpc_call_definitively_ACK(call, seq - 1);
-
-	case RXRPC_ACK_DUPLICATE:
-	case RXRPC_ACK_OUT_OF_SEQUENCE:
-	case RXRPC_ACK_EXCEEDS_WINDOW:
-		call->snd_resend_cnt = 0;
-		ret = rxrpc_call_record_ACK(call, msg, seq, ap->nAcks);
-		if (ret < 0)
-			rxrpc_call_abort(call, ret);
-		break;
-
-		/* respond to ping packets immediately */
-	case RXRPC_ACK_PING:
-		rxrpc_call_generate_ACK(call, &msg->hdr, ap);
-		break;
-
-		/* only record RTT on ping response packets */
-	case RXRPC_ACK_PING_RESPONSE:
-		if (call->snd_ping) {
-			struct rxrpc_message *rttmsg;
-
-			/* only do RTT stuff if the response matches the
-			 * retained ping */
-			rttmsg = NULL;
-			spin_lock(&call->lock);
-			if (call->snd_ping &&
-			    call->snd_ping->hdr.serial == ap->serial) {
-				rttmsg = call->snd_ping;
-				call->snd_ping = NULL;
-			}
-			spin_unlock(&call->lock);
-
-			if (rttmsg) {
-				rttmsg->rttdone = 1;
-				rxrpc_peer_calculate_rtt(call->conn->peer,
-							 rttmsg, msg);
-				rxrpc_put_message(rttmsg);
-			}
-		}
-		break;
-
-	default:
-		printk("Unsupported ACK reason %u\n", ap->reason);
-		break;
-	}
-
-	_leave("");
-} /* end rxrpc_call_receive_ack_packet() */
-
-/*****************************************************************************/
-/*
- * record definitive ACKs for all messages up to and including the one with the
- * 'highest' seq
- */
-static void rxrpc_call_definitively_ACK(struct rxrpc_call *call,
-					rxrpc_seq_t highest)
-{
-	struct rxrpc_message *msg;
-	int now_complete;
-
-	_enter("%p{ads=%u},%u", call, call->acks_dftv_seq, highest);
-
-	while (call->acks_dftv_seq < highest) {
-		call->acks_dftv_seq++;
-
-		_proto("Definitive ACK on packet #%u", call->acks_dftv_seq);
-
-		/* discard those at front of queue until message with highest
-		 * ACK is found */
-		spin_lock(&call->lock);
-		msg = NULL;
-		if (!list_empty(&call->acks_pendq)) {
-			msg = list_entry(call->acks_pendq.next,
-					 struct rxrpc_message, link);
-			list_del_init(&msg->link); /* dequeue */
-			if (msg->state == RXRPC_MSG_SENT)
-				call->acks_pend_cnt--;
-		}
-		spin_unlock(&call->lock);
-
-		/* insanity check */
-		if (!msg)
-			panic("%s(): acks_pendq unexpectedly empty\n",
-			      __FUNCTION__);
-
-		if (msg->seq != call->acks_dftv_seq)
-			panic("%s(): Packet #%u expected at front of acks_pendq"
-			      " (#%u found)\n",
-			      __FUNCTION__, call->acks_dftv_seq, msg->seq);
-
-		/* discard the message */
-		msg->state = RXRPC_MSG_DONE;
-		rxrpc_put_message(msg);
-	}
-
-	/* if all sent packets are definitively ACK'd then prod any sleepers just in case */
-	now_complete = 0;
-	spin_lock(&call->lock);
-	if (call->acks_dftv_seq == call->snd_seq_count) {
-		if (call->app_call_state != RXRPC_CSTATE_COMPLETE) {
-			call->app_call_state = RXRPC_CSTATE_COMPLETE;
-			_state(call);
-			now_complete = 1;
-		}
-	}
-	spin_unlock(&call->lock);
-
-	if (now_complete) {
-		del_timer_sync(&call->acks_timeout);
-		del_timer_sync(&call->rcv_timeout);
-		del_timer_sync(&call->ackr_dfr_timo);
-		call->app_attn_func(call);
-	}
-
-	_leave("");
-} /* end rxrpc_call_definitively_ACK() */
-
-/*****************************************************************************/
-/*
- * record the specified amount of ACKs/NAKs
- */
-static int rxrpc_call_record_ACK(struct rxrpc_call *call,
-				 struct rxrpc_message *msg,
-				 rxrpc_seq_t seq,
-				 size_t count)
-{
-	struct rxrpc_message *dmsg;
-	struct list_head *_p;
-	rxrpc_seq_t highest;
-	unsigned ix;
-	size_t chunk;
-	char resend, now_complete;
-	u8 acks[16];
-
-	_enter("%p{apc=%u ads=%u},%p,%u,%Zu",
-	       call, call->acks_pend_cnt, call->acks_dftv_seq,
-	       msg, seq, count);
-
-	/* handle re-ACK'ing of definitively ACK'd packets (may be out-of-order
-	 * ACKs) */
-	if (seq <= call->acks_dftv_seq) {
-		unsigned delta = call->acks_dftv_seq - seq;
-
-		if (count <= delta) {
-			_leave(" = 0 [all definitively ACK'd]");
-			return 0;
-		}
-
-		seq += delta;
-		count -= delta;
-		msg->offset += delta;
-	}
-
-	highest = seq + count - 1;
-	resend = 0;
-	while (count > 0) {
-		/* extract up to 16 ACK slots at a time */
-		chunk = min(count, sizeof(acks));
-		count -= chunk;
-
-		memset(acks, 2, sizeof(acks));
-
-		if (skb_copy_bits(msg->pkt, msg->offset, &acks, chunk) < 0) {
-			printk("Rx Received short ACK packet\n");
-			_leave(" = -EINVAL");
-			return -EINVAL;
-		}
-		msg->offset += chunk;
-
-		/* check that the ACK set is valid */
-		for (ix = 0; ix < chunk; ix++) {
-			switch (acks[ix]) {
-			case RXRPC_ACK_TYPE_ACK:
-				break;
-			case RXRPC_ACK_TYPE_NACK:
-				resend = 1;
-				break;
-			default:
-				printk("Rx Received unsupported ACK state"
-				       " %u\n", acks[ix]);
-				_leave(" = -EINVAL");
-				return -EINVAL;
-			}
-		}
-
-		_proto("Rx ACK of packets #%u-#%u "
-		       "[%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c] (pend=%u)",
-		       seq, (unsigned) (seq + chunk - 1),
-		       _acktype[acks[0x0]],
-		       _acktype[acks[0x1]],
-		       _acktype[acks[0x2]],
-		       _acktype[acks[0x3]],
-		       _acktype[acks[0x4]],
-		       _acktype[acks[0x5]],
-		       _acktype[acks[0x6]],
-		       _acktype[acks[0x7]],
-		       _acktype[acks[0x8]],
-		       _acktype[acks[0x9]],
-		       _acktype[acks[0xA]],
-		       _acktype[acks[0xB]],
-		       _acktype[acks[0xC]],
-		       _acktype[acks[0xD]],
-		       _acktype[acks[0xE]],
-		       _acktype[acks[0xF]],
-		       call->acks_pend_cnt
-		       );
-
-		/* mark the packets in the ACK queue as being provisionally
-		 * ACK'd */
-		ix = 0;
-		spin_lock(&call->lock);
-
-		/* find the first packet ACK'd/NAK'd here */
-		list_for_each(_p, &call->acks_pendq) {
-			dmsg = list_entry(_p, struct rxrpc_message, link);
-			if (dmsg->seq == seq)
-				goto found_first;
-			_debug("- %u: skipping #%u", ix, dmsg->seq);
-		}
-		goto bad_queue;
-
-	found_first:
-		do {
-			_debug("- %u: processing #%u (%c) apc=%u",
-			       ix, dmsg->seq, _acktype[acks[ix]],
-			       call->acks_pend_cnt);
-
-			if (acks[ix] == RXRPC_ACK_TYPE_ACK) {
-				if (dmsg->state == RXRPC_MSG_SENT)
-					call->acks_pend_cnt--;
-				dmsg->state = RXRPC_MSG_ACKED;
-			}
-			else {
-				if (dmsg->state == RXRPC_MSG_ACKED)
-					call->acks_pend_cnt++;
-				dmsg->state = RXRPC_MSG_SENT;
-			}
-			ix++;
-			seq++;
-
-			_p = dmsg->link.next;
-			dmsg = list_entry(_p, struct rxrpc_message, link);
-		} while(ix < chunk &&
-			_p != &call->acks_pendq &&
-			dmsg->seq == seq);
-
-		if (ix < chunk)
-			goto bad_queue;
-
-		spin_unlock(&call->lock);
-	}
-
-	if (resend)
-		rxrpc_call_resend(call, highest);
-
-	/* if all packets are provisionally ACK'd, then wake up anyone who's
-	 * waiting for that */
-	now_complete = 0;
-	spin_lock(&call->lock);
-	if (call->acks_pend_cnt == 0) {
-		if (call->app_call_state == RXRPC_CSTATE_SRVR_RCV_FINAL_ACK) {
-			call->app_call_state = RXRPC_CSTATE_COMPLETE;
-			_state(call);
-		}
-		now_complete = 1;
-	}
-	spin_unlock(&call->lock);
-
-	if (now_complete) {
-		_debug("- wake up waiters");
-		del_timer_sync(&call->acks_timeout);
-		del_timer_sync(&call->rcv_timeout);
-		del_timer_sync(&call->ackr_dfr_timo);
-		call->app_attn_func(call);
-	}
-
-	_leave(" = 0 (apc=%u)", call->acks_pend_cnt);
-	return 0;
-
- bad_queue:
-	panic("%s(): acks_pendq in bad state (packet #%u absent)\n",
-	      __FUNCTION__, seq);
-
-} /* end rxrpc_call_record_ACK() */
-
-/*****************************************************************************/
-/*
- * transfer data from the ready packet queue to the asynchronous read buffer
- * - since this func is the only one going to look at packets queued on
- *   app_readyq, we don't need a lock to modify or access them, only to modify
- *   the queue pointers
- * - called with call->lock held
- * - the buffer must be in kernel space
- * - returns:
- *	0 if buffer filled
- *	-EAGAIN if buffer not filled and more data to come
- *	-EBADMSG if last packet received and insufficient data left
- *	-ECONNABORTED if the call has in an error state
- */
-static int __rxrpc_call_read_data(struct rxrpc_call *call)
-{
-	struct rxrpc_message *msg;
-	size_t qty;
-	int ret;
-
-	_enter("%p{as=%d buf=%p qty=%Zu/%Zu}",
-	       call,
-	       call->app_async_read, call->app_read_buf,
-	       call->app_ready_qty, call->app_mark);
-
-	/* check the state */
-	switch (call->app_call_state) {
-	case RXRPC_CSTATE_SRVR_RCV_ARGS:
-	case RXRPC_CSTATE_CLNT_RCV_REPLY:
-		if (call->app_last_rcv) {
-			printk("%s(%p,%p,%Zd):"
-			       " Inconsistent call state (%s, last pkt)",
-			       __FUNCTION__,
-			       call, call->app_read_buf, call->app_mark,
-			       rxrpc_call_states[call->app_call_state]);
-			BUG();
-		}
-		break;
-
-	case RXRPC_CSTATE_SRVR_RCV_OPID:
-	case RXRPC_CSTATE_SRVR_GOT_ARGS:
-	case RXRPC_CSTATE_CLNT_GOT_REPLY:
-		break;
-
-	case RXRPC_CSTATE_SRVR_SND_REPLY:
-		if (!call->app_last_rcv) {
-			printk("%s(%p,%p,%Zd):"
-			       " Inconsistent call state (%s, not last pkt)",
-			       __FUNCTION__,
-			       call, call->app_read_buf, call->app_mark,
-			       rxrpc_call_states[call->app_call_state]);
-			BUG();
-		}
-		_debug("Trying to read data from call in SND_REPLY state");
-		break;
-
-	case RXRPC_CSTATE_ERROR:
-		_leave(" = -ECONNABORTED");
-		return -ECONNABORTED;
-
-	default:
-		printk("reading in unexpected state [[[ %u ]]]\n",
-		       call->app_call_state);
-		BUG();
-	}
-
-	/* handle the case of not having an async buffer */
-	if (!call->app_async_read) {
-		if (call->app_mark == RXRPC_APP_MARK_EOF) {
-			ret = call->app_last_rcv ? 0 : -EAGAIN;
-		}
-		else {
-			if (call->app_mark >= call->app_ready_qty) {
-				call->app_mark = RXRPC_APP_MARK_EOF;
-				ret = 0;
-			}
-			else {
-				ret = call->app_last_rcv ? -EBADMSG : -EAGAIN;
-			}
-		}
-
-		_leave(" = %d [no buf]", ret);
-		return 0;
-	}
-
-	while (!list_empty(&call->app_readyq) && call->app_mark > 0) {
-		msg = list_entry(call->app_readyq.next,
-				 struct rxrpc_message, link);
-
-		/* drag as much data as we need out of this packet */
-		qty = min(call->app_mark, msg->dsize);
-
-		_debug("reading %Zu from skb=%p off=%lu",
-		       qty, msg->pkt, msg->offset);
-
-		if (call->app_read_buf)
-			if (skb_copy_bits(msg->pkt, msg->offset,
-					  call->app_read_buf, qty) < 0)
-				panic("%s: Failed to copy data from packet:"
-				      " (%p,%p,%Zd)",
-				      __FUNCTION__,
-				      call, call->app_read_buf, qty);
-
-		/* if that packet is now empty, discard it */
-		call->app_ready_qty -= qty;
-		msg->dsize -= qty;
-
-		if (msg->dsize == 0) {
-			list_del_init(&msg->link);
-			rxrpc_put_message(msg);
-		}
-		else {
-			msg->offset += qty;
-		}
-
-		call->app_mark -= qty;
-		if (call->app_read_buf)
-			call->app_read_buf += qty;
-	}
-
-	if (call->app_mark == 0) {
-		call->app_async_read = 0;
-		call->app_mark = RXRPC_APP_MARK_EOF;
-		call->app_read_buf = NULL;
-
-		/* adjust the state if used up all packets */
-		if (list_empty(&call->app_readyq) && call->app_last_rcv) {
-			switch (call->app_call_state) {
-			case RXRPC_CSTATE_SRVR_RCV_OPID:
-				call->app_call_state = RXRPC_CSTATE_SRVR_SND_REPLY;
-				call->app_mark = RXRPC_APP_MARK_EOF;
-				_state(call);
-				del_timer_sync(&call->rcv_timeout);
-				break;
-			case RXRPC_CSTATE_SRVR_GOT_ARGS:
-				call->app_call_state = RXRPC_CSTATE_SRVR_SND_REPLY;
-				_state(call);
-				del_timer_sync(&call->rcv_timeout);
-				break;
-			default:
-				call->app_call_state = RXRPC_CSTATE_COMPLETE;
-				_state(call);
-				del_timer_sync(&call->acks_timeout);
-				del_timer_sync(&call->ackr_dfr_timo);
-				del_timer_sync(&call->rcv_timeout);
-				break;
-			}
-		}
-
-		_leave(" = 0");
-		return 0;
-	}
-
-	if (call->app_last_rcv) {
-		_debug("Insufficient data (%Zu/%Zu)",
-		       call->app_ready_qty, call->app_mark);
-		call->app_async_read = 0;
-		call->app_mark = RXRPC_APP_MARK_EOF;
-		call->app_read_buf = NULL;
-
-		_leave(" = -EBADMSG");
-		return -EBADMSG;
-	}
-
-	_leave(" = -EAGAIN");
-	return -EAGAIN;
-} /* end __rxrpc_call_read_data() */
-
-/*****************************************************************************/
-/*
- * attempt to read the specified amount of data from the call's ready queue
- * into the buffer provided
- * - since this func is the only one going to look at packets queued on
- *   app_readyq, we don't need a lock to modify or access them, only to modify
- *   the queue pointers
- * - if the buffer pointer is NULL, then data is merely drained, not copied
- * - if flags&RXRPC_CALL_READ_BLOCK, then the function will wait until there is
- *   enough data or an error will be generated
- *   - note that the caller must have added the calling task to the call's wait
- *     queue beforehand
- * - if flags&RXRPC_CALL_READ_ALL, then an error will be generated if this
- *   function doesn't read all available data
- */
-int rxrpc_call_read_data(struct rxrpc_call *call,
-			 void *buffer, size_t size, int flags)
-{
-	int ret;
-
-	_enter("%p{arq=%Zu},%p,%Zd,%x",
-	       call, call->app_ready_qty, buffer, size, flags);
-
-	spin_lock(&call->lock);
-
-	if (unlikely(!!call->app_read_buf)) {
-		spin_unlock(&call->lock);
-		_leave(" = -EBUSY");
-		return -EBUSY;
-	}
-
-	call->app_mark = size;
-	call->app_read_buf = buffer;
-	call->app_async_read = 1;
-	call->app_read_count++;
-
-	/* read as much data as possible */
-	ret = __rxrpc_call_read_data(call);
-	switch (ret) {
-	case 0:
-		if (flags & RXRPC_CALL_READ_ALL &&
-		    (!call->app_last_rcv || call->app_ready_qty > 0)) {
-			_leave(" = -EBADMSG");
-			__rxrpc_call_abort(call, -EBADMSG);
-			return -EBADMSG;
-		}
-
-		spin_unlock(&call->lock);
-		call->app_attn_func(call);
-		_leave(" = 0");
-		return ret;
-
-	case -ECONNABORTED:
-		spin_unlock(&call->lock);
-		_leave(" = %d [aborted]", ret);
-		return ret;
-
-	default:
-		__rxrpc_call_abort(call, ret);
-		_leave(" = %d", ret);
-		return ret;
-
-	case -EAGAIN:
-		spin_unlock(&call->lock);
-
-		if (!(flags & RXRPC_CALL_READ_BLOCK)) {
-			_leave(" = -EAGAIN");
-			return -EAGAIN;
-		}
-
-		/* wait for the data to arrive */
-		_debug("blocking for data arrival");
-
-		for (;;) {
-			set_current_state(TASK_INTERRUPTIBLE);
-			if (!call->app_async_read || signal_pending(current))
-				break;
-			schedule();
-		}
-		set_current_state(TASK_RUNNING);
-
-		if (signal_pending(current)) {
-			_leave(" = -EINTR");
-			return -EINTR;
-		}
-
-		if (call->app_call_state == RXRPC_CSTATE_ERROR) {
-			_leave(" = -ECONNABORTED");
-			return -ECONNABORTED;
-		}
-
-		_leave(" = 0");
-		return 0;
-	}
-
-} /* end rxrpc_call_read_data() */
-
-/*****************************************************************************/
-/*
- * write data to a call
- * - the data may not be sent immediately if it doesn't fill a buffer
- * - if we can't queue all the data for buffering now, siov[] will have been
- *   adjusted to take account of what has been sent
- */
-int rxrpc_call_write_data(struct rxrpc_call *call,
-			  size_t sioc,
-			  struct kvec *siov,
-			  u8 rxhdr_flags,
-			  gfp_t alloc_flags,
-			  int dup_data,
-			  size_t *size_sent)
-{
-	struct rxrpc_message *msg;
-	struct kvec *sptr;
-	size_t space, size, chunk, tmp;
-	char *buf;
-	int ret;
-
-	_enter("%p,%Zu,%p,%02x,%x,%d,%p",
-	       call, sioc, siov, rxhdr_flags, alloc_flags, dup_data,
-	       size_sent);
-
-	*size_sent = 0;
-	size = 0;
-	ret = -EINVAL;
-
-	/* can't send more if we've sent last packet from this end */
-	switch (call->app_call_state) {
-	case RXRPC_CSTATE_SRVR_SND_REPLY:
-	case RXRPC_CSTATE_CLNT_SND_ARGS:
-		break;
-	case RXRPC_CSTATE_ERROR:
-		ret = call->app_errno;
-	default:
-		goto out;
-	}
-
-	/* calculate how much data we've been given */
-	sptr = siov;
-	for (; sioc > 0; sptr++, sioc--) {
-		if (!sptr->iov_len)
-			continue;
-
-		if (!sptr->iov_base)
-			goto out;
-
-		size += sptr->iov_len;
-	}
-
-	_debug("- size=%Zu mtu=%Zu", size, call->conn->mtu_size);
-
-	do {
-		/* make sure there's a message under construction */
-		if (!call->snd_nextmsg) {
-			/* no - allocate a message with no data yet attached */
-			ret = rxrpc_conn_newmsg(call->conn, call,
-						RXRPC_PACKET_TYPE_DATA,
-						0, NULL, alloc_flags,
-						&call->snd_nextmsg);
-			if (ret < 0)
-				goto out;
-			_debug("- allocated new message [ds=%Zu]",
-			       call->snd_nextmsg->dsize);
-		}
-
-		msg = call->snd_nextmsg;
-		msg->hdr.flags |= rxhdr_flags;
-
-		/* deal with zero-length terminal packet */
-		if (size == 0) {
-			if (rxhdr_flags & RXRPC_LAST_PACKET) {
-				ret = rxrpc_call_flush(call);
-				if (ret < 0)
-					goto out;
-			}
-			break;
-		}
-
-		/* work out how much space current packet has available */
-		space = call->conn->mtu_size - msg->dsize;
-		chunk = min(space, size);
-
-		_debug("- [before] space=%Zu chunk=%Zu", space, chunk);
-
-		while (!siov->iov_len)
-			siov++;
-
-		/* if we are going to have to duplicate the data then coalesce
-		 * it too */
-		if (dup_data) {
-			/* don't allocate more that 1 page at a time */
-			if (chunk > PAGE_SIZE)
-				chunk = PAGE_SIZE;
-
-			/* allocate a data buffer and attach to the message */
-			buf = kmalloc(chunk, alloc_flags);
-			if (unlikely(!buf)) {
-				if (msg->dsize ==
-				    sizeof(struct rxrpc_header)) {
-					/* discard an empty msg and wind back
-					 * the seq counter */
-					rxrpc_put_message(msg);
-					call->snd_nextmsg = NULL;
-					call->snd_seq_count--;
-				}
-
-				ret = -ENOMEM;
-				goto out;
-			}
-
-			tmp = msg->dcount++;
-			set_bit(tmp, &msg->dfree);
-			msg->data[tmp].iov_base = buf;
-			msg->data[tmp].iov_len = chunk;
-			msg->dsize += chunk;
-			*size_sent += chunk;
-			size -= chunk;
-
-			/* load the buffer with data */
-			while (chunk > 0) {
-				tmp = min(chunk, siov->iov_len);
-				memcpy(buf, siov->iov_base, tmp);
-				buf += tmp;
-				siov->iov_base += tmp;
-				siov->iov_len -= tmp;
-				if (!siov->iov_len)
-					siov++;
-				chunk -= tmp;
-			}
-		}
-		else {
-			/* we want to attach the supplied buffers directly */
-			while (chunk > 0 &&
-			       msg->dcount < RXRPC_MSG_MAX_IOCS) {
-				tmp = msg->dcount++;
-				msg->data[tmp].iov_base = siov->iov_base;
-				msg->data[tmp].iov_len = siov->iov_len;
-				msg->dsize += siov->iov_len;
-				*size_sent += siov->iov_len;
-				size -= siov->iov_len;
-				chunk -= siov->iov_len;
-				siov++;
-			}
-		}
-
-		_debug("- [loaded] chunk=%Zu size=%Zu", chunk, size);
-
-		/* dispatch the message when full, final or requesting ACK */
-		if (msg->dsize >= call->conn->mtu_size || rxhdr_flags) {
-			ret = rxrpc_call_flush(call);
-			if (ret < 0)
-				goto out;
-		}
-
-	} while(size > 0);
-
-	ret = 0;
- out:
-	_leave(" = %d (%Zd queued, %Zd rem)", ret, *size_sent, size);
-	return ret;
-
-} /* end rxrpc_call_write_data() */
-
-/*****************************************************************************/
-/*
- * flush outstanding packets to the network
- */
-static int rxrpc_call_flush(struct rxrpc_call *call)
-{
-	struct rxrpc_message *msg;
-	int ret = 0;
-
-	_enter("%p", call);
-
-	rxrpc_get_call(call);
-
-	/* if there's a packet under construction, then dispatch it now */
-	if (call->snd_nextmsg) {
-		msg = call->snd_nextmsg;
-		call->snd_nextmsg = NULL;
-
-		if (msg->hdr.flags & RXRPC_LAST_PACKET) {
-			msg->hdr.flags &= ~RXRPC_MORE_PACKETS;
-			if (call->app_call_state != RXRPC_CSTATE_CLNT_SND_ARGS)
-				msg->hdr.flags |= RXRPC_REQUEST_ACK;
-		}
-		else {
-			msg->hdr.flags |= RXRPC_MORE_PACKETS;
-		}
-
-		_proto("Sending DATA message { ds=%Zu dc=%u df=%02lu }",
-		       msg->dsize, msg->dcount, msg->dfree);
-
-		/* queue and adjust call state */
-		spin_lock(&call->lock);
-		list_add_tail(&msg->link, &call->acks_pendq);
-
-		/* decide what to do depending on current state and if this is
-		 * the last packet */
-		ret = -EINVAL;
-		switch (call->app_call_state) {
-		case RXRPC_CSTATE_SRVR_SND_REPLY:
-			if (msg->hdr.flags & RXRPC_LAST_PACKET) {
-				call->app_call_state =
-					RXRPC_CSTATE_SRVR_RCV_FINAL_ACK;
-				_state(call);
-			}
-			break;
-
-		case RXRPC_CSTATE_CLNT_SND_ARGS:
-			if (msg->hdr.flags & RXRPC_LAST_PACKET) {
-				call->app_call_state =
-					RXRPC_CSTATE_CLNT_RCV_REPLY;
-				_state(call);
-			}
-			break;
-
-		case RXRPC_CSTATE_ERROR:
-			ret = call->app_errno;
-		default:
-			spin_unlock(&call->lock);
-			goto out;
-		}
-
-		call->acks_pend_cnt++;
-
-		mod_timer(&call->acks_timeout,
-			  __rxrpc_rtt_based_timeout(call,
-						    rxrpc_call_acks_timeout));
-
-		spin_unlock(&call->lock);
-
-		ret = rxrpc_conn_sendmsg(call->conn, msg);
-		if (ret == 0)
-			call->pkt_snd_count++;
-	}
-
- out:
-	rxrpc_put_call(call);
-
-	_leave(" = %d", ret);
-	return ret;
-
-} /* end rxrpc_call_flush() */
-
-/*****************************************************************************/
-/*
- * resend NAK'd or unacknowledged packets up to the highest one specified
- */
-static void rxrpc_call_resend(struct rxrpc_call *call, rxrpc_seq_t highest)
-{
-	struct rxrpc_message *msg;
-	struct list_head *_p;
-	rxrpc_seq_t seq = 0;
-
-	_enter("%p,%u", call, highest);
-
-	_proto("Rx Resend required");
-
-	/* handle too many resends */
-	if (call->snd_resend_cnt >= rxrpc_call_max_resend) {
-		_debug("Aborting due to too many resends (rcv=%d)",
-		       call->pkt_rcv_count);
-		rxrpc_call_abort(call,
-				 call->pkt_rcv_count > 0 ? -EIO : -ETIMEDOUT);
-		_leave("");
-		return;
-	}
-
-	spin_lock(&call->lock);
-	call->snd_resend_cnt++;
-	for (;;) {
-		/* determine which the next packet we might need to ACK is */
-		if (seq <= call->acks_dftv_seq)
-			seq = call->acks_dftv_seq;
-		seq++;
-
-		if (seq > highest)
-			break;
-
-		/* look for the packet in the pending-ACK queue */
-		list_for_each(_p, &call->acks_pendq) {
-			msg = list_entry(_p, struct rxrpc_message, link);
-			if (msg->seq == seq)
-				goto found_msg;
-		}
-
-		panic("%s(%p,%d):"
-		      " Inconsistent pending-ACK queue (ds=%u sc=%u sq=%u)\n",
-		      __FUNCTION__, call, highest,
-		      call->acks_dftv_seq, call->snd_seq_count, seq);
-
-	found_msg:
-		if (msg->state != RXRPC_MSG_SENT)
-			continue; /* only un-ACK'd packets */
-
-		rxrpc_get_message(msg);
-		spin_unlock(&call->lock);
-
-		/* send each message again (and ignore any errors we might
-		 * incur) */
-		_proto("Resending DATA message { ds=%Zu dc=%u df=%02lu }",
-		       msg->dsize, msg->dcount, msg->dfree);
-
-		if (rxrpc_conn_sendmsg(call->conn, msg) == 0)
-			call->pkt_snd_count++;
-
-		rxrpc_put_message(msg);
-
-		spin_lock(&call->lock);
-	}
-
-	/* reset the timeout */
-	mod_timer(&call->acks_timeout,
-		  __rxrpc_rtt_based_timeout(call, rxrpc_call_acks_timeout));
-
-	spin_unlock(&call->lock);
-
-	_leave("");
-} /* end rxrpc_call_resend() */
-
-/*****************************************************************************/
-/*
- * handle an ICMP error being applied to a call
- */
-void rxrpc_call_handle_error(struct rxrpc_call *call, int local, int errno)
-{
-	_enter("%p{%u},%d", call, ntohl(call->call_id), errno);
-
-	/* if this call is already aborted, then just wake up any waiters */
-	if (call->app_call_state == RXRPC_CSTATE_ERROR) {
-		call->app_error_func(call);
-	}
-	else {
-		/* tell the app layer what happened */
-		spin_lock(&call->lock);
-		call->app_call_state = RXRPC_CSTATE_ERROR;
-		_state(call);
-		if (local)
-			call->app_err_state = RXRPC_ESTATE_LOCAL_ERROR;
-		else
-			call->app_err_state = RXRPC_ESTATE_REMOTE_ERROR;
-		call->app_errno		= errno;
-		call->app_mark		= RXRPC_APP_MARK_EOF;
-		call->app_read_buf	= NULL;
-		call->app_async_read	= 0;
-
-		/* map the error */
-		call->app_aemap_func(call);
-
-		del_timer_sync(&call->acks_timeout);
-		del_timer_sync(&call->rcv_timeout);
-		del_timer_sync(&call->ackr_dfr_timo);
-
-		spin_unlock(&call->lock);
-
-		call->app_error_func(call);
-	}
-
-	_leave("");
-} /* end rxrpc_call_handle_error() */
diff --git a/net/rxrpc/connection.c b/net/rxrpc/connection.c
deleted file mode 100644
index 665a9995244..00000000000
--- a/net/rxrpc/connection.c
+++ /dev/null
@@ -1,777 +0,0 @@
-/* connection.c: Rx connection routines
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
-#include <net/sock.h>
-#include <asm/uaccess.h>
-#include "internal.h"
-
-__RXACCT_DECL(atomic_t rxrpc_connection_count);
-
-LIST_HEAD(rxrpc_conns);
-DECLARE_RWSEM(rxrpc_conns_sem);
-unsigned long rxrpc_conn_timeout = 60 * 60;
-
-static void rxrpc_conn_do_timeout(struct rxrpc_connection *conn);
-
-static void __rxrpc_conn_timeout(rxrpc_timer_t *timer)
-{
-	struct rxrpc_connection *conn =
-		list_entry(timer, struct rxrpc_connection, timeout);
-
-	_debug("Rx CONN TIMEOUT [%p{u=%d}]", conn, atomic_read(&conn->usage));
-
-	rxrpc_conn_do_timeout(conn);
-}
-
-static const struct rxrpc_timer_ops rxrpc_conn_timer_ops = {
-	.timed_out	= __rxrpc_conn_timeout,
-};
-
-/*****************************************************************************/
-/*
- * create a new connection record
- */
-static inline int __rxrpc_create_connection(struct rxrpc_peer *peer,
-					    struct rxrpc_connection **_conn)
-{
-	struct rxrpc_connection *conn;
-
-	_enter("%p",peer);
-
-	/* allocate and initialise a connection record */
-	conn = kzalloc(sizeof(struct rxrpc_connection), GFP_KERNEL);
-	if (!conn) {
-		_leave(" = -ENOMEM");
-		return -ENOMEM;
-	}
-
-	atomic_set(&conn->usage, 1);
-
-	INIT_LIST_HEAD(&conn->link);
-	INIT_LIST_HEAD(&conn->id_link);
-	init_waitqueue_head(&conn->chanwait);
-	spin_lock_init(&conn->lock);
-	rxrpc_timer_init(&conn->timeout, &rxrpc_conn_timer_ops);
-
-	do_gettimeofday(&conn->atime);
-	conn->mtu_size = 1024;
-	conn->peer = peer;
-	conn->trans = peer->trans;
-
-	__RXACCT(atomic_inc(&rxrpc_connection_count));
-	*_conn = conn;
-	_leave(" = 0 (%p)", conn);
-
-	return 0;
-} /* end __rxrpc_create_connection() */
-
-/*****************************************************************************/
-/*
- * create a new connection record for outgoing connections
- */
-int rxrpc_create_connection(struct rxrpc_transport *trans,
-			    __be16 port,
-			    __be32 addr,
-			    uint16_t service_id,
-			    void *security,
-			    struct rxrpc_connection **_conn)
-{
-	struct rxrpc_connection *candidate, *conn;
-	struct rxrpc_peer *peer;
-	struct list_head *_p;
-	__be32 connid;
-	int ret;
-
-	_enter("%p{%hu},%u,%hu", trans, trans->port, ntohs(port), service_id);
-
-	/* get a peer record */
-	ret = rxrpc_peer_lookup(trans, addr, &peer);
-	if (ret < 0) {
-		_leave(" = %d", ret);
-		return ret;
-	}
-
-	/* allocate and initialise a connection record */
-	ret = __rxrpc_create_connection(peer, &candidate);
-	if (ret < 0) {
-		rxrpc_put_peer(peer);
-		_leave(" = %d", ret);
-		return ret;
-	}
-
-	/* fill in the specific bits */
-	candidate->addr.sin_family	= AF_INET;
-	candidate->addr.sin_port	= port;
-	candidate->addr.sin_addr.s_addr	= addr;
-
-	candidate->in_epoch		= rxrpc_epoch;
-	candidate->out_epoch		= rxrpc_epoch;
-	candidate->in_clientflag	= 0;
-	candidate->out_clientflag	= RXRPC_CLIENT_INITIATED;
-	candidate->service_id		= htons(service_id);
-
-	/* invent a unique connection ID */
-	write_lock(&peer->conn_idlock);
-
- try_next_id:
-	connid = htonl(peer->conn_idcounter & RXRPC_CIDMASK);
-	peer->conn_idcounter += RXRPC_MAXCALLS;
-
-	list_for_each(_p, &peer->conn_idlist) {
-		conn = list_entry(_p, struct rxrpc_connection, id_link);
-		if (connid == conn->conn_id)
-			goto try_next_id;
-		if (connid > conn->conn_id)
-			break;
-	}
-
-	_debug("selected candidate conn ID %x.%u",
-	       ntohl(peer->addr.s_addr), ntohl(connid));
-
-	candidate->conn_id = connid;
-	list_add_tail(&candidate->id_link, _p);
-
-	write_unlock(&peer->conn_idlock);
-
-	/* attach to peer */
-	candidate->peer = peer;
-
-	write_lock(&peer->conn_lock);
-
-	/* search the peer's transport graveyard list */
-	spin_lock(&peer->conn_gylock);
-	list_for_each(_p, &peer->conn_graveyard) {
-		conn = list_entry(_p, struct rxrpc_connection, link);
-		if (conn->addr.sin_port	== candidate->addr.sin_port	&&
-		    conn->security_ix	== candidate->security_ix	&&
-		    conn->service_id	== candidate->service_id	&&
-		    conn->in_clientflag	== 0)
-			goto found_in_graveyard;
-	}
-	spin_unlock(&peer->conn_gylock);
-
-	/* pick the new candidate */
-	_debug("created connection: {%08x} [out]", ntohl(candidate->conn_id));
-	atomic_inc(&peer->conn_count);
-	conn = candidate;
-	candidate = NULL;
-
- make_active:
-	list_add_tail(&conn->link, &peer->conn_active);
-	write_unlock(&peer->conn_lock);
-
-	if (candidate) {
-		write_lock(&peer->conn_idlock);
-		list_del(&candidate->id_link);
-		write_unlock(&peer->conn_idlock);
-
-		__RXACCT(atomic_dec(&rxrpc_connection_count));
-		kfree(candidate);
-	}
-	else {
-		down_write(&rxrpc_conns_sem);
-		list_add_tail(&conn->proc_link, &rxrpc_conns);
-		up_write(&rxrpc_conns_sem);
-	}
-
-	*_conn = conn;
-	_leave(" = 0 (%p)", conn);
-
-	return 0;
-
-	/* handle resurrecting a connection from the graveyard */
- found_in_graveyard:
-	_debug("resurrecting connection: {%08x} [out]", ntohl(conn->conn_id));
-	rxrpc_get_connection(conn);
-	rxrpc_krxtimod_del_timer(&conn->timeout);
-	list_del_init(&conn->link);
-	spin_unlock(&peer->conn_gylock);
-	goto make_active;
-} /* end rxrpc_create_connection() */
-
-/*****************************************************************************/
-/*
- * lookup the connection for an incoming packet
- * - create a new connection record for unrecorded incoming connections
- */
-int rxrpc_connection_lookup(struct rxrpc_peer *peer,
-			    struct rxrpc_message *msg,
-			    struct rxrpc_connection **_conn)
-{
-	struct rxrpc_connection *conn, *candidate = NULL;
-	struct list_head *_p;
-	struct sk_buff *pkt = msg->pkt;
-	int ret, fresh = 0;
-	__be32 x_epoch, x_connid;
-	__be16 x_port, x_servid;
-	__u32 x_secix;
-	u8 x_clflag;
-
-	_enter("%p{{%hu}},%u,%hu",
-	       peer,
-	       peer->trans->port,
-	       ntohs(udp_hdr(pkt)->source),
-	       ntohs(msg->hdr.serviceId));
-
-	x_port		= udp_hdr(pkt)->source;
-	x_epoch		= msg->hdr.epoch;
-	x_clflag	= msg->hdr.flags & RXRPC_CLIENT_INITIATED;
-	x_connid	= htonl(ntohl(msg->hdr.cid) & RXRPC_CIDMASK);
-	x_servid	= msg->hdr.serviceId;
-	x_secix		= msg->hdr.securityIndex;
-
-	/* [common case] search the transport's active list first */
-	read_lock(&peer->conn_lock);
-	list_for_each(_p, &peer->conn_active) {
-		conn = list_entry(_p, struct rxrpc_connection, link);
-		if (conn->addr.sin_port		== x_port	&&
-		    conn->in_epoch		== x_epoch	&&
-		    conn->conn_id		== x_connid	&&
-		    conn->security_ix		== x_secix	&&
-		    conn->service_id		== x_servid	&&
-		    conn->in_clientflag		== x_clflag)
-			goto found_active;
-	}
-	read_unlock(&peer->conn_lock);
-
-	/* [uncommon case] not active
-	 * - create a candidate for a new record if an inbound connection
-	 * - only examine the graveyard for an outbound connection
-	 */
-	if (x_clflag) {
-		ret = __rxrpc_create_connection(peer, &candidate);
-		if (ret < 0) {
-			_leave(" = %d", ret);
-			return ret;
-		}
-
-		/* fill in the specifics */
-		candidate->addr.sin_family	= AF_INET;
-		candidate->addr.sin_port	= x_port;
-		candidate->addr.sin_addr.s_addr = ip_hdr(pkt)->saddr;
-		candidate->in_epoch		= x_epoch;
-		candidate->out_epoch		= x_epoch;
-		candidate->in_clientflag	= RXRPC_CLIENT_INITIATED;
-		candidate->out_clientflag	= 0;
-		candidate->conn_id		= x_connid;
-		candidate->service_id		= x_servid;
-		candidate->security_ix		= x_secix;
-	}
-
-	/* search the active list again, just in case it appeared whilst we
-	 * were busy */
-	write_lock(&peer->conn_lock);
-	list_for_each(_p, &peer->conn_active) {
-		conn = list_entry(_p, struct rxrpc_connection, link);
-		if (conn->addr.sin_port		== x_port	&&
-		    conn->in_epoch		== x_epoch	&&
-		    conn->conn_id		== x_connid	&&
-		    conn->security_ix		== x_secix	&&
-		    conn->service_id		== x_servid	&&
-		    conn->in_clientflag		== x_clflag)
-			goto found_active_second_chance;
-	}
-
-	/* search the transport's graveyard list */
-	spin_lock(&peer->conn_gylock);
-	list_for_each(_p, &peer->conn_graveyard) {
-		conn = list_entry(_p, struct rxrpc_connection, link);
-		if (conn->addr.sin_port		== x_port	&&
-		    conn->in_epoch		== x_epoch	&&
-		    conn->conn_id		== x_connid	&&
-		    conn->security_ix		== x_secix	&&
-		    conn->service_id		== x_servid	&&
-		    conn->in_clientflag		== x_clflag)
-			goto found_in_graveyard;
-	}
-	spin_unlock(&peer->conn_gylock);
-
-	/* outbound connections aren't created here */
-	if (!x_clflag) {
-		write_unlock(&peer->conn_lock);
-		_leave(" = -ENOENT");
-		return -ENOENT;
-	}
-
-	/* we can now add the new candidate to the list */
-	_debug("created connection: {%08x} [in]", ntohl(candidate->conn_id));
-	rxrpc_get_peer(peer);
-	conn = candidate;
-	candidate = NULL;
-	atomic_inc(&peer->conn_count);
-	fresh = 1;
-
- make_active:
-	list_add_tail(&conn->link, &peer->conn_active);
-
- success_uwfree:
-	write_unlock(&peer->conn_lock);
-
-	if (candidate) {
-		write_lock(&peer->conn_idlock);
-		list_del(&candidate->id_link);
-		write_unlock(&peer->conn_idlock);
-
-		__RXACCT(atomic_dec(&rxrpc_connection_count));
-		kfree(candidate);
-	}
-
-	if (fresh) {
-		down_write(&rxrpc_conns_sem);
-		list_add_tail(&conn->proc_link, &rxrpc_conns);
-		up_write(&rxrpc_conns_sem);
-	}
-
- success:
-	*_conn = conn;
-	_leave(" = 0 (%p)", conn);
-	return 0;
-
-	/* handle the connection being found in the active list straight off */
- found_active:
-	rxrpc_get_connection(conn);
-	read_unlock(&peer->conn_lock);
-	goto success;
-
-	/* handle resurrecting a connection from the graveyard */
- found_in_graveyard:
-	_debug("resurrecting connection: {%08x} [in]", ntohl(conn->conn_id));
-	rxrpc_get_peer(peer);
-	rxrpc_get_connection(conn);
-	rxrpc_krxtimod_del_timer(&conn->timeout);
-	list_del_init(&conn->link);
-	spin_unlock(&peer->conn_gylock);
-	goto make_active;
-
-	/* handle finding the connection on the second time through the active
-	 * list */
- found_active_second_chance:
-	rxrpc_get_connection(conn);
-	goto success_uwfree;
-
-} /* end rxrpc_connection_lookup() */
-
-/*****************************************************************************/
-/*
- * finish using a connection record
- * - it will be transferred to the peer's connection graveyard when refcount
- *   reaches 0
- */
-void rxrpc_put_connection(struct rxrpc_connection *conn)
-{
-	struct rxrpc_peer *peer;
-
-	if (!conn)
-		return;
-
-	_enter("%p{u=%d p=%hu}",
-	       conn, atomic_read(&conn->usage), ntohs(conn->addr.sin_port));
-
-	peer = conn->peer;
-	spin_lock(&peer->conn_gylock);
-
-	/* sanity check */
-	if (atomic_read(&conn->usage) <= 0)
-		BUG();
-
-	if (likely(!atomic_dec_and_test(&conn->usage))) {
-		spin_unlock(&peer->conn_gylock);
-		_leave("");
-		return;
-	}
-
-	/* move to graveyard queue */
-	_debug("burying connection: {%08x}", ntohl(conn->conn_id));
-	list_move_tail(&conn->link, &peer->conn_graveyard);
-
-	rxrpc_krxtimod_add_timer(&conn->timeout, rxrpc_conn_timeout * HZ);
-
-	spin_unlock(&peer->conn_gylock);
-
-	rxrpc_put_peer(conn->peer);
-
-	_leave(" [killed]");
-} /* end rxrpc_put_connection() */
-
-/*****************************************************************************/
-/*
- * free a connection record
- */
-static void rxrpc_conn_do_timeout(struct rxrpc_connection *conn)
-{
-	struct rxrpc_peer *peer;
-
-	_enter("%p{u=%d p=%hu}",
-	       conn, atomic_read(&conn->usage), ntohs(conn->addr.sin_port));
-
-	peer = conn->peer;
-
-	if (atomic_read(&conn->usage) < 0)
-		BUG();
-
-	/* remove from graveyard if still dead */
-	spin_lock(&peer->conn_gylock);
-	if (atomic_read(&conn->usage) == 0) {
-		list_del_init(&conn->link);
-	}
-	else {
-		conn = NULL;
-	}
-	spin_unlock(&peer->conn_gylock);
-
-	if (!conn) {
-		_leave("");
-		return; /* resurrected */
-	}
-
-	_debug("--- Destroying Connection %p{%08x} ---",
-	       conn, ntohl(conn->conn_id));
-
-	down_write(&rxrpc_conns_sem);
-	list_del(&conn->proc_link);
-	up_write(&rxrpc_conns_sem);
-
-	write_lock(&peer->conn_idlock);
-	list_del(&conn->id_link);
-	write_unlock(&peer->conn_idlock);
-
-	__RXACCT(atomic_dec(&rxrpc_connection_count));
-	kfree(conn);
-
-	/* if the graveyard is now empty, wake up anyone waiting for that */
-	if (atomic_dec_and_test(&peer->conn_count))
-		wake_up(&peer->conn_gy_waitq);
-
-	_leave(" [destroyed]");
-} /* end rxrpc_conn_do_timeout() */
-
-/*****************************************************************************/
-/*
- * clear all connection records from a peer endpoint
- */
-void rxrpc_conn_clearall(struct rxrpc_peer *peer)
-{
-	DECLARE_WAITQUEUE(myself, current);
-
-	struct rxrpc_connection *conn;
-	int err;
-
-	_enter("%p", peer);
-
-	/* there shouldn't be any active conns remaining */
-	if (!list_empty(&peer->conn_active))
-		BUG();
-
-	/* manually timeout all conns in the graveyard */
-	spin_lock(&peer->conn_gylock);
-	while (!list_empty(&peer->conn_graveyard)) {
-		conn = list_entry(peer->conn_graveyard.next,
-				  struct rxrpc_connection, link);
-		err = rxrpc_krxtimod_del_timer(&conn->timeout);
-		spin_unlock(&peer->conn_gylock);
-
-		if (err == 0)
-			rxrpc_conn_do_timeout(conn);
-
-		spin_lock(&peer->conn_gylock);
-	}
-	spin_unlock(&peer->conn_gylock);
-
-	/* wait for the the conn graveyard to be completely cleared */
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	add_wait_queue(&peer->conn_gy_waitq, &myself);
-
-	while (atomic_read(&peer->conn_count) != 0) {
-		schedule();
-		set_current_state(TASK_UNINTERRUPTIBLE);
-	}
-
-	remove_wait_queue(&peer->conn_gy_waitq, &myself);
-	set_current_state(TASK_RUNNING);
-
-	_leave("");
-} /* end rxrpc_conn_clearall() */
-
-/*****************************************************************************/
-/*
- * allocate and prepare a message for sending out through the transport
- * endpoint
- */
-int rxrpc_conn_newmsg(struct rxrpc_connection *conn,
-		      struct rxrpc_call *call,
-		      uint8_t type,
-		      int dcount,
-		      struct kvec diov[],
-		      gfp_t alloc_flags,
-		      struct rxrpc_message **_msg)
-{
-	struct rxrpc_message *msg;
-	int loop;
-
-	_enter("%p{%d},%p,%u", conn, ntohs(conn->addr.sin_port), call, type);
-
-	if (dcount > 3) {
-		_leave(" = -EINVAL");
-		return -EINVAL;
-	}
-
-	msg = kzalloc(sizeof(struct rxrpc_message), alloc_flags);
-	if (!msg) {
-		_leave(" = -ENOMEM");
-		return -ENOMEM;
-	}
-
-	atomic_set(&msg->usage, 1);
-
-	INIT_LIST_HEAD(&msg->link);
-
-	msg->state = RXRPC_MSG_PREPARED;
-
-	msg->hdr.epoch		= conn->out_epoch;
-	msg->hdr.cid		= conn->conn_id | (call ? call->chan_ix : 0);
-	msg->hdr.callNumber	= call ? call->call_id : 0;
-	msg->hdr.type		= type;
-	msg->hdr.flags		= conn->out_clientflag;
-	msg->hdr.securityIndex	= conn->security_ix;
-	msg->hdr.serviceId	= conn->service_id;
-
-	/* generate sequence numbers for data packets */
-	if (call) {
-		switch (type) {
-		case RXRPC_PACKET_TYPE_DATA:
-			msg->seq = ++call->snd_seq_count;
-			msg->hdr.seq = htonl(msg->seq);
-			break;
-		case RXRPC_PACKET_TYPE_ACK:
-			/* ACK sequence numbers are complicated. The following
-			 * may be wrong:
-			 * - jumbo packet ACKs should have a seq number
-			 * - normal ACKs should not
-			 */
-		default:
-			break;
-		}
-	}
-
-	msg->dcount = dcount + 1;
-	msg->dsize = sizeof(msg->hdr);
-	msg->data[0].iov_len = sizeof(msg->hdr);
-	msg->data[0].iov_base = &msg->hdr;
-
-	for (loop=0; loop < dcount; loop++) {
-		msg->dsize += diov[loop].iov_len;
-		msg->data[loop+1].iov_len  = diov[loop].iov_len;
-		msg->data[loop+1].iov_base = diov[loop].iov_base;
-	}
-
-	__RXACCT(atomic_inc(&rxrpc_message_count));
-	*_msg = msg;
-	_leave(" = 0 (%p) #%d", msg, atomic_read(&rxrpc_message_count));
-	return 0;
-} /* end rxrpc_conn_newmsg() */
-
-/*****************************************************************************/
-/*
- * free a message
- */
-void __rxrpc_put_message(struct rxrpc_message *msg)
-{
-	int loop;
-
-	_enter("%p #%d", msg, atomic_read(&rxrpc_message_count));
-
-	if (msg->pkt)
-		kfree_skb(msg->pkt);
-	rxrpc_put_connection(msg->conn);
-
-	for (loop = 0; loop < 8; loop++)
-		if (test_bit(loop, &msg->dfree))
-			kfree(msg->data[loop].iov_base);
-
-	__RXACCT(atomic_dec(&rxrpc_message_count));
-	kfree(msg);
-
-	_leave("");
-} /* end __rxrpc_put_message() */
-
-/*****************************************************************************/
-/*
- * send a message out through the transport endpoint
- */
-int rxrpc_conn_sendmsg(struct rxrpc_connection *conn,
-		       struct rxrpc_message *msg)
-{
-	struct msghdr msghdr;
-	int ret;
-
-	_enter("%p{%d}", conn, ntohs(conn->addr.sin_port));
-
-	/* fill in some fields in the header */
-	spin_lock(&conn->lock);
-	msg->hdr.serial = htonl(++conn->serial_counter);
-	msg->rttdone = 0;
-	spin_unlock(&conn->lock);
-
-	/* set up the message to be transmitted */
-	msghdr.msg_name		= &conn->addr;
-	msghdr.msg_namelen	= sizeof(conn->addr);
-	msghdr.msg_control	= NULL;
-	msghdr.msg_controllen	= 0;
-	msghdr.msg_flags	= MSG_CONFIRM | MSG_DONTWAIT;
-
-	_net("Sending message type %d of %Zd bytes to %08x:%d",
-	     msg->hdr.type,
-	     msg->dsize,
-	     ntohl(conn->addr.sin_addr.s_addr),
-	     ntohs(conn->addr.sin_port));
-
-	/* send the message */
-	ret = kernel_sendmsg(conn->trans->socket, &msghdr,
-			     msg->data, msg->dcount, msg->dsize);
-	if (ret < 0) {
-		msg->state = RXRPC_MSG_ERROR;
-	} else {
-		msg->state = RXRPC_MSG_SENT;
-		ret = 0;
-
-		spin_lock(&conn->lock);
-		do_gettimeofday(&conn->atime);
-		msg->stamp = conn->atime;
-		spin_unlock(&conn->lock);
-	}
-
-	_leave(" = %d", ret);
-
-	return ret;
-} /* end rxrpc_conn_sendmsg() */
-
-/*****************************************************************************/
-/*
- * deal with a subsequent call packet
- */
-int rxrpc_conn_receive_call_packet(struct rxrpc_connection *conn,
-				   struct rxrpc_call *call,
-				   struct rxrpc_message *msg)
-{
-	struct rxrpc_message *pmsg;
-	struct dst_entry *dst;
-	struct list_head *_p;
-	unsigned cix, seq;
-	int ret = 0;
-
-	_enter("%p,%p,%p", conn, call, msg);
-
-	if (!call) {
-		cix = ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK;
-
-		spin_lock(&conn->lock);
-		call = conn->channels[cix];
-
-		if (!call || call->call_id != msg->hdr.callNumber) {
-			spin_unlock(&conn->lock);
-			rxrpc_trans_immediate_abort(conn->trans, msg, -ENOENT);
-			goto out;
-		}
-		else {
-			rxrpc_get_call(call);
-			spin_unlock(&conn->lock);
-		}
-	}
-	else {
-		rxrpc_get_call(call);
-	}
-
-	_proto("Received packet %%%u [%u] on call %hu:%u:%u",
-	       ntohl(msg->hdr.serial),
-	       ntohl(msg->hdr.seq),
-	       ntohs(msg->hdr.serviceId),
-	       ntohl(conn->conn_id),
-	       ntohl(call->call_id));
-
-	call->pkt_rcv_count++;
-
-	dst = msg->pkt->dst;
-	if (dst && dst->dev)
-		conn->peer->if_mtu =
-			dst->dev->mtu - dst->dev->hard_header_len;
-
-	/* queue on the call in seq order */
-	rxrpc_get_message(msg);
-	seq = msg->seq;
-
-	spin_lock(&call->lock);
-	list_for_each(_p, &call->rcv_receiveq) {
-		pmsg = list_entry(_p, struct rxrpc_message, link);
-		if (pmsg->seq > seq)
-			break;
-	}
-	list_add_tail(&msg->link, _p);
-
-	/* reset the activity timeout */
-	call->flags |= RXRPC_CALL_RCV_PKT;
-	mod_timer(&call->rcv_timeout,jiffies + rxrpc_call_rcv_timeout * HZ);
-
-	spin_unlock(&call->lock);
-
-	rxrpc_krxiod_queue_call(call);
-
-	rxrpc_put_call(call);
- out:
-	_leave(" = %d", ret);
-	return ret;
-} /* end rxrpc_conn_receive_call_packet() */
-
-/*****************************************************************************/
-/*
- * handle an ICMP error being applied to a connection
- */
-void rxrpc_conn_handle_error(struct rxrpc_connection *conn,
-			     int local, int errno)
-{
-	struct rxrpc_call *calls[4];
-	int loop;
-
-	_enter("%p{%d},%d", conn, ntohs(conn->addr.sin_port), errno);
-
-	/* get a ref to all my calls in one go */
-	memset(calls, 0, sizeof(calls));
-	spin_lock(&conn->lock);
-
-	for (loop = 3; loop >= 0; loop--) {
-		if (conn->channels[loop]) {
-			calls[loop] = conn->channels[loop];
-			rxrpc_get_call(calls[loop]);
-		}
-	}
-
-	spin_unlock(&conn->lock);
-
-	/* now kick them all */
-	for (loop = 3; loop >= 0; loop--) {
-		if (calls[loop]) {
-			rxrpc_call_handle_error(calls[loop], local, errno);
-			rxrpc_put_call(calls[loop]);
-		}
-	}
-
-	_leave("");
-} /* end rxrpc_conn_handle_error() */
diff --git a/net/rxrpc/internal.h b/net/rxrpc/internal.h
deleted file mode 100644
index cc0c5795a10..00000000000
--- a/net/rxrpc/internal.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/* internal.h: internal Rx RPC stuff
- *
- * Copyright (c) 2002   David Howells (dhowells@redhat.com).
- */
-
-#ifndef RXRPC_INTERNAL_H
-#define RXRPC_INTERNAL_H
-
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-
-/*
- * debug accounting
- */
-#if 1
-#define __RXACCT_DECL(X) X
-#define __RXACCT(X) do { X; } while(0)
-#else
-#define __RXACCT_DECL(X)
-#define __RXACCT(X) do { } while(0)
-#endif
-
-__RXACCT_DECL(extern atomic_t rxrpc_transport_count);
-__RXACCT_DECL(extern atomic_t rxrpc_peer_count);
-__RXACCT_DECL(extern atomic_t rxrpc_connection_count);
-__RXACCT_DECL(extern atomic_t rxrpc_call_count);
-__RXACCT_DECL(extern atomic_t rxrpc_message_count);
-
-/*
- * debug tracing
- */
-#define kenter(FMT, a...)	printk("==> %s("FMT")\n",__FUNCTION__ , ##a)
-#define kleave(FMT, a...)	printk("<== %s()"FMT"\n",__FUNCTION__ , ##a)
-#define kdebug(FMT, a...)	printk("    "FMT"\n" , ##a)
-#define kproto(FMT, a...)	printk("### "FMT"\n" , ##a)
-#define knet(FMT, a...)		printk("    "FMT"\n" , ##a)
-
-#if 0
-#define _enter(FMT, a...)	kenter(FMT , ##a)
-#define _leave(FMT, a...)	kleave(FMT , ##a)
-#define _debug(FMT, a...)	kdebug(FMT , ##a)
-#define _proto(FMT, a...)	kproto(FMT , ##a)
-#define _net(FMT, a...)		knet(FMT , ##a)
-#else
-#define _enter(FMT, a...)	do { if (rxrpc_ktrace) kenter(FMT , ##a); } while(0)
-#define _leave(FMT, a...)	do { if (rxrpc_ktrace) kleave(FMT , ##a); } while(0)
-#define _debug(FMT, a...)	do { if (rxrpc_kdebug) kdebug(FMT , ##a); } while(0)
-#define _proto(FMT, a...)	do { if (rxrpc_kproto) kproto(FMT , ##a); } while(0)
-#define _net(FMT, a...)		do { if (rxrpc_knet)   knet  (FMT , ##a); } while(0)
-#endif
-
-static inline void rxrpc_discard_my_signals(void)
-{
-	while (signal_pending(current)) {
-		siginfo_t sinfo;
-
-		spin_lock_irq(&current->sighand->siglock);
-		dequeue_signal(current, &current->blocked, &sinfo);
-		spin_unlock_irq(&current->sighand->siglock);
-	}
-}
-
-/*
- * call.c
- */
-extern struct list_head rxrpc_calls;
-extern struct rw_semaphore rxrpc_calls_sem;
-
-/*
- * connection.c
- */
-extern struct list_head rxrpc_conns;
-extern struct rw_semaphore rxrpc_conns_sem;
-extern unsigned long rxrpc_conn_timeout;
-
-extern void rxrpc_conn_clearall(struct rxrpc_peer *peer);
-
-/*
- * peer.c
- */
-extern struct list_head rxrpc_peers;
-extern struct rw_semaphore rxrpc_peers_sem;
-extern unsigned long rxrpc_peer_timeout;
-
-extern void rxrpc_peer_calculate_rtt(struct rxrpc_peer *peer,
-				     struct rxrpc_message *msg,
-				     struct rxrpc_message *resp);
-
-extern void rxrpc_peer_clearall(struct rxrpc_transport *trans);
-
-
-/*
- * proc.c
- */
-#ifdef CONFIG_PROC_FS
-extern int rxrpc_proc_init(void);
-extern void rxrpc_proc_cleanup(void);
-#endif
-
-/*
- * transport.c
- */
-extern struct list_head rxrpc_proc_transports;
-extern struct rw_semaphore rxrpc_proc_transports_sem;
-
-#endif /* RXRPC_INTERNAL_H */
diff --git a/net/rxrpc/krxiod.c b/net/rxrpc/krxiod.c
deleted file mode 100644
index bbbcd6c2404..00000000000
--- a/net/rxrpc/krxiod.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/* krxiod.c: Rx I/O daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/completion.h>
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <linux/freezer.h>
-#include <rxrpc/krxiod.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/call.h>
-#include "internal.h"
-
-static DECLARE_WAIT_QUEUE_HEAD(rxrpc_krxiod_sleepq);
-static DECLARE_COMPLETION(rxrpc_krxiod_dead);
-
-static atomic_t rxrpc_krxiod_qcount = ATOMIC_INIT(0);
-
-static LIST_HEAD(rxrpc_krxiod_transportq);
-static DEFINE_SPINLOCK(rxrpc_krxiod_transportq_lock);
-
-static LIST_HEAD(rxrpc_krxiod_callq);
-static DEFINE_SPINLOCK(rxrpc_krxiod_callq_lock);
-
-static volatile int rxrpc_krxiod_die;
-
-/*****************************************************************************/
-/*
- * Rx I/O daemon
- */
-static int rxrpc_krxiod(void *arg)
-{
-	DECLARE_WAITQUEUE(krxiod,current);
-
-	printk("Started krxiod %d\n",current->pid);
-
-	daemonize("krxiod");
-
-	/* loop around waiting for work to do */
-	do {
-		/* wait for work or to be told to exit */
-		_debug("### Begin Wait");
-		if (!atomic_read(&rxrpc_krxiod_qcount)) {
-			set_current_state(TASK_INTERRUPTIBLE);
-
-			add_wait_queue(&rxrpc_krxiod_sleepq, &krxiod);
-
-			for (;;) {
-				set_current_state(TASK_INTERRUPTIBLE);
-				if (atomic_read(&rxrpc_krxiod_qcount) ||
-				    rxrpc_krxiod_die ||
-				    signal_pending(current))
-					break;
-
-				schedule();
-			}
-
-			remove_wait_queue(&rxrpc_krxiod_sleepq, &krxiod);
-			set_current_state(TASK_RUNNING);
-		}
-		_debug("### End Wait");
-
-		/* do work if been given some to do */
-		_debug("### Begin Work");
-
-		/* see if there's a transport in need of attention */
-		if (!list_empty(&rxrpc_krxiod_transportq)) {
-			struct rxrpc_transport *trans = NULL;
-
-			spin_lock_irq(&rxrpc_krxiod_transportq_lock);
-
-			if (!list_empty(&rxrpc_krxiod_transportq)) {
-				trans = list_entry(
-					rxrpc_krxiod_transportq.next,
-					struct rxrpc_transport,
-					krxiodq_link);
-
-				list_del_init(&trans->krxiodq_link);
-				atomic_dec(&rxrpc_krxiod_qcount);
-
-				/* make sure it hasn't gone away and doesn't go
-				 * away */
-				if (atomic_read(&trans->usage)>0)
-					rxrpc_get_transport(trans);
-				else
-					trans = NULL;
-			}
-
-			spin_unlock_irq(&rxrpc_krxiod_transportq_lock);
-
-			if (trans) {
-				rxrpc_trans_receive_packet(trans);
-				rxrpc_put_transport(trans);
-			}
-		}
-
-		/* see if there's a call in need of attention */
-		if (!list_empty(&rxrpc_krxiod_callq)) {
-			struct rxrpc_call *call = NULL;
-
-			spin_lock_irq(&rxrpc_krxiod_callq_lock);
-
-			if (!list_empty(&rxrpc_krxiod_callq)) {
-				call = list_entry(rxrpc_krxiod_callq.next,
-						  struct rxrpc_call,
-						  rcv_krxiodq_lk);
-				list_del_init(&call->rcv_krxiodq_lk);
-				atomic_dec(&rxrpc_krxiod_qcount);
-
-				/* make sure it hasn't gone away and doesn't go
-				 * away */
-				if (atomic_read(&call->usage) > 0) {
-					_debug("@@@ KRXIOD"
-					       " Begin Attend Call %p", call);
-					rxrpc_get_call(call);
-				}
-				else {
-					call = NULL;
-				}
-			}
-
-			spin_unlock_irq(&rxrpc_krxiod_callq_lock);
-
-			if (call) {
-				rxrpc_call_do_stuff(call);
-				rxrpc_put_call(call);
-				_debug("@@@ KRXIOD End Attend Call %p", call);
-			}
-		}
-
-		_debug("### End Work");
-
-		try_to_freeze();
-
-		/* discard pending signals */
-		rxrpc_discard_my_signals();
-
-	} while (!rxrpc_krxiod_die);
-
-	/* and that's all */
-	complete_and_exit(&rxrpc_krxiod_dead, 0);
-
-} /* end rxrpc_krxiod() */
-
-/*****************************************************************************/
-/*
- * start up a krxiod daemon
- */
-int __init rxrpc_krxiod_init(void)
-{
-	return kernel_thread(rxrpc_krxiod, NULL, 0);
-
-} /* end rxrpc_krxiod_init() */
-
-/*****************************************************************************/
-/*
- * kill the krxiod daemon and wait for it to complete
- */
-void rxrpc_krxiod_kill(void)
-{
-	rxrpc_krxiod_die = 1;
-	wake_up_all(&rxrpc_krxiod_sleepq);
-	wait_for_completion(&rxrpc_krxiod_dead);
-
-} /* end rxrpc_krxiod_kill() */
-
-/*****************************************************************************/
-/*
- * queue a transport for attention by krxiod
- */
-void rxrpc_krxiod_queue_transport(struct rxrpc_transport *trans)
-{
-	unsigned long flags;
-
-	_enter("");
-
-	if (list_empty(&trans->krxiodq_link)) {
-		spin_lock_irqsave(&rxrpc_krxiod_transportq_lock, flags);
-
-		if (list_empty(&trans->krxiodq_link)) {
-			if (atomic_read(&trans->usage) > 0) {
-				list_add_tail(&trans->krxiodq_link,
-					      &rxrpc_krxiod_transportq);
-				atomic_inc(&rxrpc_krxiod_qcount);
-			}
-		}
-
-		spin_unlock_irqrestore(&rxrpc_krxiod_transportq_lock, flags);
-		wake_up_all(&rxrpc_krxiod_sleepq);
-	}
-
-	_leave("");
-
-} /* end rxrpc_krxiod_queue_transport() */
-
-/*****************************************************************************/
-/*
- * dequeue a transport from krxiod's attention queue
- */
-void rxrpc_krxiod_dequeue_transport(struct rxrpc_transport *trans)
-{
-	unsigned long flags;
-
-	_enter("");
-
-	spin_lock_irqsave(&rxrpc_krxiod_transportq_lock, flags);
-	if (!list_empty(&trans->krxiodq_link)) {
-		list_del_init(&trans->krxiodq_link);
-		atomic_dec(&rxrpc_krxiod_qcount);
-	}
-	spin_unlock_irqrestore(&rxrpc_krxiod_transportq_lock, flags);
-
-	_leave("");
-
-} /* end rxrpc_krxiod_dequeue_transport() */
-
-/*****************************************************************************/
-/*
- * queue a call for attention by krxiod
- */
-void rxrpc_krxiod_queue_call(struct rxrpc_call *call)
-{
-	unsigned long flags;
-
-	if (list_empty(&call->rcv_krxiodq_lk)) {
-		spin_lock_irqsave(&rxrpc_krxiod_callq_lock, flags);
-		if (atomic_read(&call->usage) > 0) {
-			list_add_tail(&call->rcv_krxiodq_lk,
-				      &rxrpc_krxiod_callq);
-			atomic_inc(&rxrpc_krxiod_qcount);
-		}
-		spin_unlock_irqrestore(&rxrpc_krxiod_callq_lock, flags);
-	}
-	wake_up_all(&rxrpc_krxiod_sleepq);
-
-} /* end rxrpc_krxiod_queue_call() */
-
-/*****************************************************************************/
-/*
- * dequeue a call from krxiod's attention queue
- */
-void rxrpc_krxiod_dequeue_call(struct rxrpc_call *call)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rxrpc_krxiod_callq_lock, flags);
-	if (!list_empty(&call->rcv_krxiodq_lk)) {
-		list_del_init(&call->rcv_krxiodq_lk);
-		atomic_dec(&rxrpc_krxiod_qcount);
-	}
-	spin_unlock_irqrestore(&rxrpc_krxiod_callq_lock, flags);
-
-} /* end rxrpc_krxiod_dequeue_call() */
diff --git a/net/rxrpc/krxsecd.c b/net/rxrpc/krxsecd.c
deleted file mode 100644
index 9a1e7f5e034..00000000000
--- a/net/rxrpc/krxsecd.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/* krxsecd.c: Rx security daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * This daemon deals with:
- * - consulting the application as to whether inbound peers and calls should be authorised
- * - generating security challenges for inbound connections
- * - responding to security challenges on outbound connections
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/completion.h>
-#include <linux/spinlock.h>
-#include <linux/init.h>
-#include <rxrpc/krxsecd.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/message.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/call.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
-#include <linux/freezer.h>
-#include <net/sock.h>
-#include "internal.h"
-
-static DECLARE_WAIT_QUEUE_HEAD(rxrpc_krxsecd_sleepq);
-static DECLARE_COMPLETION(rxrpc_krxsecd_dead);
-static volatile int rxrpc_krxsecd_die;
-
-static atomic_t rxrpc_krxsecd_qcount;
-
-/* queue of unprocessed inbound messages with seqno #1 and
- * RXRPC_CLIENT_INITIATED flag set */
-static LIST_HEAD(rxrpc_krxsecd_initmsgq);
-static DEFINE_SPINLOCK(rxrpc_krxsecd_initmsgq_lock);
-
-static void rxrpc_krxsecd_process_incoming_call(struct rxrpc_message *msg);
-
-/*****************************************************************************/
-/*
- * Rx security daemon
- */
-static int rxrpc_krxsecd(void *arg)
-{
-	DECLARE_WAITQUEUE(krxsecd, current);
-
-	int die;
-
-	printk("Started krxsecd %d\n", current->pid);
-
-	daemonize("krxsecd");
-
-	/* loop around waiting for work to do */
-	do {
-		/* wait for work or to be told to exit */
-		_debug("### Begin Wait");
-		if (!atomic_read(&rxrpc_krxsecd_qcount)) {
-			set_current_state(TASK_INTERRUPTIBLE);
-
-			add_wait_queue(&rxrpc_krxsecd_sleepq, &krxsecd);
-
-			for (;;) {
-				set_current_state(TASK_INTERRUPTIBLE);
-				if (atomic_read(&rxrpc_krxsecd_qcount) ||
-				    rxrpc_krxsecd_die ||
-				    signal_pending(current))
-					break;
-
-				schedule();
-			}
-
-			remove_wait_queue(&rxrpc_krxsecd_sleepq, &krxsecd);
-			set_current_state(TASK_RUNNING);
-		}
-		die = rxrpc_krxsecd_die;
-		_debug("### End Wait");
-
-		/* see if there're incoming calls in need of authenticating */
-		_debug("### Begin Inbound Calls");
-
-		if (!list_empty(&rxrpc_krxsecd_initmsgq)) {
-			struct rxrpc_message *msg = NULL;
-
-			spin_lock(&rxrpc_krxsecd_initmsgq_lock);
-
-			if (!list_empty(&rxrpc_krxsecd_initmsgq)) {
-				msg = list_entry(rxrpc_krxsecd_initmsgq.next,
-						 struct rxrpc_message, link);
-				list_del_init(&msg->link);
-				atomic_dec(&rxrpc_krxsecd_qcount);
-			}
-
-			spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
-
-			if (msg) {
-				rxrpc_krxsecd_process_incoming_call(msg);
-				rxrpc_put_message(msg);
-			}
-		}
-
-		_debug("### End Inbound Calls");
-
-		try_to_freeze();
-
-		/* discard pending signals */
-		rxrpc_discard_my_signals();
-
-	} while (!die);
-
-	/* and that's all */
-	complete_and_exit(&rxrpc_krxsecd_dead, 0);
-
-} /* end rxrpc_krxsecd() */
-
-/*****************************************************************************/
-/*
- * start up a krxsecd daemon
- */
-int __init rxrpc_krxsecd_init(void)
-{
-	return kernel_thread(rxrpc_krxsecd, NULL, 0);
-
-} /* end rxrpc_krxsecd_init() */
-
-/*****************************************************************************/
-/*
- * kill the krxsecd daemon and wait for it to complete
- */
-void rxrpc_krxsecd_kill(void)
-{
-	rxrpc_krxsecd_die = 1;
-	wake_up_all(&rxrpc_krxsecd_sleepq);
-	wait_for_completion(&rxrpc_krxsecd_dead);
-
-} /* end rxrpc_krxsecd_kill() */
-
-/*****************************************************************************/
-/*
- * clear all pending incoming calls for the specified transport
- */
-void rxrpc_krxsecd_clear_transport(struct rxrpc_transport *trans)
-{
-	LIST_HEAD(tmp);
-
-	struct rxrpc_message *msg;
-	struct list_head *_p, *_n;
-
-	_enter("%p",trans);
-
-	/* move all the messages for this transport onto a temp list */
-	spin_lock(&rxrpc_krxsecd_initmsgq_lock);
-
-	list_for_each_safe(_p, _n, &rxrpc_krxsecd_initmsgq) {
-		msg = list_entry(_p, struct rxrpc_message, link);
-		if (msg->trans == trans) {
-			list_move_tail(&msg->link, &tmp);
-			atomic_dec(&rxrpc_krxsecd_qcount);
-		}
-	}
-
-	spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
-
-	/* zap all messages on the temp list */
-	while (!list_empty(&tmp)) {
-		msg = list_entry(tmp.next, struct rxrpc_message, link);
-		list_del_init(&msg->link);
-		rxrpc_put_message(msg);
-	}
-
-	_leave("");
-} /* end rxrpc_krxsecd_clear_transport() */
-
-/*****************************************************************************/
-/*
- * queue a message on the incoming calls list
- */
-void rxrpc_krxsecd_queue_incoming_call(struct rxrpc_message *msg)
-{
-	_enter("%p", msg);
-
-	/* queue for processing by krxsecd */
-	spin_lock(&rxrpc_krxsecd_initmsgq_lock);
-
-	if (!rxrpc_krxsecd_die) {
-		rxrpc_get_message(msg);
-		list_add_tail(&msg->link, &rxrpc_krxsecd_initmsgq);
-		atomic_inc(&rxrpc_krxsecd_qcount);
-	}
-
-	spin_unlock(&rxrpc_krxsecd_initmsgq_lock);
-
-	wake_up(&rxrpc_krxsecd_sleepq);
-
-	_leave("");
-} /* end rxrpc_krxsecd_queue_incoming_call() */
-
-/*****************************************************************************/
-/*
- * process the initial message of an incoming call
- */
-void rxrpc_krxsecd_process_incoming_call(struct rxrpc_message *msg)
-{
-	struct rxrpc_transport *trans = msg->trans;
-	struct rxrpc_service *srv;
-	struct rxrpc_call *call;
-	struct list_head *_p;
-	unsigned short sid;
-	int ret;
-
-	_enter("%p{tr=%p}", msg, trans);
-
-	ret = rxrpc_incoming_call(msg->conn, msg, &call);
-	if (ret < 0)
-		goto out;
-
-	/* find the matching service on the transport */
-	sid = ntohs(msg->hdr.serviceId);
-	srv = NULL;
-
-	spin_lock(&trans->lock);
-	list_for_each(_p, &trans->services) {
-		srv = list_entry(_p, struct rxrpc_service, link);
-		if (srv->service_id == sid && try_module_get(srv->owner)) {
-			/* found a match (made sure it won't vanish) */
-			_debug("found service '%s'", srv->name);
-			call->owner = srv->owner;
-			break;
-		}
-	}
-	spin_unlock(&trans->lock);
-
-	/* report the new connection
-	 * - the func must inc the call's usage count to keep it
-	 */
-	ret = -ENOENT;
-	if (_p != &trans->services) {
-		/* attempt to accept the call */
-		call->conn->service = srv;
-		call->app_attn_func = srv->attn_func;
-		call->app_error_func = srv->error_func;
-		call->app_aemap_func = srv->aemap_func;
-
-		ret = srv->new_call(call);
-
-		/* send an abort if an error occurred */
-		if (ret < 0) {
-			rxrpc_call_abort(call, ret);
-		}
-		else {
-			/* formally receive and ACK the new packet */
-			ret = rxrpc_conn_receive_call_packet(call->conn,
-							     call, msg);
-		}
-	}
-
-	rxrpc_put_call(call);
- out:
-	if (ret < 0)
-		rxrpc_trans_immediate_abort(trans, msg, ret);
-
-	_leave(" (%d)", ret);
-} /* end rxrpc_krxsecd_process_incoming_call() */
diff --git a/net/rxrpc/krxtimod.c b/net/rxrpc/krxtimod.c
deleted file mode 100644
index 9a9b6132dba..00000000000
--- a/net/rxrpc/krxtimod.c
+++ /dev/null
@@ -1,204 +0,0 @@
-/* krxtimod.c: RXRPC timeout daemon
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/completion.h>
-#include <linux/freezer.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/krxtimod.h>
-#include <asm/errno.h>
-#include "internal.h"
-
-static DECLARE_COMPLETION(krxtimod_alive);
-static DECLARE_COMPLETION(krxtimod_dead);
-static DECLARE_WAIT_QUEUE_HEAD(krxtimod_sleepq);
-static int krxtimod_die;
-
-static LIST_HEAD(krxtimod_list);
-static DEFINE_SPINLOCK(krxtimod_lock);
-
-static int krxtimod(void *arg);
-
-/*****************************************************************************/
-/*
- * start the timeout daemon
- */
-int rxrpc_krxtimod_start(void)
-{
-	int ret;
-
-	ret = kernel_thread(krxtimod, NULL, 0);
-	if (ret < 0)
-		return ret;
-
-	wait_for_completion(&krxtimod_alive);
-
-	return ret;
-} /* end rxrpc_krxtimod_start() */
-
-/*****************************************************************************/
-/*
- * stop the timeout daemon
- */
-void rxrpc_krxtimod_kill(void)
-{
-	/* get rid of my daemon */
-	krxtimod_die = 1;
-	wake_up(&krxtimod_sleepq);
-	wait_for_completion(&krxtimod_dead);
-
-} /* end rxrpc_krxtimod_kill() */
-
-/*****************************************************************************/
-/*
- * timeout processing daemon
- */
-static int krxtimod(void *arg)
-{
-	DECLARE_WAITQUEUE(myself, current);
-
-	rxrpc_timer_t *timer;
-
-	printk("Started krxtimod %d\n", current->pid);
-
-	daemonize("krxtimod");
-
-	complete(&krxtimod_alive);
-
-	/* loop around looking for things to attend to */
- loop:
-	set_current_state(TASK_INTERRUPTIBLE);
-	add_wait_queue(&krxtimod_sleepq, &myself);
-
-	for (;;) {
-		unsigned long jif;
-		long timeout;
-
-		/* deal with the server being asked to die */
-		if (krxtimod_die) {
-			remove_wait_queue(&krxtimod_sleepq, &myself);
-			_leave("");
-			complete_and_exit(&krxtimod_dead, 0);
-		}
-
-		try_to_freeze();
-
-		/* discard pending signals */
-		rxrpc_discard_my_signals();
-
-		/* work out the time to elapse before the next event */
-		spin_lock(&krxtimod_lock);
-		if (list_empty(&krxtimod_list)) {
-			timeout = MAX_SCHEDULE_TIMEOUT;
-		}
-		else {
-			timer = list_entry(krxtimod_list.next,
-					   rxrpc_timer_t, link);
-			timeout = timer->timo_jif;
-			jif = jiffies;
-
-			if (time_before_eq((unsigned long) timeout, jif))
-				goto immediate;
-
-			else {
-				timeout = (long) timeout - (long) jiffies;
-			}
-		}
-		spin_unlock(&krxtimod_lock);
-
-		schedule_timeout(timeout);
-
-		set_current_state(TASK_INTERRUPTIBLE);
-	}
-
-	/* the thing on the front of the queue needs processing
-	 * - we come here with the lock held and timer pointing to the expired
-	 *   entry
-	 */
- immediate:
-	remove_wait_queue(&krxtimod_sleepq, &myself);
-	set_current_state(TASK_RUNNING);
-
-	_debug("@@@ Begin Timeout of %p", timer);
-
-	/* dequeue the timer */
-	list_del_init(&timer->link);
-	spin_unlock(&krxtimod_lock);
-
-	/* call the timeout function */
-	timer->ops->timed_out(timer);
-
-	_debug("@@@ End Timeout");
-	goto loop;
-
-} /* end krxtimod() */
-
-/*****************************************************************************/
-/*
- * (re-)queue a timer
- */
-void rxrpc_krxtimod_add_timer(rxrpc_timer_t *timer, unsigned long timeout)
-{
-	struct list_head *_p;
-	rxrpc_timer_t *ptimer;
-
-	_enter("%p,%lu", timer, timeout);
-
-	spin_lock(&krxtimod_lock);
-
-	list_del(&timer->link);
-
-	/* the timer was deferred or reset - put it back in the queue at the
-	 * right place */
-	timer->timo_jif = jiffies + timeout;
-
-	list_for_each(_p, &krxtimod_list) {
-		ptimer = list_entry(_p, rxrpc_timer_t, link);
-		if (time_before(timer->timo_jif, ptimer->timo_jif))
-			break;
-	}
-
-	list_add_tail(&timer->link, _p); /* insert before stopping point */
-
-	spin_unlock(&krxtimod_lock);
-
-	wake_up(&krxtimod_sleepq);
-
-	_leave("");
-} /* end rxrpc_krxtimod_add_timer() */
-
-/*****************************************************************************/
-/*
- * dequeue a timer
- * - returns 0 if the timer was deleted or -ENOENT if it wasn't queued
- */
-int rxrpc_krxtimod_del_timer(rxrpc_timer_t *timer)
-{
-	int ret = 0;
-
-	_enter("%p", timer);
-
-	spin_lock(&krxtimod_lock);
-
-	if (list_empty(&timer->link))
-		ret = -ENOENT;
-	else
-		list_del_init(&timer->link);
-
-	spin_unlock(&krxtimod_lock);
-
-	wake_up(&krxtimod_sleepq);
-
-	_leave(" = %d", ret);
-	return ret;
-} /* end rxrpc_krxtimod_del_timer() */
diff --git a/net/rxrpc/main.c b/net/rxrpc/main.c
deleted file mode 100644
index cead31b5bdf..00000000000
--- a/net/rxrpc/main.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/* main.c: Rx RPC interface
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/krxiod.h>
-#include <rxrpc/krxsecd.h>
-#include <rxrpc/krxtimod.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include "internal.h"
-
-MODULE_DESCRIPTION("Rx RPC implementation");
-MODULE_AUTHOR("Red Hat, Inc.");
-MODULE_LICENSE("GPL");
-
-__be32 rxrpc_epoch;
-
-/*****************************************************************************/
-/*
- * initialise the Rx module
- */
-static int __init rxrpc_initialise(void)
-{
-	int ret;
-
-	/* my epoch value */
-	rxrpc_epoch = htonl(get_seconds());
-
-	/* register the /proc interface */
-#ifdef CONFIG_PROC_FS
-	ret = rxrpc_proc_init();
-	if (ret<0)
-		return ret;
-#endif
-
-	/* register the sysctl files */
-#ifdef CONFIG_SYSCTL
-	ret = rxrpc_sysctl_init();
-	if (ret<0)
-		goto error_proc;
-#endif
-
-	/* start the krxtimod daemon */
-	ret = rxrpc_krxtimod_start();
-	if (ret<0)
-		goto error_sysctl;
-
-	/* start the krxiod daemon */
-	ret = rxrpc_krxiod_init();
-	if (ret<0)
-		goto error_krxtimod;
-
-	/* start the krxsecd daemon */
-	ret = rxrpc_krxsecd_init();
-	if (ret<0)
-		goto error_krxiod;
-
-	kdebug("\n\n");
-
-	return 0;
-
- error_krxiod:
-	rxrpc_krxiod_kill();
- error_krxtimod:
-	rxrpc_krxtimod_kill();
- error_sysctl:
-#ifdef CONFIG_SYSCTL
-	rxrpc_sysctl_cleanup();
- error_proc:
-#endif
-#ifdef CONFIG_PROC_FS
-	rxrpc_proc_cleanup();
-#endif
-	return ret;
-} /* end rxrpc_initialise() */
-
-module_init(rxrpc_initialise);
-
-/*****************************************************************************/
-/*
- * clean up the Rx module
- */
-static void __exit rxrpc_cleanup(void)
-{
-	kenter("");
-
-	__RXACCT(printk("Outstanding Messages   : %d\n",
-			atomic_read(&rxrpc_message_count)));
-	__RXACCT(printk("Outstanding Calls      : %d\n",
-			atomic_read(&rxrpc_call_count)));
-	__RXACCT(printk("Outstanding Connections: %d\n",
-			atomic_read(&rxrpc_connection_count)));
-	__RXACCT(printk("Outstanding Peers      : %d\n",
-			atomic_read(&rxrpc_peer_count)));
-	__RXACCT(printk("Outstanding Transports : %d\n",
-			atomic_read(&rxrpc_transport_count)));
-
-	rxrpc_krxsecd_kill();
-	rxrpc_krxiod_kill();
-	rxrpc_krxtimod_kill();
-#ifdef CONFIG_SYSCTL
-	rxrpc_sysctl_cleanup();
-#endif
-#ifdef CONFIG_PROC_FS
-	rxrpc_proc_cleanup();
-#endif
-
-	__RXACCT(printk("Outstanding Messages   : %d\n",
-			atomic_read(&rxrpc_message_count)));
-	__RXACCT(printk("Outstanding Calls      : %d\n",
-			atomic_read(&rxrpc_call_count)));
-	__RXACCT(printk("Outstanding Connections: %d\n",
-			atomic_read(&rxrpc_connection_count)));
-	__RXACCT(printk("Outstanding Peers      : %d\n",
-			atomic_read(&rxrpc_peer_count)));
-	__RXACCT(printk("Outstanding Transports : %d\n",
-			atomic_read(&rxrpc_transport_count)));
-
-	kleave("");
-} /* end rxrpc_cleanup() */
-
-module_exit(rxrpc_cleanup);
-
-/*****************************************************************************/
-/*
- * clear the dead space between task_struct and kernel stack
- * - called by supplying -finstrument-functions to gcc
- */
-#if 0
-void __cyg_profile_func_enter (void *this_fn, void *call_site)
-__attribute__((no_instrument_function));
-
-void __cyg_profile_func_enter (void *this_fn, void *call_site)
-{
-       asm volatile("  movl    %%esp,%%edi     \n"
-		    "  andl    %0,%%edi        \n"
-		    "  addl    %1,%%edi        \n"
-		    "  movl    %%esp,%%ecx     \n"
-		    "  subl    %%edi,%%ecx     \n"
-		    "  shrl    $2,%%ecx        \n"
-		    "  movl    $0xedededed,%%eax     \n"
-		    "  rep stosl               \n"
-		    :
-		    : "i"(~(THREAD_SIZE-1)), "i"(sizeof(struct thread_info))
-		    : "eax", "ecx", "edi", "memory", "cc"
-		    );
-}
-
-void __cyg_profile_func_exit(void *this_fn, void *call_site)
-__attribute__((no_instrument_function));
-
-void __cyg_profile_func_exit(void *this_fn, void *call_site)
-{
-       asm volatile("  movl    %%esp,%%edi     \n"
-		    "  andl    %0,%%edi        \n"
-		    "  addl    %1,%%edi        \n"
-		    "  movl    %%esp,%%ecx     \n"
-		    "  subl    %%edi,%%ecx     \n"
-		    "  shrl    $2,%%ecx        \n"
-		    "  movl    $0xdadadada,%%eax     \n"
-		    "  rep stosl               \n"
-		    :
-		    : "i"(~(THREAD_SIZE-1)), "i"(sizeof(struct thread_info))
-		    : "eax", "ecx", "edi", "memory", "cc"
-		    );
-}
-#endif
diff --git a/net/rxrpc/peer.c b/net/rxrpc/peer.c
deleted file mode 100644
index 8a275157a3b..00000000000
--- a/net/rxrpc/peer.c
+++ /dev/null
@@ -1,398 +0,0 @@
-/* peer.c: Rx RPC peer management
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
-#include <net/sock.h>
-#include <asm/uaccess.h>
-#include <asm/div64.h>
-#include "internal.h"
-
-__RXACCT_DECL(atomic_t rxrpc_peer_count);
-LIST_HEAD(rxrpc_peers);
-DECLARE_RWSEM(rxrpc_peers_sem);
-unsigned long rxrpc_peer_timeout = 12 * 60 * 60;
-
-static void rxrpc_peer_do_timeout(struct rxrpc_peer *peer);
-
-static void __rxrpc_peer_timeout(rxrpc_timer_t *timer)
-{
-	struct rxrpc_peer *peer =
-		list_entry(timer, struct rxrpc_peer, timeout);
-
-	_debug("Rx PEER TIMEOUT [%p{u=%d}]", peer, atomic_read(&peer->usage));
-
-	rxrpc_peer_do_timeout(peer);
-}
-
-static const struct rxrpc_timer_ops rxrpc_peer_timer_ops = {
-	.timed_out	= __rxrpc_peer_timeout,
-};
-
-/*****************************************************************************/
-/*
- * create a peer record
- */
-static int __rxrpc_create_peer(struct rxrpc_transport *trans, __be32 addr,
-			       struct rxrpc_peer **_peer)
-{
-	struct rxrpc_peer *peer;
-
-	_enter("%p,%08x", trans, ntohl(addr));
-
-	/* allocate and initialise a peer record */
-	peer = kzalloc(sizeof(struct rxrpc_peer), GFP_KERNEL);
-	if (!peer) {
-		_leave(" = -ENOMEM");
-		return -ENOMEM;
-	}
-
-	atomic_set(&peer->usage, 1);
-
-	INIT_LIST_HEAD(&peer->link);
-	INIT_LIST_HEAD(&peer->proc_link);
-	INIT_LIST_HEAD(&peer->conn_idlist);
-	INIT_LIST_HEAD(&peer->conn_active);
-	INIT_LIST_HEAD(&peer->conn_graveyard);
-	spin_lock_init(&peer->conn_gylock);
-	init_waitqueue_head(&peer->conn_gy_waitq);
-	rwlock_init(&peer->conn_idlock);
-	rwlock_init(&peer->conn_lock);
-	atomic_set(&peer->conn_count, 0);
-	spin_lock_init(&peer->lock);
-	rxrpc_timer_init(&peer->timeout, &rxrpc_peer_timer_ops);
-
-	peer->addr.s_addr = addr;
-
-	peer->trans = trans;
-	peer->ops = trans->peer_ops;
-
-	__RXACCT(atomic_inc(&rxrpc_peer_count));
-	*_peer = peer;
-	_leave(" = 0 (%p)", peer);
-
-	return 0;
-} /* end __rxrpc_create_peer() */
-
-/*****************************************************************************/
-/*
- * find a peer record on the specified transport
- * - returns (if successful) with peer record usage incremented
- * - resurrects it from the graveyard if found there
- */
-int rxrpc_peer_lookup(struct rxrpc_transport *trans, __be32 addr,
-		      struct rxrpc_peer **_peer)
-{
-	struct rxrpc_peer *peer, *candidate = NULL;
-	struct list_head *_p;
-	int ret;
-
-	_enter("%p{%hu},%08x", trans, trans->port, ntohl(addr));
-
-	/* [common case] search the transport's active list first */
-	read_lock(&trans->peer_lock);
-	list_for_each(_p, &trans->peer_active) {
-		peer = list_entry(_p, struct rxrpc_peer, link);
-		if (peer->addr.s_addr == addr)
-			goto found_active;
-	}
-	read_unlock(&trans->peer_lock);
-
-	/* [uncommon case] not active - create a candidate for a new record */
-	ret = __rxrpc_create_peer(trans, addr, &candidate);
-	if (ret < 0) {
-		_leave(" = %d", ret);
-		return ret;
-	}
-
-	/* search the active list again, just in case it appeared whilst we
-	 * were busy */
-	write_lock(&trans->peer_lock);
-	list_for_each(_p, &trans->peer_active) {
-		peer = list_entry(_p, struct rxrpc_peer, link);
-		if (peer->addr.s_addr == addr)
-			goto found_active_second_chance;
-	}
-
-	/* search the transport's graveyard list */
-	spin_lock(&trans->peer_gylock);
-	list_for_each(_p, &trans->peer_graveyard) {
-		peer = list_entry(_p, struct rxrpc_peer, link);
-		if (peer->addr.s_addr == addr)
-			goto found_in_graveyard;
-	}
-	spin_unlock(&trans->peer_gylock);
-
-	/* we can now add the new candidate to the list
-	 * - tell the application layer that this peer has been added
-	 */
-	rxrpc_get_transport(trans);
-	peer = candidate;
-	candidate = NULL;
-
-	if (peer->ops && peer->ops->adding) {
-		ret = peer->ops->adding(peer);
-		if (ret < 0) {
-			write_unlock(&trans->peer_lock);
-			__RXACCT(atomic_dec(&rxrpc_peer_count));
-			kfree(peer);
-			rxrpc_put_transport(trans);
-			_leave(" = %d", ret);
-			return ret;
-		}
-	}
-
-	atomic_inc(&trans->peer_count);
-
- make_active:
-	list_add_tail(&peer->link, &trans->peer_active);
-
- success_uwfree:
-	write_unlock(&trans->peer_lock);
-
-	if (candidate) {
-		__RXACCT(atomic_dec(&rxrpc_peer_count));
-		kfree(candidate);
-	}
-
-	if (list_empty(&peer->proc_link)) {
-		down_write(&rxrpc_peers_sem);
-		list_add_tail(&peer->proc_link, &rxrpc_peers);
-		up_write(&rxrpc_peers_sem);
-	}
-
- success:
-	*_peer = peer;
-
-	_leave(" = 0 (%p{u=%d cc=%d})",
-	       peer,
-	       atomic_read(&peer->usage),
-	       atomic_read(&peer->conn_count));
-	return 0;
-
-	/* handle the peer being found in the active list straight off */
- found_active:
-	rxrpc_get_peer(peer);
-	read_unlock(&trans->peer_lock);
-	goto success;
-
-	/* handle resurrecting a peer from the graveyard */
- found_in_graveyard:
-	rxrpc_get_peer(peer);
-	rxrpc_get_transport(peer->trans);
-	rxrpc_krxtimod_del_timer(&peer->timeout);
-	list_del_init(&peer->link);
-	spin_unlock(&trans->peer_gylock);
-	goto make_active;
-
-	/* handle finding the peer on the second time through the active
-	 * list */
- found_active_second_chance:
-	rxrpc_get_peer(peer);
-	goto success_uwfree;
-
-} /* end rxrpc_peer_lookup() */
-
-/*****************************************************************************/
-/*
- * finish with a peer record
- * - it gets sent to the graveyard from where it can be resurrected or timed
- *   out
- */
-void rxrpc_put_peer(struct rxrpc_peer *peer)
-{
-	struct rxrpc_transport *trans = peer->trans;
-
-	_enter("%p{cc=%d a=%08x}",
-	       peer,
-	       atomic_read(&peer->conn_count),
-	       ntohl(peer->addr.s_addr));
-
-	/* sanity check */
-	if (atomic_read(&peer->usage) <= 0)
-		BUG();
-
-	write_lock(&trans->peer_lock);
-	spin_lock(&trans->peer_gylock);
-	if (likely(!atomic_dec_and_test(&peer->usage))) {
-		spin_unlock(&trans->peer_gylock);
-		write_unlock(&trans->peer_lock);
-		_leave("");
-		return;
-	}
-
-	/* move to graveyard queue */
-	list_del(&peer->link);
-	write_unlock(&trans->peer_lock);
-
-	list_add_tail(&peer->link, &trans->peer_graveyard);
-
-	BUG_ON(!list_empty(&peer->conn_active));
-
-	rxrpc_krxtimod_add_timer(&peer->timeout, rxrpc_peer_timeout * HZ);
-
-	spin_unlock(&trans->peer_gylock);
-
-	rxrpc_put_transport(trans);
-
-	_leave(" [killed]");
-} /* end rxrpc_put_peer() */
-
-/*****************************************************************************/
-/*
- * handle a peer timing out in the graveyard
- * - called from krxtimod
- */
-static void rxrpc_peer_do_timeout(struct rxrpc_peer *peer)
-{
-	struct rxrpc_transport *trans = peer->trans;
-
-	_enter("%p{u=%d cc=%d a=%08x}",
-	       peer,
-	       atomic_read(&peer->usage),
-	       atomic_read(&peer->conn_count),
-	       ntohl(peer->addr.s_addr));
-
-	BUG_ON(atomic_read(&peer->usage) < 0);
-
-	/* remove from graveyard if still dead */
-	spin_lock(&trans->peer_gylock);
-	if (atomic_read(&peer->usage) == 0)
-		list_del_init(&peer->link);
-	else
-		peer = NULL;
-	spin_unlock(&trans->peer_gylock);
-
-	if (!peer) {
-		_leave("");
-		return; /* resurrected */
-	}
-
-	/* clear all connections on this peer */
-	rxrpc_conn_clearall(peer);
-
-	BUG_ON(!list_empty(&peer->conn_active));
-	BUG_ON(!list_empty(&peer->conn_graveyard));
-
-	/* inform the application layer */
-	if (peer->ops && peer->ops->discarding)
-		peer->ops->discarding(peer);
-
-	if (!list_empty(&peer->proc_link)) {
-		down_write(&rxrpc_peers_sem);
-		list_del(&peer->proc_link);
-		up_write(&rxrpc_peers_sem);
-	}
-
-	__RXACCT(atomic_dec(&rxrpc_peer_count));
-	kfree(peer);
-
-	/* if the graveyard is now empty, wake up anyone waiting for that */
-	if (atomic_dec_and_test(&trans->peer_count))
-		wake_up(&trans->peer_gy_waitq);
-
-	_leave(" [destroyed]");
-} /* end rxrpc_peer_do_timeout() */
-
-/*****************************************************************************/
-/*
- * clear all peer records from a transport endpoint
- */
-void rxrpc_peer_clearall(struct rxrpc_transport *trans)
-{
-	DECLARE_WAITQUEUE(myself,current);
-
-	struct rxrpc_peer *peer;
-	int err;
-
-	_enter("%p",trans);
-
-	/* there shouldn't be any active peers remaining */
-	BUG_ON(!list_empty(&trans->peer_active));
-
-	/* manually timeout all peers in the graveyard */
-	spin_lock(&trans->peer_gylock);
-	while (!list_empty(&trans->peer_graveyard)) {
-		peer = list_entry(trans->peer_graveyard.next,
-				  struct rxrpc_peer, link);
-		_debug("Clearing peer %p\n", peer);
-		err = rxrpc_krxtimod_del_timer(&peer->timeout);
-		spin_unlock(&trans->peer_gylock);
-
-		if (err == 0)
-			rxrpc_peer_do_timeout(peer);
-
-		spin_lock(&trans->peer_gylock);
-	}
-	spin_unlock(&trans->peer_gylock);
-
-	/* wait for the the peer graveyard to be completely cleared */
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	add_wait_queue(&trans->peer_gy_waitq, &myself);
-
-	while (atomic_read(&trans->peer_count) != 0) {
-		schedule();
-		set_current_state(TASK_UNINTERRUPTIBLE);
-	}
-
-	remove_wait_queue(&trans->peer_gy_waitq, &myself);
-	set_current_state(TASK_RUNNING);
-
-	_leave("");
-} /* end rxrpc_peer_clearall() */
-
-/*****************************************************************************/
-/*
- * calculate and cache the Round-Trip-Time for a message and its response
- */
-void rxrpc_peer_calculate_rtt(struct rxrpc_peer *peer,
-			      struct rxrpc_message *msg,
-			      struct rxrpc_message *resp)
-{
-	unsigned long long rtt;
-	int loop;
-
-	_enter("%p,%p,%p", peer, msg, resp);
-
-	/* calculate the latest RTT */
-	rtt = resp->stamp.tv_sec - msg->stamp.tv_sec;
-	rtt *= 1000000UL;
-	rtt += resp->stamp.tv_usec - msg->stamp.tv_usec;
-
-	/* add to cache */
-	peer->rtt_cache[peer->rtt_point] = rtt;
-	peer->rtt_point++;
-	peer->rtt_point %= RXRPC_RTT_CACHE_SIZE;
-
-	if (peer->rtt_usage < RXRPC_RTT_CACHE_SIZE)
-		peer->rtt_usage++;
-
-	/* recalculate RTT */
-	rtt = 0;
-	for (loop = peer->rtt_usage - 1; loop >= 0; loop--)
-		rtt += peer->rtt_cache[loop];
-
-	do_div(rtt, peer->rtt_usage);
-	peer->rtt = rtt;
-
-	_leave(" RTT=%lu.%lums",
-	       (long) (peer->rtt / 1000), (long) (peer->rtt % 1000));
-
-} /* end rxrpc_peer_calculate_rtt() */
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
deleted file mode 100644
index 8551c879e45..00000000000
--- a/net/rxrpc/proc.c
+++ /dev/null
@@ -1,617 +0,0 @@
-/* proc.c: /proc interface for RxRPC
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <rxrpc/rxrpc.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include "internal.h"
-
-static struct proc_dir_entry *proc_rxrpc;
-
-static int rxrpc_proc_transports_open(struct inode *inode, struct file *file);
-static void *rxrpc_proc_transports_start(struct seq_file *p, loff_t *pos);
-static void *rxrpc_proc_transports_next(struct seq_file *p, void *v, loff_t *pos);
-static void rxrpc_proc_transports_stop(struct seq_file *p, void *v);
-static int rxrpc_proc_transports_show(struct seq_file *m, void *v);
-
-static struct seq_operations rxrpc_proc_transports_ops = {
-	.start	= rxrpc_proc_transports_start,
-	.next	= rxrpc_proc_transports_next,
-	.stop	= rxrpc_proc_transports_stop,
-	.show	= rxrpc_proc_transports_show,
-};
-
-static const struct file_operations rxrpc_proc_transports_fops = {
-	.open		= rxrpc_proc_transports_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static int rxrpc_proc_peers_open(struct inode *inode, struct file *file);
-static void *rxrpc_proc_peers_start(struct seq_file *p, loff_t *pos);
-static void *rxrpc_proc_peers_next(struct seq_file *p, void *v, loff_t *pos);
-static void rxrpc_proc_peers_stop(struct seq_file *p, void *v);
-static int rxrpc_proc_peers_show(struct seq_file *m, void *v);
-
-static struct seq_operations rxrpc_proc_peers_ops = {
-	.start	= rxrpc_proc_peers_start,
-	.next	= rxrpc_proc_peers_next,
-	.stop	= rxrpc_proc_peers_stop,
-	.show	= rxrpc_proc_peers_show,
-};
-
-static const struct file_operations rxrpc_proc_peers_fops = {
-	.open		= rxrpc_proc_peers_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static int rxrpc_proc_conns_open(struct inode *inode, struct file *file);
-static void *rxrpc_proc_conns_start(struct seq_file *p, loff_t *pos);
-static void *rxrpc_proc_conns_next(struct seq_file *p, void *v, loff_t *pos);
-static void rxrpc_proc_conns_stop(struct seq_file *p, void *v);
-static int rxrpc_proc_conns_show(struct seq_file *m, void *v);
-
-static struct seq_operations rxrpc_proc_conns_ops = {
-	.start	= rxrpc_proc_conns_start,
-	.next	= rxrpc_proc_conns_next,
-	.stop	= rxrpc_proc_conns_stop,
-	.show	= rxrpc_proc_conns_show,
-};
-
-static const struct file_operations rxrpc_proc_conns_fops = {
-	.open		= rxrpc_proc_conns_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static int rxrpc_proc_calls_open(struct inode *inode, struct file *file);
-static void *rxrpc_proc_calls_start(struct seq_file *p, loff_t *pos);
-static void *rxrpc_proc_calls_next(struct seq_file *p, void *v, loff_t *pos);
-static void rxrpc_proc_calls_stop(struct seq_file *p, void *v);
-static int rxrpc_proc_calls_show(struct seq_file *m, void *v);
-
-static struct seq_operations rxrpc_proc_calls_ops = {
-	.start	= rxrpc_proc_calls_start,
-	.next	= rxrpc_proc_calls_next,
-	.stop	= rxrpc_proc_calls_stop,
-	.show	= rxrpc_proc_calls_show,
-};
-
-static const struct file_operations rxrpc_proc_calls_fops = {
-	.open		= rxrpc_proc_calls_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
-static const char *rxrpc_call_states7[] = {
-	"complet",
-	"error  ",
-	"rcv_op ",
-	"rcv_arg",
-	"got_arg",
-	"snd_rpl",
-	"fin_ack",
-	"snd_arg",
-	"rcv_rpl",
-	"got_rpl"
-};
-
-static const char *rxrpc_call_error_states7[] = {
-	"no_err ",
-	"loc_abt",
-	"rmt_abt",
-	"loc_err",
-	"rmt_err"
-};
-
-/*****************************************************************************/
-/*
- * initialise the /proc/net/rxrpc/ directory
- */
-int rxrpc_proc_init(void)
-{
-	struct proc_dir_entry *p;
-
-	proc_rxrpc = proc_mkdir("rxrpc", proc_net);
-	if (!proc_rxrpc)
-		goto error;
-	proc_rxrpc->owner = THIS_MODULE;
-
-	p = create_proc_entry("calls", 0, proc_rxrpc);
-	if (!p)
-		goto error_proc;
-	p->proc_fops = &rxrpc_proc_calls_fops;
-	p->owner = THIS_MODULE;
-
-	p = create_proc_entry("connections", 0, proc_rxrpc);
-	if (!p)
-		goto error_calls;
-	p->proc_fops = &rxrpc_proc_conns_fops;
-	p->owner = THIS_MODULE;
-
-	p = create_proc_entry("peers", 0, proc_rxrpc);
-	if (!p)
-		goto error_calls;
-	p->proc_fops = &rxrpc_proc_peers_fops;
-	p->owner = THIS_MODULE;
-
-	p = create_proc_entry("transports", 0, proc_rxrpc);
-	if (!p)
-		goto error_conns;
-	p->proc_fops = &rxrpc_proc_transports_fops;
-	p->owner = THIS_MODULE;
-
-	return 0;
-
- error_conns:
-	remove_proc_entry("connections", proc_rxrpc);
- error_calls:
-	remove_proc_entry("calls", proc_rxrpc);
- error_proc:
-	remove_proc_entry("rxrpc", proc_net);
- error:
-	return -ENOMEM;
-} /* end rxrpc_proc_init() */
-
-/*****************************************************************************/
-/*
- * clean up the /proc/net/rxrpc/ directory
- */
-void rxrpc_proc_cleanup(void)
-{
-	remove_proc_entry("transports", proc_rxrpc);
-	remove_proc_entry("peers", proc_rxrpc);
-	remove_proc_entry("connections", proc_rxrpc);
-	remove_proc_entry("calls", proc_rxrpc);
-
-	remove_proc_entry("rxrpc", proc_net);
-
-} /* end rxrpc_proc_cleanup() */
-
-/*****************************************************************************/
-/*
- * open "/proc/net/rxrpc/transports" which provides a summary of extant transports
- */
-static int rxrpc_proc_transports_open(struct inode *inode, struct file *file)
-{
-	struct seq_file *m;
-	int ret;
-
-	ret = seq_open(file, &rxrpc_proc_transports_ops);
-	if (ret < 0)
-		return ret;
-
-	m = file->private_data;
-	m->private = PDE(inode)->data;
-
-	return 0;
-} /* end rxrpc_proc_transports_open() */
-
-/*****************************************************************************/
-/*
- * set up the iterator to start reading from the transports list and return the first item
- */
-static void *rxrpc_proc_transports_start(struct seq_file *m, loff_t *_pos)
-{
-	struct list_head *_p;
-	loff_t pos = *_pos;
-
-	/* lock the list against modification */
-	down_read(&rxrpc_proc_transports_sem);
-
-	/* allow for the header line */
-	if (!pos)
-		return SEQ_START_TOKEN;
-	pos--;
-
-	/* find the n'th element in the list */
-	list_for_each(_p, &rxrpc_proc_transports)
-		if (!pos--)
-			break;
-
-	return _p != &rxrpc_proc_transports ? _p : NULL;
-} /* end rxrpc_proc_transports_start() */
-
-/*****************************************************************************/
-/*
- * move to next call in transports list
- */
-static void *rxrpc_proc_transports_next(struct seq_file *p, void *v, loff_t *pos)
-{
-	struct list_head *_p;
-
-	(*pos)++;
-
-	_p = v;
-	_p = (v == SEQ_START_TOKEN) ? rxrpc_proc_transports.next : _p->next;
-
-	return _p != &rxrpc_proc_transports ? _p : NULL;
-} /* end rxrpc_proc_transports_next() */
-
-/*****************************************************************************/
-/*
- * clean up after reading from the transports list
- */
-static void rxrpc_proc_transports_stop(struct seq_file *p, void *v)
-{
-	up_read(&rxrpc_proc_transports_sem);
-
-} /* end rxrpc_proc_transports_stop() */
-
-/*****************************************************************************/
-/*
- * display a header line followed by a load of call lines
- */
-static int rxrpc_proc_transports_show(struct seq_file *m, void *v)
-{
-	struct rxrpc_transport *trans =
-		list_entry(v, struct rxrpc_transport, proc_link);
-
-	/* display header on line 1 */
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m, "LOCAL USE\n");
-		return 0;
-	}
-
-	/* display one transport per line on subsequent lines */
-	seq_printf(m, "%5hu %3d\n",
-		   trans->port,
-		   atomic_read(&trans->usage)
-		   );
-
-	return 0;
-} /* end rxrpc_proc_transports_show() */
-
-/*****************************************************************************/
-/*
- * open "/proc/net/rxrpc/peers" which provides a summary of extant peers
- */
-static int rxrpc_proc_peers_open(struct inode *inode, struct file *file)
-{
-	struct seq_file *m;
-	int ret;
-
-	ret = seq_open(file, &rxrpc_proc_peers_ops);
-	if (ret < 0)
-		return ret;
-
-	m = file->private_data;
-	m->private = PDE(inode)->data;
-
-	return 0;
-} /* end rxrpc_proc_peers_open() */
-
-/*****************************************************************************/
-/*
- * set up the iterator to start reading from the peers list and return the
- * first item
- */
-static void *rxrpc_proc_peers_start(struct seq_file *m, loff_t *_pos)
-{
-	struct list_head *_p;
-	loff_t pos = *_pos;
-
-	/* lock the list against modification */
-	down_read(&rxrpc_peers_sem);
-
-	/* allow for the header line */
-	if (!pos)
-		return SEQ_START_TOKEN;
-	pos--;
-
-	/* find the n'th element in the list */
-	list_for_each(_p, &rxrpc_peers)
-		if (!pos--)
-			break;
-
-	return _p != &rxrpc_peers ? _p : NULL;
-} /* end rxrpc_proc_peers_start() */
-
-/*****************************************************************************/
-/*
- * move to next conn in peers list
- */
-static void *rxrpc_proc_peers_next(struct seq_file *p, void *v, loff_t *pos)
-{
-	struct list_head *_p;
-
-	(*pos)++;
-
-	_p = v;
-	_p = (v == SEQ_START_TOKEN) ? rxrpc_peers.next : _p->next;
-
-	return _p != &rxrpc_peers ? _p : NULL;
-} /* end rxrpc_proc_peers_next() */
-
-/*****************************************************************************/
-/*
- * clean up after reading from the peers list
- */
-static void rxrpc_proc_peers_stop(struct seq_file *p, void *v)
-{
-	up_read(&rxrpc_peers_sem);
-
-} /* end rxrpc_proc_peers_stop() */
-
-/*****************************************************************************/
-/*
- * display a header line followed by a load of conn lines
- */
-static int rxrpc_proc_peers_show(struct seq_file *m, void *v)
-{
-	struct rxrpc_peer *peer = list_entry(v, struct rxrpc_peer, proc_link);
-	long timeout;
-
-	/* display header on line 1 */
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m, "LOCAL REMOTE   USAGE CONNS  TIMEOUT"
-			 "   MTU RTT(uS)\n");
-		return 0;
-	}
-
-	/* display one peer per line on subsequent lines */
-	timeout = 0;
-	if (!list_empty(&peer->timeout.link))
-		timeout = (long) peer->timeout.timo_jif -
-			(long) jiffies;
-
-	seq_printf(m, "%5hu %08x %5d %5d %8ld %5Zu %7lu\n",
-		   peer->trans->port,
-		   ntohl(peer->addr.s_addr),
-		   atomic_read(&peer->usage),
-		   atomic_read(&peer->conn_count),
-		   timeout,
-		   peer->if_mtu,
-		   (long) peer->rtt
-		   );
-
-	return 0;
-} /* end rxrpc_proc_peers_show() */
-
-/*****************************************************************************/
-/*
- * open "/proc/net/rxrpc/connections" which provides a summary of extant
- * connections
- */
-static int rxrpc_proc_conns_open(struct inode *inode, struct file *file)
-{
-	struct seq_file *m;
-	int ret;
-
-	ret = seq_open(file, &rxrpc_proc_conns_ops);
-	if (ret < 0)
-		return ret;
-
-	m = file->private_data;
-	m->private = PDE(inode)->data;
-
-	return 0;
-} /* end rxrpc_proc_conns_open() */
-
-/*****************************************************************************/
-/*
- * set up the iterator to start reading from the conns list and return the
- * first item
- */
-static void *rxrpc_proc_conns_start(struct seq_file *m, loff_t *_pos)
-{
-	struct list_head *_p;
-	loff_t pos = *_pos;
-
-	/* lock the list against modification */
-	down_read(&rxrpc_conns_sem);
-
-	/* allow for the header line */
-	if (!pos)
-		return SEQ_START_TOKEN;
-	pos--;
-
-	/* find the n'th element in the list */
-	list_for_each(_p, &rxrpc_conns)
-		if (!pos--)
-			break;
-
-	return _p != &rxrpc_conns ? _p : NULL;
-} /* end rxrpc_proc_conns_start() */
-
-/*****************************************************************************/
-/*
- * move to next conn in conns list
- */
-static void *rxrpc_proc_conns_next(struct seq_file *p, void *v, loff_t *pos)
-{
-	struct list_head *_p;
-
-	(*pos)++;
-
-	_p = v;
-	_p = (v == SEQ_START_TOKEN) ? rxrpc_conns.next : _p->next;
-
-	return _p != &rxrpc_conns ? _p : NULL;
-} /* end rxrpc_proc_conns_next() */
-
-/*****************************************************************************/
-/*
- * clean up after reading from the conns list
- */
-static void rxrpc_proc_conns_stop(struct seq_file *p, void *v)
-{
-	up_read(&rxrpc_conns_sem);
-
-} /* end rxrpc_proc_conns_stop() */
-
-/*****************************************************************************/
-/*
- * display a header line followed by a load of conn lines
- */
-static int rxrpc_proc_conns_show(struct seq_file *m, void *v)
-{
-	struct rxrpc_connection *conn;
-	long timeout;
-
-	conn = list_entry(v, struct rxrpc_connection, proc_link);
-
-	/* display header on line 1 */
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m,
-			 "LOCAL REMOTE   RPORT SRVC CONN     END SERIALNO "
-			 "CALLNO     MTU  TIMEOUT"
-			 "\n");
-		return 0;
-	}
-
-	/* display one conn per line on subsequent lines */
-	timeout = 0;
-	if (!list_empty(&conn->timeout.link))
-		timeout = (long) conn->timeout.timo_jif -
-			(long) jiffies;
-
-	seq_printf(m,
-		   "%5hu %08x %5hu %04hx %08x %-3.3s %08x %08x %5Zu %8ld\n",
-		   conn->trans->port,
-		   ntohl(conn->addr.sin_addr.s_addr),
-		   ntohs(conn->addr.sin_port),
-		   ntohs(conn->service_id),
-		   ntohl(conn->conn_id),
-		   conn->out_clientflag ? "CLT" : "SRV",
-		   conn->serial_counter,
-		   conn->call_counter,
-		   conn->mtu_size,
-		   timeout
-		   );
-
-	return 0;
-} /* end rxrpc_proc_conns_show() */
-
-/*****************************************************************************/
-/*
- * open "/proc/net/rxrpc/calls" which provides a summary of extant calls
- */
-static int rxrpc_proc_calls_open(struct inode *inode, struct file *file)
-{
-	struct seq_file *m;
-	int ret;
-
-	ret = seq_open(file, &rxrpc_proc_calls_ops);
-	if (ret < 0)
-		return ret;
-
-	m = file->private_data;
-	m->private = PDE(inode)->data;
-
-	return 0;
-} /* end rxrpc_proc_calls_open() */
-
-/*****************************************************************************/
-/*
- * set up the iterator to start reading from the calls list and return the
- * first item
- */
-static void *rxrpc_proc_calls_start(struct seq_file *m, loff_t *_pos)
-{
-	struct list_head *_p;
-	loff_t pos = *_pos;
-
-	/* lock the list against modification */
-	down_read(&rxrpc_calls_sem);
-
-	/* allow for the header line */
-	if (!pos)
-		return SEQ_START_TOKEN;
-	pos--;
-
-	/* find the n'th element in the list */
-	list_for_each(_p, &rxrpc_calls)
-		if (!pos--)
-			break;
-
-	return _p != &rxrpc_calls ? _p : NULL;
-} /* end rxrpc_proc_calls_start() */
-
-/*****************************************************************************/
-/*
- * move to next call in calls list
- */
-static void *rxrpc_proc_calls_next(struct seq_file *p, void *v, loff_t *pos)
-{
-	struct list_head *_p;
-
-	(*pos)++;
-
-	_p = v;
-	_p = (v == SEQ_START_TOKEN) ? rxrpc_calls.next : _p->next;
-
-	return _p != &rxrpc_calls ? _p : NULL;
-} /* end rxrpc_proc_calls_next() */
-
-/*****************************************************************************/
-/*
- * clean up after reading from the calls list
- */
-static void rxrpc_proc_calls_stop(struct seq_file *p, void *v)
-{
-	up_read(&rxrpc_calls_sem);
-
-} /* end rxrpc_proc_calls_stop() */
-
-/*****************************************************************************/
-/*
- * display a header line followed by a load of call lines
- */
-static int rxrpc_proc_calls_show(struct seq_file *m, void *v)
-{
-	struct rxrpc_call *call = list_entry(v, struct rxrpc_call, call_link);
-
-	/* display header on line 1 */
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m,
-			 "LOCAL REMOT SRVC CONN     CALL     DIR USE "
-			 " L STATE   OPCODE ABORT    ERRNO\n"
-			 );
-		return 0;
-	}
-
-	/* display one call per line on subsequent lines */
-	seq_printf(m,
-		   "%5hu %5hu %04hx %08x %08x %s %3u%c"
-		   " %c %-7.7s %6d %08x %5d\n",
-		   call->conn->trans->port,
-		   ntohs(call->conn->addr.sin_port),
-		   ntohs(call->conn->service_id),
-		   ntohl(call->conn->conn_id),
-		   ntohl(call->call_id),
-		   call->conn->service ? "SVC" : "CLT",
-		   atomic_read(&call->usage),
-		   waitqueue_active(&call->waitq) ? 'w' : ' ',
-		   call->app_last_rcv ? 'Y' : '-',
-		   (call->app_call_state!=RXRPC_CSTATE_ERROR ?
-		    rxrpc_call_states7[call->app_call_state] :
-		    rxrpc_call_error_states7[call->app_err_state]),
-		   call->app_opcode,
-		   call->app_abort_code,
-		   call->app_errno
-		   );
-
-	return 0;
-} /* end rxrpc_proc_calls_show() */
diff --git a/net/rxrpc/rxrpc_syms.c b/net/rxrpc/rxrpc_syms.c
deleted file mode 100644
index 9896fd87a4d..00000000000
--- a/net/rxrpc/rxrpc_syms.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* rxrpc_syms.c: exported Rx RPC layer interface symbols
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-
-#include <rxrpc/transport.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/krxiod.h>
-
-/* call.c */
-EXPORT_SYMBOL(rxrpc_create_call);
-EXPORT_SYMBOL(rxrpc_put_call);
-EXPORT_SYMBOL(rxrpc_call_abort);
-EXPORT_SYMBOL(rxrpc_call_read_data);
-EXPORT_SYMBOL(rxrpc_call_write_data);
-
-/* connection.c */
-EXPORT_SYMBOL(rxrpc_create_connection);
-EXPORT_SYMBOL(rxrpc_put_connection);
-
-/* transport.c */
-EXPORT_SYMBOL(rxrpc_create_transport);
-EXPORT_SYMBOL(rxrpc_put_transport);
-EXPORT_SYMBOL(rxrpc_add_service);
-EXPORT_SYMBOL(rxrpc_del_service);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
deleted file mode 100644
index 884290754af..00000000000
--- a/net/rxrpc/sysctl.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/* sysctl.c: Rx RPC control
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/sysctl.h>
-#include <rxrpc/types.h>
-#include <rxrpc/rxrpc.h>
-#include <asm/errno.h>
-#include "internal.h"
-
-int rxrpc_ktrace;
-int rxrpc_kdebug;
-int rxrpc_kproto;
-int rxrpc_knet;
-
-#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *rxrpc_sysctl = NULL;
-
-static ctl_table rxrpc_sysctl_table[] = {
-	{
-		.ctl_name	= 1,
-		.procname	= "kdebug",
-		.data		= &rxrpc_kdebug,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
-	{
-		.ctl_name	= 2,
-		.procname	= "ktrace",
-		.data		= &rxrpc_ktrace,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
-	{
-		.ctl_name	= 3,
-		.procname	= "kproto",
-		.data		= &rxrpc_kproto,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
-	{
-		.ctl_name	= 4,
-		.procname	= "knet",
-		.data		= &rxrpc_knet,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec
-	},
-	{
-		.ctl_name	= 5,
-		.procname	= "peertimo",
-		.data		= &rxrpc_peer_timeout,
-		.maxlen		= sizeof(unsigned long),
-		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_minmax
-	},
-	{
-		.ctl_name	= 6,
-		.procname	= "conntimo",
-		.data		= &rxrpc_conn_timeout,
-		.maxlen		= sizeof(unsigned long),
-		.mode		= 0644,
-		.proc_handler	= &proc_doulongvec_minmax
-	},
-	{ .ctl_name = 0 }
-};
-
-static ctl_table rxrpc_dir_sysctl_table[] = {
-	{
-		.ctl_name	= 1,
-		.procname	= "rxrpc",
-		.maxlen		= 0,
-		.mode		= 0555,
-		.child		= rxrpc_sysctl_table
-	},
-	{ .ctl_name = 0 }
-};
-#endif /* CONFIG_SYSCTL */
-
-/*****************************************************************************/
-/*
- * initialise the sysctl stuff for Rx RPC
- */
-int rxrpc_sysctl_init(void)
-{
-#ifdef CONFIG_SYSCTL
-	rxrpc_sysctl = register_sysctl_table(rxrpc_dir_sysctl_table);
-	if (!rxrpc_sysctl)
-		return -ENOMEM;
-#endif /* CONFIG_SYSCTL */
-
-	return 0;
-} /* end rxrpc_sysctl_init() */
-
-/*****************************************************************************/
-/*
- * clean up the sysctl stuff for Rx RPC
- */
-void rxrpc_sysctl_cleanup(void)
-{
-#ifdef CONFIG_SYSCTL
-	if (rxrpc_sysctl) {
-		unregister_sysctl_table(rxrpc_sysctl);
-		rxrpc_sysctl = NULL;
-	}
-#endif /* CONFIG_SYSCTL */
-
-} /* end rxrpc_sysctl_cleanup() */
diff --git a/net/rxrpc/transport.c b/net/rxrpc/transport.c
deleted file mode 100644
index 62398fd01f8..00000000000
--- a/net/rxrpc/transport.c
+++ /dev/null
@@ -1,846 +0,0 @@
-/* transport.c: Rx Transport routines
- *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <rxrpc/transport.h>
-#include <rxrpc/peer.h>
-#include <rxrpc/connection.h>
-#include <rxrpc/call.h>
-#include <rxrpc/message.h>
-#include <rxrpc/krxiod.h>
-#include <rxrpc/krxsecd.h>
-#include <linux/udp.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/icmp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/ip.h>
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-#include <linux/ipv6.h>	/* this should _really_ be in errqueue.h.. */
-#endif
-#include <linux/errqueue.h>
-#include <asm/uaccess.h>
-#include "internal.h"
-
-struct errormsg {
-	struct cmsghdr			cmsg;		/* control message header */
-	struct sock_extended_err	ee;		/* extended error information */
-	struct sockaddr_in		icmp_src;	/* ICMP packet source address */
-};
-
-static DEFINE_SPINLOCK(rxrpc_transports_lock);
-static struct list_head rxrpc_transports = LIST_HEAD_INIT(rxrpc_transports);
-
-__RXACCT_DECL(atomic_t rxrpc_transport_count);
-LIST_HEAD(rxrpc_proc_transports);
-DECLARE_RWSEM(rxrpc_proc_transports_sem);
-
-static void rxrpc_data_ready(struct sock *sk, int count);
-static void rxrpc_error_report(struct sock *sk);
-static int rxrpc_trans_receive_new_call(struct rxrpc_transport *trans,
-					struct list_head *msgq);
-static void rxrpc_trans_receive_error_report(struct rxrpc_transport *trans);
-
-/*****************************************************************************/
-/*
- * create a new transport endpoint using the specified UDP port
- */
-int rxrpc_create_transport(unsigned short port,
-			   struct rxrpc_transport **_trans)
-{
-	struct rxrpc_transport *trans;
-	struct sockaddr_in sin;
-	mm_segment_t oldfs;
-	struct sock *sock;
-	int ret, opt;
-
-	_enter("%hu", port);
-
-	trans = kzalloc(sizeof(struct rxrpc_transport), GFP_KERNEL);
-	if (!trans)
-		return -ENOMEM;
-
-	atomic_set(&trans->usage, 1);
-	INIT_LIST_HEAD(&trans->services);
-	INIT_LIST_HEAD(&trans->link);
-	INIT_LIST_HEAD(&trans->krxiodq_link);
-	spin_lock_init(&trans->lock);
-	INIT_LIST_HEAD(&trans->peer_active);
-	INIT_LIST_HEAD(&trans->peer_graveyard);
-	spin_lock_init(&trans->peer_gylock);
-	init_waitqueue_head(&trans->peer_gy_waitq);
-	rwlock_init(&trans->peer_lock);
-	atomic_set(&trans->peer_count, 0);
-	trans->port = port;
-
-	/* create a UDP socket to be my actual transport endpoint */
-	ret = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &trans->socket);
-	if (ret < 0)
-		goto error;
-
-	/* use the specified port */
-	if (port) {
-		memset(&sin, 0, sizeof(sin));
-		sin.sin_family = AF_INET;
-		sin.sin_port = htons(port);
-		ret = trans->socket->ops->bind(trans->socket,
-					       (struct sockaddr *) &sin,
-					       sizeof(sin));
-		if (ret < 0)
-			goto error;
-	}
-
-	opt = 1;
-	oldfs = get_fs();
-	set_fs(KERNEL_DS);
-	ret = trans->socket->ops->setsockopt(trans->socket, SOL_IP, IP_RECVERR,
-					     (char *) &opt, sizeof(opt));
-	set_fs(oldfs);
-
-	spin_lock(&rxrpc_transports_lock);
-	list_add(&trans->link, &rxrpc_transports);
-	spin_unlock(&rxrpc_transports_lock);
-
-	/* set the socket up */
-	sock = trans->socket->sk;
-	sock->sk_user_data	= trans;
-	sock->sk_data_ready	= rxrpc_data_ready;
-	sock->sk_error_report	= rxrpc_error_report;
-
-	down_write(&rxrpc_proc_transports_sem);
-	list_add_tail(&trans->proc_link, &rxrpc_proc_transports);
-	up_write(&rxrpc_proc_transports_sem);
-
-	__RXACCT(atomic_inc(&rxrpc_transport_count));
-
-	*_trans = trans;
-	_leave(" = 0 (%p)", trans);
-	return 0;
-
- error:
-	/* finish cleaning up the transport (not really needed here, but...) */
-	if (trans->socket)
-		trans->socket->ops->shutdown(trans->socket, 2);
-
-	/* close the socket */
-	if (trans->socket) {
-		trans->socket->sk->sk_user_data = NULL;
-		sock_release(trans->socket);
-		trans->socket = NULL;
-	}
-
-	kfree(trans);
-
-
-	_leave(" = %d", ret);
-	return ret;
-} /* end rxrpc_create_transport() */
-
-/*****************************************************************************/
-/*
- * destroy a transport endpoint
- */
-void rxrpc_put_transport(struct rxrpc_transport *trans)
-{
-	_enter("%p{u=%d p=%hu}",
-	       trans, atomic_read(&trans->usage), trans->port);
-
-	BUG_ON(atomic_read(&trans->usage) <= 0);
-
-	/* to prevent a race, the decrement and the dequeue must be
-	 * effectively atomic */
-	spin_lock(&rxrpc_transports_lock);
-	if (likely(!atomic_dec_and_test(&trans->usage))) {
-		spin_unlock(&rxrpc_transports_lock);
-		_leave("");
-		return;
-	}
-
-	list_del(&trans->link);
-	spin_unlock(&rxrpc_transports_lock);
-
-	/* finish cleaning up the transport */
-	if (trans->socket)
-		trans->socket->ops->shutdown(trans->socket, 2);
-
-	rxrpc_krxsecd_clear_transport(trans);
-	rxrpc_krxiod_dequeue_transport(trans);
-
-	/* discard all peer information */
-	rxrpc_peer_clearall(trans);
-
-	down_write(&rxrpc_proc_transports_sem);
-	list_del(&trans->proc_link);
-	up_write(&rxrpc_proc_transports_sem);
-	__RXACCT(atomic_dec(&rxrpc_transport_count));
-
-	/* close the socket */
-	if (trans->socket) {
-		trans->socket->sk->sk_user_data = NULL;
-		sock_release(trans->socket);
-		trans->socket = NULL;
-	}
-
-	kfree(trans);
-
-	_leave("");
-} /* end rxrpc_put_transport() */
-
-/*****************************************************************************/
-/*
- * add a service to a transport to be listened upon
- */
-int rxrpc_add_service(struct rxrpc_transport *trans,
-		      struct rxrpc_service *newsrv)
-{
-	struct rxrpc_service *srv;
-	struct list_head *_p;
-	int ret = -EEXIST;
-
-	_enter("%p{%hu},%p{%hu}",
-	       trans, trans->port, newsrv, newsrv->service_id);
-
-	/* verify that the service ID is not already present */
-	spin_lock(&trans->lock);
-
-	list_for_each(_p, &trans->services) {
-		srv = list_entry(_p, struct rxrpc_service, link);
-		if (srv->service_id == newsrv->service_id)
-			goto out;
-	}
-
-	/* okay - add the transport to the list */
-	list_add_tail(&newsrv->link, &trans->services);
-	rxrpc_get_transport(trans);
-	ret = 0;
-
- out:
-	spin_unlock(&trans->lock);
-
-	_leave("= %d", ret);
-	return ret;
-} /* end rxrpc_add_service() */
-
-/*****************************************************************************/
-/*
- * remove a service from a transport
- */
-void rxrpc_del_service(struct rxrpc_transport *trans, struct rxrpc_service *srv)
-{
-	_enter("%p{%hu},%p{%hu}", trans, trans->port, srv, srv->service_id);
-
-	spin_lock(&trans->lock);
-	list_del(&srv->link);
-	spin_unlock(&trans->lock);
-
-	rxrpc_put_transport(trans);
-
-	_leave("");
-} /* end rxrpc_del_service() */
-
-/*****************************************************************************/
-/*
- * INET callback when data has been received on the socket.
- */
-static void rxrpc_data_ready(struct sock *sk, int count)
-{
-	struct rxrpc_transport *trans;
-
-	_enter("%p{t=%p},%d", sk, sk->sk_user_data, count);
-
-	/* queue the transport for attention by krxiod */
-	trans = (struct rxrpc_transport *) sk->sk_user_data;
-	if (trans)
-		rxrpc_krxiod_queue_transport(trans);
-
-	/* wake up anyone waiting on the socket */
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible(sk->sk_sleep);
-
-	_leave("");
-} /* end rxrpc_data_ready() */
-
-/*****************************************************************************/
-/*
- * INET callback when an ICMP error packet is received
- * - sk->err is error (EHOSTUNREACH, EPROTO or EMSGSIZE)
- */
-static void rxrpc_error_report(struct sock *sk)
-{
-	struct rxrpc_transport *trans;
-
-	_enter("%p{t=%p}", sk, sk->sk_user_data);
-
-	/* queue the transport for attention by krxiod */
-	trans = (struct rxrpc_transport *) sk->sk_user_data;
-	if (trans) {
-		trans->error_rcvd = 1;
-		rxrpc_krxiod_queue_transport(trans);
-	}
-
-	/* wake up anyone waiting on the socket */
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible(sk->sk_sleep);
-
-	_leave("");
-} /* end rxrpc_error_report() */
-
-/*****************************************************************************/
-/*
- * split a message up, allocating message records and filling them in
- * from the contents of a socket buffer
- */
-static int rxrpc_incoming_msg(struct rxrpc_transport *trans,
-			      struct sk_buff *pkt,
-			      struct list_head *msgq)
-{
-	struct rxrpc_message *msg;
-	int ret;
-
-	_enter("");
-
-	msg = kzalloc(sizeof(struct rxrpc_message), GFP_KERNEL);
-	if (!msg) {
-		_leave(" = -ENOMEM");
-		return -ENOMEM;
-	}
-
-	atomic_set(&msg->usage, 1);
-	list_add_tail(&msg->link,msgq);
-
-	/* dig out the Rx routing parameters */
-	if (skb_copy_bits(pkt, sizeof(struct udphdr),
-			  &msg->hdr, sizeof(msg->hdr)) < 0) {
-		ret = -EBADMSG;
-		goto error;
-	}
-
-	msg->trans = trans;
-	msg->state = RXRPC_MSG_RECEIVED;
-	skb_get_timestamp(pkt, &msg->stamp);
-	if (msg->stamp.tv_sec == 0) {
-		do_gettimeofday(&msg->stamp);
-		if (pkt->sk)
-			sock_enable_timestamp(pkt->sk);
-	}
-	msg->seq = ntohl(msg->hdr.seq);
-
-	/* attach the packet */
-	skb_get(pkt);
-	msg->pkt = pkt;
-
-	msg->offset = sizeof(struct udphdr) + sizeof(struct rxrpc_header);
-	msg->dsize = msg->pkt->len - msg->offset;
-
-	_net("Rx Received packet from %s (%08x;%08x,%1x,%d,%s,%02x,%d,%d)",
-	     msg->hdr.flags & RXRPC_CLIENT_INITIATED ? "client" : "server",
-	     ntohl(msg->hdr.epoch),
-	     (ntohl(msg->hdr.cid) & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT,
-	     ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK,
-	     ntohl(msg->hdr.callNumber),
-	     rxrpc_pkts[msg->hdr.type],
-	     msg->hdr.flags,
-	     ntohs(msg->hdr.serviceId),
-	     msg->hdr.securityIndex);
-
-	__RXACCT(atomic_inc(&rxrpc_message_count));
-
-	/* split off jumbo packets */
-	while (msg->hdr.type == RXRPC_PACKET_TYPE_DATA &&
-	       msg->hdr.flags & RXRPC_JUMBO_PACKET
-	       ) {
-		struct rxrpc_jumbo_header jumbo;
-		struct rxrpc_message *jumbomsg = msg;
-
-		_debug("split jumbo packet");
-
-		/* quick sanity check */
-		ret = -EBADMSG;
-		if (msg->dsize <
-		    RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header))
-			goto error;
-		if (msg->hdr.flags & RXRPC_LAST_PACKET)
-			goto error;
-
-		/* dig out the secondary header */
-		if (skb_copy_bits(pkt, msg->offset + RXRPC_JUMBO_DATALEN,
-				  &jumbo, sizeof(jumbo)) < 0)
-			goto error;
-
-		/* allocate a new message record */
-		ret = -ENOMEM;
-		msg = kmemdup(jumbomsg, sizeof(struct rxrpc_message), GFP_KERNEL);
-		if (!msg)
-			goto error;
-
-		list_add_tail(&msg->link, msgq);
-
-		/* adjust the jumbo packet */
-		jumbomsg->dsize = RXRPC_JUMBO_DATALEN;
-
-		/* attach the packet here too */
-		skb_get(pkt);
-
-		/* adjust the parameters */
-		msg->seq++;
-		msg->hdr.seq = htonl(msg->seq);
-		msg->hdr.serial = htonl(ntohl(msg->hdr.serial) + 1);
-		msg->offset += RXRPC_JUMBO_DATALEN +
-			sizeof(struct rxrpc_jumbo_header);
-		msg->dsize -= RXRPC_JUMBO_DATALEN +
-			sizeof(struct rxrpc_jumbo_header);
-		msg->hdr.flags = jumbo.flags;
-		msg->hdr._rsvd = jumbo._rsvd;
-
-		_net("Rx Split jumbo packet from %s"
-		     " (%08x;%08x,%1x,%d,%s,%02x,%d,%d)",
-		     msg->hdr.flags & RXRPC_CLIENT_INITIATED ? "client" : "server",
-		     ntohl(msg->hdr.epoch),
-		     (ntohl(msg->hdr.cid) & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT,
-		     ntohl(msg->hdr.cid) & RXRPC_CHANNELMASK,
-		     ntohl(msg->hdr.callNumber),
-		     rxrpc_pkts[msg->hdr.type],
-		     msg->hdr.flags,
-		     ntohs(msg->hdr.serviceId),
-		     msg->hdr.securityIndex);
-
-		__RXACCT(atomic_inc(&rxrpc_message_count));
-	}
-
-	_leave(" = 0 #%d", atomic_read(&rxrpc_message_count));
-	return 0;
-
- error:
-	while (!list_empty(msgq)) {
-		msg = list_entry(msgq->next, struct rxrpc_message, link);
-		list_del_init(&msg->link);
-
-		rxrpc_put_message(msg);
-	}
-
-	_leave(" = %d", ret);
-	return ret;
-} /* end rxrpc_incoming_msg() */
-
-/*****************************************************************************/
-/*
- * accept a new call
- * - called from krxiod in process context
- */
-void rxrpc_trans_receive_packet(struct rxrpc_transport *trans)
-{
-	struct rxrpc_message *msg;
-	struct rxrpc_peer *peer;
-	struct sk_buff *pkt;
-	int ret;
-	__be32 addr;
-	__be16 port;
-
-	LIST_HEAD(msgq);
-
-	_enter("%p{%d}", trans, trans->port);
-
-	for (;;) {
-		/* deal with outstanting errors first */
-		if (trans->error_rcvd)
-			rxrpc_trans_receive_error_report(trans);
-
-		/* attempt to receive a packet */
-		pkt = skb_recv_datagram(trans->socket->sk, 0, 1, &ret);
-		if (!pkt) {
-			if (ret == -EAGAIN) {
-				_leave(" EAGAIN");
-				return;
-			}
-
-			/* an icmp error may have occurred */
-			rxrpc_krxiod_queue_transport(trans);
-			_leave(" error %d\n", ret);
-			return;
-		}
-
-		/* we'll probably need to checksum it (didn't call
-		 * sock_recvmsg) */
-		if (skb_checksum_complete(pkt)) {
-			kfree_skb(pkt);
-			rxrpc_krxiod_queue_transport(trans);
-			_leave(" CSUM failed");
-			return;
-		}
-
-		addr = ip_hdr(pkt)->saddr;
-		port = udp_hdr(pkt)->source;
-
-		_net("Rx Received UDP packet from %08x:%04hu",
-		     ntohl(addr), ntohs(port));
-
-		/* unmarshall the Rx parameters and split jumbo packets */
-		ret = rxrpc_incoming_msg(trans, pkt, &msgq);
-		if (ret < 0) {
-			kfree_skb(pkt);
-			rxrpc_krxiod_queue_transport(trans);
-			_leave(" bad packet");
-			return;
-		}
-
-		BUG_ON(list_empty(&msgq));
-
-		msg = list_entry(msgq.next, struct rxrpc_message, link);
-
-		/* locate the record for the peer from which it
-		 * originated */
-		ret = rxrpc_peer_lookup(trans, addr, &peer);
-		if (ret < 0) {
-			kdebug("Rx No connections from that peer");
-			rxrpc_trans_immediate_abort(trans, msg, -EINVAL);
-			goto finished_msg;
-		}
-
-		/* try and find a matching connection */
-		ret = rxrpc_connection_lookup(peer, msg, &msg->conn);
-		if (ret < 0) {
-			kdebug("Rx Unknown Connection");
-			rxrpc_trans_immediate_abort(trans, msg, -EINVAL);
-			rxrpc_put_peer(peer);
-			goto finished_msg;
-		}
-		rxrpc_put_peer(peer);
-
-		/* deal with the first packet of a new call */
-		if (msg->hdr.flags & RXRPC_CLIENT_INITIATED &&
-		    msg->hdr.type == RXRPC_PACKET_TYPE_DATA &&
-		    ntohl(msg->hdr.seq) == 1
-		    ) {
-			_debug("Rx New server call");
-			rxrpc_trans_receive_new_call(trans, &msgq);
-			goto finished_msg;
-		}
-
-		/* deal with subsequent packet(s) of call */
-		_debug("Rx Call packet");
-		while (!list_empty(&msgq)) {
-			msg = list_entry(msgq.next, struct rxrpc_message, link);
-			list_del_init(&msg->link);
-
-			ret = rxrpc_conn_receive_call_packet(msg->conn, NULL, msg);
-			if (ret < 0) {
-				rxrpc_trans_immediate_abort(trans, msg, ret);
-				rxrpc_put_message(msg);
-				goto finished_msg;
-			}
-
-			rxrpc_put_message(msg);
-		}
-
-		goto finished_msg;
-
-		/* dispose of the packets */
-	finished_msg:
-		while (!list_empty(&msgq)) {
-			msg = list_entry(msgq.next, struct rxrpc_message, link);
-			list_del_init(&msg->link);
-
-			rxrpc_put_message(msg);
-		}
-		kfree_skb(pkt);
-	}
-
-	_leave("");
-
-} /* end rxrpc_trans_receive_packet() */
-
-/*****************************************************************************/
-/*
- * accept a new call from a client trying to connect to one of my services
- * - called in process context
- */
-static int rxrpc_trans_receive_new_call(struct rxrpc_transport *trans,
-					struct list_head *msgq)
-{
-	struct rxrpc_message *msg;
-
-	_enter("");
-
-	/* only bother with the first packet */
-	msg = list_entry(msgq->next, struct rxrpc_message, link);
-	list_del_init(&msg->link);
-	rxrpc_krxsecd_queue_incoming_call(msg);
-	rxrpc_put_message(msg);
-
-	_leave(" = 0");
-
-	return 0;
-} /* end rxrpc_trans_receive_new_call() */
-
-/*****************************************************************************/
-/*
- * perform an immediate abort without connection or call structures
- */
-int rxrpc_trans_immediate_abort(struct rxrpc_transport *trans,
-				struct rxrpc_message *msg,
-				int error)
-{
-	struct rxrpc_header ahdr;
-	struct sockaddr_in sin;
-	struct msghdr msghdr;
-	struct kvec iov[2];
-	__be32 _error;
-	int len, ret;
-
-	_enter("%p,%p,%d", trans, msg, error);
-
-	/* don't abort an abort packet */
-	if (msg->hdr.type == RXRPC_PACKET_TYPE_ABORT) {
-		_leave(" = 0");
-		return 0;
-	}
-
-	_error = htonl(-error);
-
-	/* set up the message to be transmitted */
-	memcpy(&ahdr, &msg->hdr, sizeof(ahdr));
-	ahdr.epoch	= msg->hdr.epoch;
-	ahdr.serial	= htonl(1);
-	ahdr.seq	= 0;
-	ahdr.type	= RXRPC_PACKET_TYPE_ABORT;
-	ahdr.flags	= RXRPC_LAST_PACKET;
-	ahdr.flags	|= ~msg->hdr.flags & RXRPC_CLIENT_INITIATED;
-
-	iov[0].iov_len	= sizeof(ahdr);
-	iov[0].iov_base	= &ahdr;
-	iov[1].iov_len	= sizeof(_error);
-	iov[1].iov_base	= &_error;
-
-	len = sizeof(ahdr) + sizeof(_error);
-
-	memset(&sin,0,sizeof(sin));
-	sin.sin_family		= AF_INET;
-	sin.sin_port		= udp_hdr(msg->pkt)->source;
-	sin.sin_addr.s_addr	= ip_hdr(msg->pkt)->saddr;
-
-	msghdr.msg_name		= &sin;
-	msghdr.msg_namelen	= sizeof(sin);
-	msghdr.msg_control	= NULL;
-	msghdr.msg_controllen	= 0;
-	msghdr.msg_flags	= MSG_DONTWAIT;
-
-	_net("Sending message type %d of %d bytes to %08x:%d",
-	     ahdr.type,
-	     len,
-	     ntohl(sin.sin_addr.s_addr),
-	     ntohs(sin.sin_port));
-
-	/* send the message */
-	ret = kernel_sendmsg(trans->socket, &msghdr, iov, 2, len);
-
-	_leave(" = %d", ret);
-	return ret;
-} /* end rxrpc_trans_immediate_abort() */
-
-/*****************************************************************************/
-/*
- * receive an ICMP error report and percolate it to all connections
- * heading to the affected host or port
- */
-static void rxrpc_trans_receive_error_report(struct rxrpc_transport *trans)
-{
-	struct rxrpc_connection *conn;
-	struct sockaddr_in sin;
-	struct rxrpc_peer *peer;
-	struct list_head connq, *_p;
-	struct errormsg emsg;
-	struct msghdr msg;
-	__be16 port;
-	int local, err;
-
-	_enter("%p", trans);
-
-	for (;;) {
-		trans->error_rcvd = 0;
-
-		/* try and receive an error message */
-		msg.msg_name	= &sin;
-		msg.msg_namelen	= sizeof(sin);
-		msg.msg_control	= &emsg;
-		msg.msg_controllen = sizeof(emsg);
-		msg.msg_flags	= 0;
-
-		err = kernel_recvmsg(trans->socket, &msg, NULL, 0, 0,
-				   MSG_ERRQUEUE | MSG_DONTWAIT | MSG_TRUNC);
-
-		if (err == -EAGAIN) {
-			_leave("");
-			return;
-		}
-
-		if (err < 0) {
-			printk("%s: unable to recv an error report: %d\n",
-			       __FUNCTION__, err);
-			_leave("");
-			return;
-		}
-
-		msg.msg_controllen = (char *) msg.msg_control - (char *) &emsg;
-
-		if (msg.msg_controllen < sizeof(emsg.cmsg) ||
-		    msg.msg_namelen < sizeof(sin)) {
-			printk("%s: short control message"
-			       " (nlen=%u clen=%Zu fl=%x)\n",
-			       __FUNCTION__,
-			       msg.msg_namelen,
-			       msg.msg_controllen,
-			       msg.msg_flags);
-			continue;
-		}
-
-		_net("Rx Received control message"
-		     " { len=%Zu level=%u type=%u }",
-		     emsg.cmsg.cmsg_len,
-		     emsg.cmsg.cmsg_level,
-		     emsg.cmsg.cmsg_type);
-
-		if (sin.sin_family != AF_INET) {
-			printk("Rx Ignoring error report with non-INET address"
-			       " (fam=%u)",
-			       sin.sin_family);
-			continue;
-		}
-
-		_net("Rx Received message pertaining to host addr=%x port=%hu",
-		     ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port));
-
-		if (emsg.cmsg.cmsg_level != SOL_IP ||
-		    emsg.cmsg.cmsg_type != IP_RECVERR) {
-			printk("Rx Ignoring unknown error report"
-			       " { level=%u type=%u }",
-			       emsg.cmsg.cmsg_level,
-			       emsg.cmsg.cmsg_type);
-			continue;
-		}
-
-		if (msg.msg_controllen < sizeof(emsg.cmsg) + sizeof(emsg.ee)) {
-			printk("%s: short error message (%Zu)\n",
-			       __FUNCTION__, msg.msg_controllen);
-			_leave("");
-			return;
-		}
-
-		port = sin.sin_port;
-
-		switch (emsg.ee.ee_origin) {
-		case SO_EE_ORIGIN_ICMP:
-			local = 0;
-			switch (emsg.ee.ee_type) {
-			case ICMP_DEST_UNREACH:
-				switch (emsg.ee.ee_code) {
-				case ICMP_NET_UNREACH:
-					_net("Rx Received ICMP Network Unreachable");
-					port = 0;
-					err = -ENETUNREACH;
-					break;
-				case ICMP_HOST_UNREACH:
-					_net("Rx Received ICMP Host Unreachable");
-					port = 0;
-					err = -EHOSTUNREACH;
-					break;
-				case ICMP_PORT_UNREACH:
-					_net("Rx Received ICMP Port Unreachable");
-					err = -ECONNREFUSED;
-					break;
-				case ICMP_NET_UNKNOWN:
-					_net("Rx Received ICMP Unknown Network");
-					port = 0;
-					err = -ENETUNREACH;
-					break;
-				case ICMP_HOST_UNKNOWN:
-					_net("Rx Received ICMP Unknown Host");
-					port = 0;
-					err = -EHOSTUNREACH;
-					break;
-				default:
-					_net("Rx Received ICMP DestUnreach { code=%u }",
-					     emsg.ee.ee_code);
-					err = emsg.ee.ee_errno;
-					break;
-				}
-				break;
-
-			case ICMP_TIME_EXCEEDED:
-				_net("Rx Received ICMP TTL Exceeded");
-				err = emsg.ee.ee_errno;
-				break;
-
-			default:
-				_proto("Rx Received ICMP error { type=%u code=%u }",
-				       emsg.ee.ee_type, emsg.ee.ee_code);
-				err = emsg.ee.ee_errno;
-				break;
-			}
-			break;
-
-		case SO_EE_ORIGIN_LOCAL:
-			_proto("Rx Received local error { error=%d }",
-			       emsg.ee.ee_errno);
-			local = 1;
-			err = emsg.ee.ee_errno;
-			break;
-
-		case SO_EE_ORIGIN_NONE:
-		case SO_EE_ORIGIN_ICMP6:
-		default:
-			_proto("Rx Received error report { orig=%u }",
-			       emsg.ee.ee_origin);
-			local = 0;
-			err = emsg.ee.ee_errno;
-			break;
-		}
-
-		/* find all the connections between this transport and the
-		 * affected destination */
-		INIT_LIST_HEAD(&connq);
-
-		if (rxrpc_peer_lookup(trans, sin.sin_addr.s_addr,
-				      &peer) == 0) {
-			read_lock(&peer->conn_lock);
-			list_for_each(_p, &peer->conn_active) {
-				conn = list_entry(_p, struct rxrpc_connection,
-						  link);
-				if (port && conn->addr.sin_port != port)
-					continue;
-				if (!list_empty(&conn->err_link))
-					continue;
-
-				rxrpc_get_connection(conn);
-				list_add_tail(&conn->err_link, &connq);
-			}
-			read_unlock(&peer->conn_lock);
-
-			/* service all those connections */
-			while (!list_empty(&connq)) {
-				conn = list_entry(connq.next,
-						  struct rxrpc_connection,
-						  err_link);
-				list_del(&conn->err_link);
-
-				rxrpc_conn_handle_error(conn, local, err);
-
-				rxrpc_put_connection(conn);
-			}
-
-			rxrpc_put_peer(peer);
-		}
-	}
-
-	_leave("");
-	return;
-} /* end rxrpc_trans_receive_error_report() */
-- 
cgit v1.2.3


From 436058a49e0fb91c74454dbee9cfee6fb53b4336 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 26 Apr 2007 15:56:24 -0700
Subject: [AFS]: Handle multiple mounts of an AFS superblock correctly.

Handle multiple mounts of an AFS superblock correctly, checking to see
whether the superblock is already initialised after calling sget()
rather than just unconditionally stamping all over it.

Also delete the "silent" parameter to afs_fill_super() as it's not
used and can, in any case, be obtained from sb->s_flags.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/afs/super.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/fs/afs/super.c b/fs/afs/super.c
index efc4fe69f4f..77e68759788 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -212,7 +212,7 @@ static int afs_test_super(struct super_block *sb, void *data)
 /*
  * fill in the superblock
  */
-static int afs_fill_super(struct super_block *sb, void *data, int silent)
+static int afs_fill_super(struct super_block *sb, void *data)
 {
 	struct afs_mount_params *params = data;
 	struct afs_super_info *as = NULL;
@@ -319,17 +319,23 @@ static int afs_get_sb(struct file_system_type *fs_type,
 		goto error;
 	}
 
-	sb->s_flags = flags;
-
-	ret = afs_fill_super(sb, &params, flags & MS_SILENT ? 1 : 0);
-	if (ret < 0) {
-		up_write(&sb->s_umount);
-		deactivate_super(sb);
-		goto error;
+	if (!sb->s_root) {
+		/* initial superblock/root creation */
+		_debug("create");
+		sb->s_flags = flags;
+		ret = afs_fill_super(sb, &params);
+		if (ret < 0) {
+			up_write(&sb->s_umount);
+			deactivate_super(sb);
+			goto error;
+		}
+		sb->s_flags |= MS_ACTIVE;
+	} else {
+		_debug("reuse");
+		ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
 	}
-	sb->s_flags |= MS_ACTIVE;
-	simple_set_mnt(mnt, sb);
 
+	simple_set_mnt(mnt, sb);
 	afs_put_volume(params.volume);
 	afs_put_cell(params.default_cell);
 	_leave(" = 0 [%p]", sb);
-- 
cgit v1.2.3


From 00d3b7a4533e367b0dc2812a706db8f9f071c27f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 26 Apr 2007 15:57:07 -0700
Subject: [AFS]: Add security support.

Add security support to the AFS filesystem.  Kerberos IV tickets are added as
RxRPC keys are added to the session keyring with the klog program.  open() and
other VFS operations then find this ticket with request_key() and either use
it immediately (eg: mkdir, unlink) or attach it to a file descriptor (open).

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/afs/Makefile    |   1 +
 fs/afs/afs.h       |  27 ++++-
 fs/afs/callback.c  |   5 +-
 fs/afs/cell.c      |  98 +++++++++++----
 fs/afs/cmservice.c |   3 +
 fs/afs/dir.c       |  51 ++++++--
 fs/afs/file.c      |  60 ++++++++--
 fs/afs/fsclient.c  |   9 +-
 fs/afs/inode.c     |  21 +++-
 fs/afs/internal.h  | 106 ++++++++++++----
 fs/afs/mntpt.c     |  12 +-
 fs/afs/rxrpc.c     | 137 ++++++++++++++++-----
 fs/afs/security.c  | 345 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/afs/super.c     | 142 ++++++++++++++++++----
 fs/afs/vlclient.c  |   6 +
 fs/afs/vlocation.c |  26 ++--
 fs/afs/vnode.c     |  25 ++--
 fs/afs/volume.c    | 109 ++++-------------
 18 files changed, 945 insertions(+), 238 deletions(-)
 create mode 100644 fs/afs/security.c

diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 66bdc219ccd..cca198b2cae 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -15,6 +15,7 @@ kafs-objs := \
 	mntpt.o \
 	proc.o \
 	rxrpc.o \
+	security.o \
 	server.o \
 	super.o \
 	vlclient.o \
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index b9d2d2ceaf4..d959092aaf4 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -14,6 +14,9 @@
 
 #include <linux/in.h>
 
+#define AFS_MAXCELLNAME	64		/* maximum length of a cell name */
+#define AFS_MAXVOLNAME	64		/* maximum length of a volume name */
+
 typedef unsigned			afs_volid_t;
 typedef unsigned			afs_vnodeid_t;
 typedef unsigned long long		afs_dataversion_t;
@@ -74,6 +77,26 @@ struct afs_volume_info {
 	} servers[8];
 };
 
+/*
+ * AFS security ACE access mask
+ */
+typedef u32 afs_access_t;
+#define AFS_ACE_READ		0x00000001U	/* - permission to read a file/dir */
+#define AFS_ACE_WRITE		0x00000002U	/* - permission to write/chmod a file */
+#define AFS_ACE_INSERT		0x00000004U	/* - permission to create dirent in a dir */
+#define AFS_ACE_LOOKUP		0x00000008U	/* - permission to lookup a file/dir in a dir */
+#define AFS_ACE_DELETE		0x00000010U	/* - permission to delete a dirent from a dir */
+#define AFS_ACE_LOCK		0x00000020U	/* - permission to lock a file */
+#define AFS_ACE_ADMINISTER	0x00000040U	/* - permission to change ACL */
+#define AFS_ACE_USER_A		0x01000000U	/* - 'A' user-defined permission */
+#define AFS_ACE_USER_B		0x02000000U	/* - 'B' user-defined permission */
+#define AFS_ACE_USER_C		0x04000000U	/* - 'C' user-defined permission */
+#define AFS_ACE_USER_D		0x08000000U	/* - 'D' user-defined permission */
+#define AFS_ACE_USER_E		0x10000000U	/* - 'E' user-defined permission */
+#define AFS_ACE_USER_F		0x20000000U	/* - 'F' user-defined permission */
+#define AFS_ACE_USER_G		0x40000000U	/* - 'G' user-defined permission */
+#define AFS_ACE_USER_H		0x80000000U	/* - 'H' user-defined permission */
+
 /*
  * AFS file status information
  */
@@ -87,8 +110,8 @@ struct afs_file_status {
 	afs_dataversion_t	data_version;	/* current data version */
 	unsigned		author;		/* author ID */
 	unsigned		owner;		/* owner ID */
-	unsigned		caller_access;	/* access rights for authenticated caller */
-	unsigned		anon_access;	/* access rights for unauthenticated caller */
+	afs_access_t		caller_access;	/* access rights for authenticated caller */
+	afs_access_t		anon_access;	/* access rights for unauthenticated caller */
 	umode_t			mode;		/* UNIX mode */
 	struct afs_fid		parent;		/* parent file ID */
 	time_t			mtime_client;	/* last time client changed data */
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 61121554714..e674bebbb8b 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -72,7 +72,10 @@ void afs_broken_callback_work(struct work_struct *work)
 		return; /* someone else is dealing with it */
 
 	if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
-		if (afs_vnode_fetch_status(vnode) < 0)
+		if (S_ISDIR(vnode->vfs_inode.i_mode))
+			afs_clear_permits(vnode);
+
+		if (afs_vnode_fetch_status(vnode, NULL, NULL) < 0)
 			goto out;
 
 		if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 733c60246ab..9b1311a1df5 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -11,6 +11,9 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/key.h>
+#include <linux/ctype.h>
+#include <keys/rxrpc-type.h>
 #include "internal.h"
 
 DECLARE_RWSEM(afs_proc_cells_sem);
@@ -23,45 +26,43 @@ static DECLARE_WAIT_QUEUE_HEAD(afs_cells_freeable_wq);
 static struct afs_cell *afs_cell_root;
 
 /*
- * create a cell record
- * - "name" is the name of the cell
- * - "vllist" is a colon separated list of IP addresses in "a.b.c.d" format
+ * allocate a cell record and fill in its name, VL server address list and
+ * allocate an anonymous key
  */
-struct afs_cell *afs_cell_create(const char *name, char *vllist)
+static struct afs_cell *afs_cell_alloc(const char *name, char *vllist)
 {
 	struct afs_cell *cell;
-	char *next;
+	size_t namelen;
+	char keyname[4 + AFS_MAXCELLNAME + 1], *cp, *dp, *next;
 	int ret;
 
 	_enter("%s,%s", name, vllist);
 
 	BUG_ON(!name); /* TODO: want to look up "this cell" in the cache */
 
+	namelen = strlen(name);
+	if (namelen > AFS_MAXCELLNAME)
+		return ERR_PTR(-ENAMETOOLONG);
+
 	/* allocate and initialise a cell record */
-	cell = kmalloc(sizeof(struct afs_cell) + strlen(name) + 1, GFP_KERNEL);
+	cell = kzalloc(sizeof(struct afs_cell) + namelen + 1, GFP_KERNEL);
 	if (!cell) {
 		_leave(" = -ENOMEM");
 		return ERR_PTR(-ENOMEM);
 	}
 
-	down_write(&afs_cells_sem);
+	memcpy(cell->name, name, namelen);
+	cell->name[namelen] = 0;
 
-	memset(cell, 0, sizeof(struct afs_cell));
 	atomic_set(&cell->usage, 1);
-
 	INIT_LIST_HEAD(&cell->link);
-
 	rwlock_init(&cell->servers_lock);
 	INIT_LIST_HEAD(&cell->servers);
-
 	init_rwsem(&cell->vl_sem);
 	INIT_LIST_HEAD(&cell->vl_list);
 	spin_lock_init(&cell->vl_lock);
 
-	strcpy(cell->name, name);
-
 	/* fill in the VL server list from the rest of the string */
-	ret = -EINVAL;
 	do {
 		unsigned a, b, c, d;
 
@@ -70,18 +71,73 @@ struct afs_cell *afs_cell_create(const char *name, char *vllist)
 			*next++ = 0;
 
 		if (sscanf(vllist, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
-			goto badaddr;
+			goto bad_address;
 
 		if (a > 255 || b > 255 || c > 255 || d > 255)
-			goto badaddr;
+			goto bad_address;
 
 		cell->vl_addrs[cell->vl_naddrs++].s_addr =
 			htonl((a << 24) | (b << 16) | (c << 8) | d);
 
-		if (cell->vl_naddrs >= AFS_CELL_MAX_ADDRS)
-			break;
+	} while (cell->vl_naddrs < AFS_CELL_MAX_ADDRS && (vllist = next));
+
+	/* create a key to represent an anonymous user */
+	memcpy(keyname, "afs@", 4);
+	dp = keyname + 4;
+	cp = cell->name;
+	do {
+		*dp++ = toupper(*cp);
+	} while (*cp++);
+	cell->anonymous_key = key_alloc(&key_type_rxrpc, keyname, 0, 0, current,
+					KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA);
+	if (IS_ERR(cell->anonymous_key)) {
+		_debug("no key");
+		ret = PTR_ERR(cell->anonymous_key);
+		goto error;
+	}
+
+	ret = key_instantiate_and_link(cell->anonymous_key, NULL, 0,
+				       NULL, NULL);
+	if (ret < 0) {
+		_debug("instantiate failed");
+		goto error;
+	}
+
+	_debug("anon key %p{%x}",
+	       cell->anonymous_key, key_serial(cell->anonymous_key));
+
+	_leave(" = %p", cell);
+	return cell;
+
+bad_address:
+	printk(KERN_ERR "kAFS: bad VL server IP address\n");
+	ret = -EINVAL;
+error:
+	key_put(cell->anonymous_key);
+	kfree(cell);
+	_leave(" = %d", ret);
+	return ERR_PTR(ret);
+}
 
-	} while ((vllist = next));
+/*
+ * create a cell record
+ * - "name" is the name of the cell
+ * - "vllist" is a colon separated list of IP addresses in "a.b.c.d" format
+ */
+struct afs_cell *afs_cell_create(const char *name, char *vllist)
+{
+	struct afs_cell *cell;
+	int ret;
+
+	_enter("%s,%s", name, vllist);
+
+	cell = afs_cell_alloc(name, vllist);
+	if (IS_ERR(cell)) {
+		_leave(" = %ld", PTR_ERR(cell));
+		return cell;
+	}
+
+	down_write(&afs_cells_sem);
 
 	/* add a proc directory for this cell */
 	ret = afs_proc_cell_setup(cell);
@@ -109,10 +165,9 @@ struct afs_cell *afs_cell_create(const char *name, char *vllist)
 	_leave(" = %p", cell);
 	return cell;
 
-badaddr:
-	printk(KERN_ERR "kAFS: bad VL server IP address\n");
 error:
 	up_write(&afs_cells_sem);
+	key_put(cell->anonymous_key);
 	kfree(cell);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
@@ -301,6 +356,7 @@ static void afs_cell_destroy(struct afs_cell *cell)
 	cachefs_relinquish_cookie(cell->cache, 0);
 #endif
 
+	key_put(cell->anonymous_key);
 	kfree(cell);
 
 	_leave(" [destroyed]");
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index c7141175391..c3ec57a237b 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -28,6 +28,7 @@ static void afs_cm_destructor(struct afs_call *);
  * CB.CallBack operation type
  */
 static const struct afs_call_type afs_SRXCBCallBack = {
+	.name		= "CB.CallBack",
 	.deliver	= afs_deliver_cb_callback,
 	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_cm_destructor,
@@ -37,6 +38,7 @@ static const struct afs_call_type afs_SRXCBCallBack = {
  * CB.InitCallBackState operation type
  */
 static const struct afs_call_type afs_SRXCBInitCallBackState = {
+	.name		= "CB.InitCallBackState",
 	.deliver	= afs_deliver_cb_init_call_back_state,
 	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_cm_destructor,
@@ -46,6 +48,7 @@ static const struct afs_call_type afs_SRXCBInitCallBackState = {
  * CB.Probe operation type
  */
 static const struct afs_call_type afs_SRXCBProbe = {
+	.name		= "CB.Probe",
 	.deliver	= afs_deliver_cb_probe,
 	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_cm_destructor,
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index d7697f6f3b7..87368417e4d 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
+#include <linux/ctype.h>
 #include "internal.h"
 
 static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry,
@@ -28,11 +29,13 @@ static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen,
 
 const struct file_operations afs_dir_file_operations = {
 	.open		= afs_dir_open,
+	.release	= afs_release,
 	.readdir	= afs_dir_readdir,
 };
 
 const struct inode_operations afs_dir_inode_operations = {
 	.lookup		= afs_dir_lookup,
+	.permission	= afs_permission,
 	.getattr	= afs_inode_getattr,
 #if 0 /* TODO */
 	.create		= afs_dir_create,
@@ -169,13 +172,17 @@ static inline void afs_dir_put_page(struct page *page)
 /*
  * get a page into the pagecache
  */
-static struct page *afs_dir_get_page(struct inode *dir, unsigned long index)
+static struct page *afs_dir_get_page(struct inode *dir, unsigned long index,
+				     struct key *key)
 {
 	struct page *page;
+	struct file file = {
+		.private_data = key,
+	};
 
 	_enter("{%lu},%lu", dir->i_ino, index);
 
-	page = read_mapping_page(dir->i_mapping, index, NULL);
+	page = read_mapping_page(dir->i_mapping, index, &file);
 	if (!IS_ERR(page)) {
 		wait_on_page_locked(page);
 		kmap(page);
@@ -207,8 +214,7 @@ static int afs_dir_open(struct inode *inode, struct file *file)
 	if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(inode)->flags))
 		return -ENOENT;
 
-	_leave(" = 0");
-	return 0;
+	return afs_open(inode, file);
 }
 
 /*
@@ -311,7 +317,7 @@ static int afs_dir_iterate_block(unsigned *fpos,
  * iterate through the data blob that lists the contents of an AFS directory
  */
 static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie,
-			   filldir_t filldir)
+			   filldir_t filldir, struct key *key)
 {
 	union afs_dir_block *dblock;
 	struct afs_dir_page *dbuf;
@@ -336,7 +342,7 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie,
 		blkoff = *fpos & ~(sizeof(union afs_dir_block) - 1);
 
 		/* fetch the appropriate page from the directory */
-		page = afs_dir_get_page(dir, blkoff / PAGE_SIZE);
+		page = afs_dir_get_page(dir, blkoff / PAGE_SIZE, key);
 		if (IS_ERR(page)) {
 			ret = PTR_ERR(page);
 			break;
@@ -381,9 +387,11 @@ static int afs_dir_readdir(struct file *file, void *cookie, filldir_t filldir)
 	_enter("{%Ld,{%lu}}",
 	       file->f_pos, file->f_path.dentry->d_inode->i_ino);
 
+	ASSERT(file->private_data != NULL);
+
 	fpos = file->f_pos;
 	ret = afs_dir_iterate(file->f_path.dentry->d_inode, &fpos,
-			      cookie, filldir);
+			      cookie, filldir, file->private_data);
 	file->f_pos = fpos;
 
 	_leave(" = %d", ret);
@@ -424,7 +432,7 @@ static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen,
  * do a lookup in a directory
  */
 static int afs_do_lookup(struct inode *dir, struct dentry *dentry,
-			 struct afs_fid *fid)
+			 struct afs_fid *fid, struct key *key)
 {
 	struct afs_dir_lookup_cookie cookie;
 	struct afs_super_info *as;
@@ -442,7 +450,8 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry,
 	cookie.found	= 0;
 
 	fpos = 0;
-	ret = afs_dir_iterate(dir, &fpos, &cookie, afs_dir_lookup_filldir);
+	ret = afs_dir_iterate(dir, &fpos, &cookie, afs_dir_lookup_filldir,
+			      key);
 	if (ret < 0) {
 		_leave(" = %d [iter]", ret);
 		return ret;
@@ -468,6 +477,7 @@ static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry,
 	struct afs_vnode *vnode;
 	struct afs_fid fid;
 	struct inode *inode;
+	struct key *key;
 	int ret;
 
 	_enter("{%lu},%p{%s}", dir->i_ino, dentry, dentry->d_name.name);
@@ -483,14 +493,22 @@ static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry,
 		return ERR_PTR(-ESTALE);
 	}
 
-	ret = afs_do_lookup(dir, dentry, &fid);
+	key = afs_request_key(vnode->volume->cell);
+	if (IS_ERR(key)) {
+		_leave(" = %ld [key]", PTR_ERR(key));
+		return ERR_PTR(PTR_ERR(key));
+	}
+
+	ret = afs_do_lookup(dir, dentry, &fid, key);
 	if (ret < 0) {
+		key_put(key);
 		_leave(" = %d [do]", ret);
 		return ERR_PTR(ret);
 	}
 
 	/* instantiate the dentry */
-	inode = afs_iget(dir->i_sb, &fid);
+	inode = afs_iget(dir->i_sb, key, &fid);
+	key_put(key);
 	if (IS_ERR(inode)) {
 		_leave(" = %ld", PTR_ERR(inode));
 		return ERR_PTR(PTR_ERR(inode));
@@ -559,6 +577,7 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 	struct afs_fid fid;
 	struct dentry *parent;
 	struct inode *inode, *dir;
+	struct key *key;
 	int ret;
 
 	vnode = AFS_FS_I(dentry->d_inode);
@@ -566,6 +585,10 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 	_enter("{sb=%p n=%s fl=%lx},",
 	       dentry->d_sb, dentry->d_name.name, vnode->flags);
 
+	key = afs_request_key(vnode->volume->cell);
+	if (IS_ERR(key))
+		key = NULL;
+
 	/* lock down the parent dentry so we can peer at it */
 	parent = dget_parent(dentry);
 
@@ -595,7 +618,7 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 		_debug("dir modified");
 
 		/* search the directory for this vnode */
-		ret = afs_do_lookup(dir, dentry, &fid);
+		ret = afs_do_lookup(dir, dentry, &fid, key);
 		if (ret == -ENOENT) {
 			_debug("%s: dirent not found", dentry->d_name.name);
 			goto not_found;
@@ -637,7 +660,7 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 	    test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
 		_debug("%s: changed", dentry->d_name.name);
 		set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-		if (afs_vnode_fetch_status(vnode) < 0) {
+		if (afs_vnode_fetch_status(vnode, NULL, key) < 0) {
 			mutex_unlock(&vnode->cb_broken_lock);
 			goto out_bad;
 		}
@@ -667,6 +690,7 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 
 out_valid:
 	dput(parent);
+	key_put(key);
 	_leave(" = 1 [valid]");
 	return 1;
 
@@ -688,6 +712,7 @@ out_bad:
 	shrink_dcache_parent(dentry);
 	d_drop(dentry);
 	dput(parent);
+	key_put(key);
 
 	_leave(" = 0 [bad]");
 	return 0;
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 6990327e75d..101bbb8c0d8 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -17,17 +17,23 @@
 #include <linux/pagemap.h>
 #include "internal.h"
 
-#if 0
-static int afs_file_open(struct inode *inode, struct file *file);
-static int afs_file_release(struct inode *inode, struct file *file);
-#endif
-
 static int afs_file_readpage(struct file *file, struct page *page);
 static void afs_file_invalidatepage(struct page *page, unsigned long offset);
 static int afs_file_releasepage(struct page *page, gfp_t gfp_flags);
 
+const struct file_operations afs_file_operations = {
+	.open		= afs_open,
+	.release	= afs_release,
+	.llseek		= generic_file_llseek,
+	.read		= do_sync_read,
+	.aio_read	= generic_file_aio_read,
+	.mmap		= generic_file_readonly_mmap,
+	.sendfile	= generic_file_sendfile,
+};
+
 const struct inode_operations afs_file_inode_operations = {
 	.getattr	= afs_inode_getattr,
+	.permission	= afs_permission,
 };
 
 const struct address_space_operations afs_fs_aops = {
@@ -37,6 +43,41 @@ const struct address_space_operations afs_fs_aops = {
 	.invalidatepage	= afs_file_invalidatepage,
 };
 
+/*
+ * open an AFS file or directory and attach a key to it
+ */
+int afs_open(struct inode *inode, struct file *file)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	struct key *key;
+
+	_enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode);
+
+	key = afs_request_key(vnode->volume->cell);
+	if (IS_ERR(key)) {
+		_leave(" = %ld [key]", PTR_ERR(key));
+		return PTR_ERR(key);
+	}
+
+	file->private_data = key;
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * release an AFS file or directory and discard its key
+ */
+int afs_release(struct inode *inode, struct file *file)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+
+	_enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode);
+
+	key_put(file->private_data);
+	_leave(" = 0");
+	return 0;
+}
+
 /*
  * deal with notification that a page was read from the cache
  */
@@ -79,13 +120,18 @@ static int afs_file_readpage(struct file *file, struct page *page)
 {
 	struct afs_vnode *vnode;
 	struct inode *inode;
+	struct key *key;
 	size_t len;
 	off_t offset;
 	int ret;
 
 	inode = page->mapping->host;
 
-	_enter("{%lu},{%lu}", inode->i_ino, page->index);
+	ASSERT(file != NULL);
+	key = file->private_data;
+	ASSERT(key != NULL);
+
+	_enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index);
 
 	vnode = AFS_FS_I(inode);
 
@@ -124,7 +170,7 @@ static int afs_file_readpage(struct file *file, struct page *page)
 
 		/* read the contents of the file from the server into the
 		 * page */
-		ret = afs_vnode_fetch_data(vnode, offset, len, page);
+		ret = afs_vnode_fetch_data(vnode, key, offset, len, page);
 		if (ret < 0) {
 			if (ret == -ENOENT) {
 				_debug("got NOENT from server"
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 167ca615c2e..321b489aa90 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -148,6 +148,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call,
  * FS.FetchStatus operation type
  */
 static const struct afs_call_type afs_RXFSFetchStatus = {
+	.name		= "FS.FetchStatus",
 	.deliver	= afs_deliver_fs_fetch_status,
 	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
@@ -157,6 +158,7 @@ static const struct afs_call_type afs_RXFSFetchStatus = {
  * fetch the status information for a file
  */
 int afs_fs_fetch_file_status(struct afs_server *server,
+			     struct key *key,
 			     struct afs_vnode *vnode,
 			     struct afs_volsync *volsync,
 			     const struct afs_wait_mode *wait_mode)
@@ -164,12 +166,13 @@ int afs_fs_fetch_file_status(struct afs_server *server,
 	struct afs_call *call;
 	__be32 *bp;
 
-	_enter("");
+	_enter(",%x,,,", key_serial(key));
 
 	call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, 120);
 	if (!call)
 		return -ENOMEM;
 
+	call->key = key;
 	call->reply = vnode;
 	call->reply2 = volsync;
 	call->service_id = FS_SERVICE;
@@ -279,6 +282,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
  * FS.FetchData operation type
  */
 static const struct afs_call_type afs_RXFSFetchData = {
+	.name		= "FS.FetchData",
 	.deliver	= afs_deliver_fs_fetch_data,
 	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
@@ -288,6 +292,7 @@ static const struct afs_call_type afs_RXFSFetchData = {
  * fetch data from a file
  */
 int afs_fs_fetch_data(struct afs_server *server,
+		      struct key *key,
 		      struct afs_vnode *vnode,
 		      off_t offset, size_t length,
 		      struct page *buffer,
@@ -303,6 +308,7 @@ int afs_fs_fetch_data(struct afs_server *server,
 	if (!call)
 		return -ENOMEM;
 
+	call->key = key;
 	call->reply = vnode;
 	call->reply2 = volsync;
 	call->reply3 = buffer;
@@ -338,6 +344,7 @@ static int afs_deliver_fs_give_up_callbacks(struct afs_call *call,
  * FS.GiveUpCallBacks operation type
  */
 static const struct afs_call_type afs_RXFSGiveUpCallBacks = {
+	.name		= "FS.GiveUpCallBacks",
 	.deliver	= afs_deliver_fs_give_up_callbacks,
 	.abort_to_error	= afs_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 18863315211..22733622829 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -29,7 +29,7 @@ struct afs_iget_data {
 /*
  * map the AFS file status to the inode member variables
  */
-static int afs_inode_map_status(struct afs_vnode *vnode)
+static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 {
 	struct inode *inode = AFS_VNODE_TO_I(vnode);
 
@@ -44,7 +44,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode)
 	case AFS_FTYPE_FILE:
 		inode->i_mode	= S_IFREG | vnode->status.mode;
 		inode->i_op	= &afs_file_inode_operations;
-		inode->i_fop	= &generic_ro_fops;
+		inode->i_fop	= &afs_file_operations;
 		break;
 	case AFS_FTYPE_DIR:
 		inode->i_mode	= S_IFDIR | vnode->status.mode;
@@ -73,7 +73,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode)
 
 	/* check to see whether a symbolic link is really a mountpoint */
 	if (vnode->status.type == AFS_FTYPE_SYMLINK) {
-		afs_mntpt_check_symlink(vnode);
+		afs_mntpt_check_symlink(vnode, key);
 
 		if (test_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags)) {
 			inode->i_mode	= S_IFDIR | vnode->status.mode;
@@ -115,7 +115,8 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
 /*
  * inode retrieval
  */
-inline struct inode *afs_iget(struct super_block *sb, struct afs_fid *fid)
+inline struct inode *afs_iget(struct super_block *sb, struct key *key,
+			      struct afs_fid *fid)
 {
 	struct afs_iget_data data = { .fid = *fid };
 	struct afs_super_info *as;
@@ -157,10 +158,10 @@ inline struct inode *afs_iget(struct super_block *sb, struct afs_fid *fid)
 
 	/* okay... it's a new inode */
 	set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-	ret = afs_vnode_fetch_status(vnode);
+	ret = afs_vnode_fetch_status(vnode, NULL, key);
 	if (ret < 0)
 		goto bad_inode;
-	ret = afs_inode_map_status(vnode);
+	ret = afs_inode_map_status(vnode, key);
 	if (ret < 0)
 		goto bad_inode;
 
@@ -201,6 +202,7 @@ int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
  */
 void afs_clear_inode(struct inode *inode)
 {
+	struct afs_permits *permits;
 	struct afs_vnode *vnode;
 
 	vnode = AFS_FS_I(inode);
@@ -233,5 +235,12 @@ void afs_clear_inode(struct inode *inode)
 	vnode->cache = NULL;
 #endif
 
+	mutex_lock(&vnode->permits_lock);
+	permits = vnode->permits;
+	rcu_assign_pointer(vnode->permits, NULL);
+	mutex_unlock(&vnode->permits_lock);
+	if (permits)
+		call_rcu(&permits->rcu, afs_zap_permits);
+
 	_leave("");
 }
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index afc6f0f3025..8bed2429d01 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -15,6 +15,7 @@
 #include <linux/pagemap.h>
 #include <linux/skbuff.h>
 #include <linux/rxrpc.h>
+#include <linux/key.h>
 #include "afs.h"
 #include "afs_vl.h"
 
@@ -32,6 +33,17 @@ typedef enum {
 	AFS_VL_UNCERTAIN,		/* uncertain state (update failed) */
 } __attribute__((packed)) afs_vlocation_state_t;
 
+struct afs_mount_params {
+	bool			rwpath;		/* T if the parent should be considered R/W */
+	bool			force;		/* T to force cell type */
+	afs_voltype_t		type;		/* type of volume requested */
+	int			volnamesz;	/* size of volume name */
+	const char		*volname;	/* name of volume to mount */
+	struct afs_cell		*cell;		/* cell in which to find volume */
+	struct afs_volume	*volume;	/* volume record */
+	struct key		*key;		/* key to use for secure mounting */
+};
+
 /*
  * definition of how to wait for the completion of an operation
  */
@@ -95,6 +107,8 @@ struct afs_call {
 };
 
 struct afs_call_type {
+	const char *name;
+
 	/* deliver request or reply data to an call
 	 * - returning an error will cause the call to be aborted
 	 */
@@ -128,8 +142,8 @@ extern struct file_system_type afs_fs_type;
  * entry in the cached cell catalogue
  */
 struct afs_cache_cell {
-	char			name[64];	/* cell name (padded with NULs) */
-	struct in_addr		vl_servers[15];	/* cached cell VL servers */
+	char		name[AFS_MAXCELLNAME];	/* cell name (padded with NULs) */
+	struct in_addr	vl_servers[15];		/* cached cell VL servers */
 };
 
 /*
@@ -138,6 +152,7 @@ struct afs_cache_cell {
 struct afs_cell {
 	atomic_t		usage;
 	struct list_head	link;		/* main cell list link */
+	struct key		*anonymous_key;	/* anonymous user key for this cell */
 	struct list_head	proc_link;	/* /proc cell list link */
 	struct proc_dir_entry	*proc_dir;	/* /proc dir for this cell */
 #ifdef AFS_CACHING_SUPPORT
@@ -163,7 +178,9 @@ struct afs_cell {
  * entry in the cached volume location catalogue
  */
 struct afs_cache_vlocation {
-	uint8_t			name[64 + 1];	/* volume name (lowercase, padded with NULs) */
+	/* volume name (lowercase, padded with NULs) */
+	uint8_t			name[AFS_MAXVOLNAME + 1];
+
 	uint8_t			nservers;	/* number of entries used in servers[] */
 	uint8_t			vidmask;	/* voltype mask for vid[] */
 	uint8_t			srvtmask[8];	/* voltype masks for servers[] */
@@ -281,7 +298,8 @@ struct afs_vnode {
 #ifdef AFS_CACHING_SUPPORT
 	struct cachefs_cookie	*cache;		/* caching cookie */
 #endif
-
+	struct afs_permits	*permits;	/* cache of permits so far obtained */
+	struct mutex		permits_lock;	/* lock for altering permits list */
 	wait_queue_head_t	update_waitq;	/* status fetch waitqueue */
 	unsigned		update_cnt;	/* number of outstanding ops that will update the
 						 * status */
@@ -296,12 +314,13 @@ struct afs_vnode {
 #define AFS_VNODE_DIR_CHANGED	6		/* set if vnode's parent dir metadata changed */
 #define AFS_VNODE_DIR_MODIFIED	7		/* set if vnode's parent dir data modified */
 
+	long			acl_order;	/* ACL check count (callback break count) */
+
 	/* outstanding callback notification on this file */
 	struct rb_node		server_rb;	/* link in server->fs_vnodes */
 	struct rb_node		cb_promise;	/* link in server->cb_promises */
 	struct work_struct	cb_broken_work;	/* work to be done on callback break */
 	struct mutex		cb_broken_lock;	/* lock against multiple attempts to fix break */
-//	struct list_head	cb_hash_link;	/* link in master callback hash */
 	time_t			cb_expires;	/* time at which callback expires */
 	time_t			cb_expires_at;	/* time used to order cb_promise */
 	unsigned		cb_version;	/* callback version */
@@ -310,6 +329,23 @@ struct afs_vnode {
 	bool			cb_promised;	/* true if promise still holds */
 };
 
+/*
+ * cached security record for one user's attempt to access a vnode
+ */
+struct afs_permit {
+	struct key		*key;		/* RxRPC ticket holding a security context */
+	afs_access_t		access_mask;	/* access mask for this key */
+};
+
+/*
+ * cache of security records from attempts to access a vnode
+ */
+struct afs_permits {
+	struct rcu_head		rcu;		/* disposal procedure */
+	int			count;		/* number of records */
+	struct afs_permit	permits[0];	/* the permits so far examined */
+};
+
 /*****************************************************************************/
 /*
  * callback.c
@@ -352,11 +388,17 @@ extern bool afs_cm_incoming_call(struct afs_call *);
 extern const struct inode_operations afs_dir_inode_operations;
 extern const struct file_operations afs_dir_file_operations;
 
+extern int afs_permission(struct inode *, int, struct nameidata *);
+
 /*
  * file.c
  */
 extern const struct address_space_operations afs_fs_aops;
 extern const struct inode_operations afs_file_inode_operations;
+extern const struct file_operations afs_file_operations;
+
+extern int afs_open(struct inode *, struct file *);
+extern int afs_release(struct inode *, struct file *);
 
 #ifdef AFS_CACHING_SUPPORT
 extern int afs_cache_get_page_cookie(struct page *, struct cachefs_page **);
@@ -365,22 +407,24 @@ extern int afs_cache_get_page_cookie(struct page *, struct cachefs_page **);
 /*
  * fsclient.c
  */
-extern int afs_fs_fetch_file_status(struct afs_server *,
-				    struct afs_vnode *,
-				    struct afs_volsync *,
+extern int afs_fs_fetch_file_status(struct afs_server *, struct key *,
+				    struct afs_vnode *, struct afs_volsync *,
 				    const struct afs_wait_mode *);
 extern int afs_fs_give_up_callbacks(struct afs_server *,
 				    const struct afs_wait_mode *);
-extern int afs_fs_fetch_data(struct afs_server *, struct afs_vnode *, off_t,
-			     size_t, struct page *, struct afs_volsync *,
+extern int afs_fs_fetch_data(struct afs_server *, struct key *,
+			     struct afs_vnode *, off_t, size_t, struct page *,
+			     struct afs_volsync *,
 			     const struct afs_wait_mode *);
 
 /*
  * inode.c
  */
-extern struct inode *afs_iget(struct super_block *, struct afs_fid *);
+extern struct inode *afs_iget(struct super_block *, struct key *,
+			      struct afs_fid *);
 extern int afs_inode_getattr(struct vfsmount *, struct dentry *,
 			     struct kstat *);
+extern void afs_zap_permits(struct rcu_head *);
 extern void afs_clear_inode(struct inode *);
 
 /*
@@ -402,16 +446,10 @@ extern const struct inode_operations afs_mntpt_inode_operations;
 extern const struct file_operations afs_mntpt_file_operations;
 extern unsigned long afs_mntpt_expiry_timeout;
 
-extern int afs_mntpt_check_symlink(struct afs_vnode *);
+extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *);
 extern void afs_mntpt_kill_timer(void);
 extern void afs_umount_begin(struct vfsmount *, int);
 
-/*
- * super.c
- */
-extern int afs_fs_init(void);
-extern void afs_fs_exit(void);
-
 /*
  * proc.c
  */
@@ -435,6 +473,14 @@ extern void afs_send_empty_reply(struct afs_call *);
 extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
 			    size_t);
 
+/*
+ * security.c
+ */
+extern void afs_clear_permits(struct afs_vnode *);
+extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
+extern struct key *afs_request_key(struct afs_cell *);
+extern int afs_permission(struct inode *, int, struct nameidata *);
+
 /*
  * server.c
  */
@@ -448,6 +494,12 @@ extern struct afs_server *afs_find_server(const struct in_addr *);
 extern void afs_put_server(struct afs_server *);
 extern void __exit afs_purge_servers(void);
 
+/*
+ * super.c
+ */
+extern int afs_fs_init(void);
+extern void afs_fs_exit(void);
+
 /*
  * vlclient.c
  */
@@ -455,10 +507,11 @@ extern void __exit afs_purge_servers(void);
 extern struct cachefs_index_def afs_vlocation_cache_index_def;
 #endif
 
-extern int afs_vl_get_entry_by_name(struct in_addr *, const char *,
-				    struct afs_cache_vlocation *,
+extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *,
+				    const char *, struct afs_cache_vlocation *,
 				    const struct afs_wait_mode *);
-extern int afs_vl_get_entry_by_id(struct in_addr *, afs_volid_t, afs_voltype_t,
+extern int afs_vl_get_entry_by_id(struct in_addr *, struct key *,
+				  afs_volid_t, afs_voltype_t,
 				  struct afs_cache_vlocation *,
 				  const struct afs_wait_mode *);
 
@@ -469,6 +522,7 @@ extern int afs_vl_get_entry_by_id(struct in_addr *, afs_volid_t, afs_voltype_t,
 
 extern int __init afs_vlocation_update_init(void);
 extern struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *,
+						  struct key *,
 						  const char *, size_t);
 extern void afs_put_vlocation(struct afs_vlocation *);
 extern void __exit afs_vlocation_purge(void);
@@ -492,9 +546,10 @@ static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
 	return &vnode->vfs_inode;
 }
 
-extern int afs_vnode_fetch_status(struct afs_vnode *);
-extern int afs_vnode_fetch_data(struct afs_vnode *vnode, off_t, size_t,
-				struct page *);
+extern int afs_vnode_fetch_status(struct afs_vnode *, struct afs_vnode *,
+				  struct key *);
+extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *,
+				off_t, size_t, struct page *);
 
 /*
  * volume.c
@@ -506,8 +561,7 @@ extern struct cachefs_index_def afs_volume_cache_index_def;
 #define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0)
 
 extern void afs_put_volume(struct afs_volume *);
-extern struct afs_volume *afs_volume_lookup(const char *, struct afs_cell *,
-					    int);
+extern struct afs_volume *afs_volume_lookup(struct afs_mount_params *);
 extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *);
 extern int afs_volume_release_fileserver(struct afs_vnode *,
 					 struct afs_server *, int);
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 08c11a0b66b..b905ae37f91 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -48,8 +48,11 @@ unsigned long afs_mntpt_expiry_timeout = 10 * 60;
  * check a symbolic link to see whether it actually encodes a mountpoint
  * - sets the AFS_VNODE_MOUNTPOINT flag on the vnode appropriately
  */
-int afs_mntpt_check_symlink(struct afs_vnode *vnode)
+int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
 {
+	struct file file = {
+		.private_data = key,
+	};
 	struct page *page;
 	size_t size;
 	char *buf;
@@ -58,7 +61,7 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode)
 	_enter("{%u,%u}", vnode->fid.vnode, vnode->fid.unique);
 
 	/* read the contents of the symlink into the pagecache */
-	page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, NULL);
+	page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, &file);
 	if (IS_ERR(page)) {
 		ret = PTR_ERR(page);
 		goto out;
@@ -214,7 +217,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 	struct vfsmount *newmnt;
 	int err;
 
-	_enter("%p{%s},{%s:%p{%s}}",
+	_enter("%p{%s},{%s:%p{%s},}",
 	       dentry,
 	       dentry->d_name.name,
 	       nd->mnt->mnt_devname,
@@ -234,7 +237,8 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
 	err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts);
 	switch (err) {
 	case 0:
-		path_release(nd);
+		mntput(nd->mnt);
+		dput(nd->dentry);
 		nd->mnt = newmnt;
 		nd->dentry = dget(newmnt->mnt_root);
 		schedule_delayed_work(&afs_mntpt_expiry_timer,
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index b92774231b3..e86c527d87a 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -17,6 +17,8 @@
 
 static struct socket *afs_socket; /* my RxRPC socket */
 static struct workqueue_struct *afs_async_calls;
+static atomic_t afs_outstanding_calls;
+static atomic_t afs_outstanding_skbs;
 
 static void afs_wake_up_call_waiter(struct afs_call *);
 static int afs_wait_for_call_to_complete(struct afs_call *);
@@ -45,6 +47,7 @@ static const struct afs_wait_mode afs_async_incoming_call = {
 
 /* asynchronous incoming call initial processing */
 static const struct afs_call_type afs_RXCMxxxx = {
+	.name		= "CB.xxxx",
 	.deliver	= afs_deliver_cm_op_id,
 	.abort_to_error	= afs_abort_to_error,
 };
@@ -118,9 +121,66 @@ void afs_close_socket(void)
 
 	_debug("dework");
 	destroy_workqueue(afs_async_calls);
+
+	ASSERTCMP(atomic_read(&afs_outstanding_skbs), ==, 0);
+	ASSERTCMP(atomic_read(&afs_outstanding_calls), ==, 0);
 	_leave("");
 }
 
+/*
+ * note that the data in a socket buffer is now delivered and that the buffer
+ * should be freed
+ */
+static void afs_data_delivered(struct sk_buff *skb)
+{
+	if (!skb) {
+		_debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs));
+		dump_stack();
+	} else {
+		_debug("DLVR %p{%u} [%d]",
+		       skb, skb->mark, atomic_read(&afs_outstanding_skbs));
+		if (atomic_dec_return(&afs_outstanding_skbs) == -1)
+			BUG();
+		rxrpc_kernel_data_delivered(skb);
+	}
+}
+
+/*
+ * free a socket buffer
+ */
+static void afs_free_skb(struct sk_buff *skb)
+{
+	if (!skb) {
+		_debug("FREE NULL [%d]", atomic_read(&afs_outstanding_skbs));
+		dump_stack();
+	} else {
+		_debug("FREE %p{%u} [%d]",
+		       skb, skb->mark, atomic_read(&afs_outstanding_skbs));
+		if (atomic_dec_return(&afs_outstanding_skbs) == -1)
+			BUG();
+		rxrpc_kernel_free_skb(skb);
+	}
+}
+
+/*
+ * free a call
+ */
+static void afs_free_call(struct afs_call *call)
+{
+	_debug("DONE %p{%s} [%d]",
+	       call, call->type->name, atomic_read(&afs_outstanding_calls));
+	if (atomic_dec_return(&afs_outstanding_calls) == -1)
+		BUG();
+
+	ASSERTCMP(call->rxcall, ==, NULL);
+	ASSERT(!work_pending(&call->async_work));
+	ASSERT(skb_queue_empty(&call->rx_queue));
+	ASSERT(call->type->name != NULL);
+
+	kfree(call->request);
+	kfree(call);
+}
+
 /*
  * allocate a call with flat request and reply buffers
  */
@@ -133,30 +193,32 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
 	if (!call)
 		goto nomem_call;
 
+	_debug("CALL %p{%s} [%d]",
+	       call, type->name, atomic_read(&afs_outstanding_calls));
+	atomic_inc(&afs_outstanding_calls);
+
+	call->type = type;
+	call->request_size = request_size;
+	call->reply_max = reply_size;
+
 	if (request_size) {
 		call->request = kmalloc(request_size, GFP_NOFS);
 		if (!call->request)
-			goto nomem_request;
+			goto nomem_free;
 	}
 
 	if (reply_size) {
 		call->buffer = kmalloc(reply_size, GFP_NOFS);
 		if (!call->buffer)
-			goto nomem_buffer;
+			goto nomem_free;
 	}
 
-	call->type = type;
-	call->request_size = request_size;
-	call->reply_max = reply_size;
-
 	init_waitqueue_head(&call->waitq);
 	skb_queue_head_init(&call->rx_queue);
 	return call;
 
-nomem_buffer:
-	kfree(call->request);
-nomem_request:
-	kfree(call);
+nomem_free:
+	afs_free_call(call);
 nomem_call:
 	return NULL;
 }
@@ -188,6 +250,12 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 
 	_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
 
+	ASSERT(call->type != NULL);
+	ASSERT(call->type->name != NULL);
+
+	_debug("MAKE %p{%s} [%d]",
+	       call, call->type->name, atomic_read(&afs_outstanding_calls));
+
 	call->wait_mode = wait_mode;
 	INIT_WORK(&call->async_work, afs_process_async_call);
 
@@ -203,6 +271,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	/* create a call */
 	rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key,
 					 (unsigned long) call, gfp);
+	call->key = NULL;
 	if (IS_ERR(rxcall)) {
 		ret = PTR_ERR(rxcall);
 		goto error_kill_call;
@@ -237,10 +306,10 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 error_do_abort:
 	rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
 	rxrpc_kernel_end_call(rxcall);
+	call->rxcall = NULL;
 error_kill_call:
 	call->type->destructor(call);
-	ASSERT(skb_queue_empty(&call->rx_queue));
-	kfree(call);
+	afs_free_call(call);
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -257,15 +326,19 @@ static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID,
 
 	_enter("%p,,%u", call, skb->mark);
 
+	_debug("ICPT %p{%u} [%d]",
+	       skb, skb->mark, atomic_read(&afs_outstanding_skbs));
+
 	ASSERTCMP(sk, ==, afs_socket->sk);
+	atomic_inc(&afs_outstanding_skbs);
 
 	if (!call) {
 		/* its an incoming call for our callback service */
-		__skb_queue_tail(&afs_incoming_calls, skb);
+		skb_queue_tail(&afs_incoming_calls, skb);
 		schedule_work(&afs_collect_incoming_call_work);
 	} else {
 		/* route the messages directly to the appropriate call */
-		__skb_queue_tail(&call->rx_queue, skb);
+		skb_queue_tail(&call->rx_queue, skb);
 		call->wait_mode->rx_wakeup(call);
 	}
 
@@ -317,9 +390,9 @@ static void afs_deliver_to_call(struct afs_call *call)
 				call->state = AFS_CALL_ERROR;
 				break;
 			}
-			rxrpc_kernel_data_delivered(skb);
+			afs_data_delivered(skb);
 			skb = NULL;
-			break;
+			continue;
 		case RXRPC_SKB_MARK_FINAL_ACK:
 			_debug("Rcv ACK");
 			call->state = AFS_CALL_COMPLETE;
@@ -350,19 +423,19 @@ static void afs_deliver_to_call(struct afs_call *call)
 			break;
 		}
 
-		rxrpc_kernel_free_skb(skb);
+		afs_free_skb(skb);
 	}
 
 	/* make sure the queue is empty if the call is done with (we might have
 	 * aborted the call early because of an unmarshalling error) */
 	if (call->state >= AFS_CALL_COMPLETE) {
 		while ((skb = skb_dequeue(&call->rx_queue)))
-			rxrpc_kernel_free_skb(skb);
+			afs_free_skb(skb);
 		if (call->incoming) {
 			rxrpc_kernel_end_call(call->rxcall);
+			call->rxcall = NULL;
 			call->type->destructor(call);
-			ASSERT(skb_queue_empty(&call->rx_queue));
-			kfree(call);
+			afs_free_call(call);
 		}
 	}
 
@@ -409,14 +482,14 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
 		_debug("call incomplete");
 		rxrpc_kernel_abort_call(call->rxcall, RX_CALL_DEAD);
 		while ((skb = skb_dequeue(&call->rx_queue)))
-			rxrpc_kernel_free_skb(skb);
+			afs_free_skb(skb);
 	}
 
 	_debug("call complete");
 	rxrpc_kernel_end_call(call->rxcall);
+	call->rxcall = NULL;
 	call->type->destructor(call);
-	ASSERT(skb_queue_empty(&call->rx_queue));
-	kfree(call);
+	afs_free_call(call);
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -459,9 +532,7 @@ static void afs_delete_async_call(struct work_struct *work)
 
 	_enter("");
 
-	ASSERT(skb_queue_empty(&call->rx_queue));
-	ASSERT(!work_pending(&call->async_work));
-	kfree(call);
+	afs_free_call(call);
 
 	_leave("");
 }
@@ -489,6 +560,7 @@ static void afs_process_async_call(struct work_struct *work)
 
 		/* kill the call */
 		rxrpc_kernel_end_call(call->rxcall);
+		call->rxcall = NULL;
 		if (call->type->destructor)
 			call->type->destructor(call);
 
@@ -526,7 +598,7 @@ static void afs_collect_incoming_call(struct work_struct *work)
 		_debug("new call");
 
 		/* don't need the notification */
-		rxrpc_kernel_free_skb(skb);
+		afs_free_skb(skb);
 
 		if (!call) {
 			call = kzalloc(sizeof(struct afs_call), GFP_KERNEL);
@@ -541,6 +613,11 @@ static void afs_collect_incoming_call(struct work_struct *work)
 			init_waitqueue_head(&call->waitq);
 			skb_queue_head_init(&call->rx_queue);
 			call->state = AFS_CALL_AWAIT_OP_ID;
+
+			_debug("CALL %p{%s} [%d]",
+			       call, call->type->name,
+			       atomic_read(&afs_outstanding_calls));
+			atomic_inc(&afs_outstanding_calls);
 		}
 
 		rxcall = rxrpc_kernel_accept_call(afs_socket,
@@ -551,7 +628,8 @@ static void afs_collect_incoming_call(struct work_struct *work)
 		}
 	}
 
-	kfree(call);
+	if (call)
+		afs_free_call(call);
 }
 
 /*
@@ -629,8 +707,7 @@ void afs_send_empty_reply(struct afs_call *call)
 		rxrpc_kernel_end_call(call->rxcall);
 		call->rxcall = NULL;
 		call->type->destructor(call);
-		ASSERT(skb_queue_empty(&call->rx_queue));
-		kfree(call);
+		afs_free_call(call);
 		_leave(" [error]");
 		return;
 	}
diff --git a/fs/afs/security.c b/fs/afs/security.c
new file mode 100644
index 00000000000..cbdd7f7162f
--- /dev/null
+++ b/fs/afs/security.c
@@ -0,0 +1,345 @@
+/* AFS security handling
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/ctype.h>
+#include <keys/rxrpc-type.h>
+#include "internal.h"
+
+/*
+ * get a key
+ */
+struct key *afs_request_key(struct afs_cell *cell)
+{
+	struct key *key;
+
+	_enter("{%x}", key_serial(cell->anonymous_key));
+
+	_debug("key %s", cell->anonymous_key->description);
+	key = request_key(&key_type_rxrpc, cell->anonymous_key->description,
+			  NULL);
+	if (IS_ERR(key)) {
+		if (PTR_ERR(key) != -ENOKEY) {
+			_leave(" = %ld", PTR_ERR(key));
+			return key;
+		}
+
+		/* act as anonymous user */
+		_leave(" = {%x} [anon]", key_serial(cell->anonymous_key));
+		return key_get(cell->anonymous_key);
+	} else {
+		/* act as authorised user */
+		_leave(" = {%x} [auth]", key_serial(key));
+		return key;
+	}
+}
+
+/*
+ * dispose of a permits list
+ */
+void afs_zap_permits(struct rcu_head *rcu)
+{
+	struct afs_permits *permits =
+		container_of(rcu, struct afs_permits, rcu);
+	int loop;
+
+	_enter("{%d}", permits->count);
+
+	for (loop = permits->count - 1; loop >= 0; loop--)
+		key_put(permits->permits[loop].key);
+	kfree(permits);
+}
+
+/*
+ * dispose of a permits list in which all the key pointers have been copied
+ */
+static void afs_dispose_of_permits(struct rcu_head *rcu)
+{
+	struct afs_permits *permits =
+		container_of(rcu, struct afs_permits, rcu);
+
+	_enter("{%d}", permits->count);
+
+	kfree(permits);
+}
+
+/*
+ * get the authorising vnode - this is the specified inode itself if it's a
+ * directory or it's the parent directory if the specified inode is a file or
+ * symlink
+ * - the caller must release the ref on the inode
+ */
+static struct afs_vnode *afs_get_auth_inode(struct afs_vnode *vnode,
+					    struct key *key)
+{
+	struct afs_vnode *auth_vnode;
+	struct inode *auth_inode;
+
+	_enter("");
+
+	if (S_ISDIR(vnode->vfs_inode.i_mode)) {
+		auth_inode = igrab(&vnode->vfs_inode);
+		ASSERT(auth_inode != NULL);
+	} else {
+		auth_inode = afs_iget(vnode->vfs_inode.i_sb, key,
+				      &vnode->status.parent);
+		if (IS_ERR(auth_inode))
+			return ERR_PTR(PTR_ERR(auth_inode));
+	}
+
+	auth_vnode = AFS_FS_I(auth_inode);
+	_leave(" = {%x}", auth_vnode->fid.vnode);
+	return auth_vnode;
+}
+
+/*
+ * clear the permit cache on a directory vnode
+ */
+void afs_clear_permits(struct afs_vnode *vnode)
+{
+	struct afs_permits *permits;
+
+	_enter("{%x}", vnode->fid.vnode);
+
+	mutex_lock(&vnode->permits_lock);
+	permits = vnode->permits;
+	rcu_assign_pointer(vnode->permits, NULL);
+	mutex_unlock(&vnode->permits_lock);
+
+	if (permits)
+		call_rcu(&permits->rcu, afs_zap_permits);
+	_leave("");
+}
+
+/*
+ * add the result obtained for a vnode to its or its parent directory's cache
+ * for the key used to access it
+ */
+void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order)
+{
+	struct afs_permits *permits, *xpermits;
+	struct afs_permit *permit;
+	struct afs_vnode *auth_vnode;
+	int count, loop;
+
+	_enter("{%x},%x,%lx", vnode->fid.vnode, key_serial(key), acl_order);
+
+	auth_vnode = afs_get_auth_inode(vnode, key);
+	if (IS_ERR(auth_vnode)) {
+		_leave(" [get error %ld]", PTR_ERR(auth_vnode));
+		return;
+	}
+
+	mutex_lock(&auth_vnode->permits_lock);
+
+	/* guard against a rename being detected whilst we waited for the
+	 * lock */
+	if (memcmp(&auth_vnode->fid, &vnode->status.parent,
+		   sizeof(struct afs_fid)) != 0) {
+		_debug("renamed");
+		goto out_unlock;
+	}
+
+	/* have to be careful as the directory's callback may be broken between
+	 * us receiving the status we're trying to cache and us getting the
+	 * lock to update the cache for the status */
+	if (auth_vnode->acl_order - acl_order > 0) {
+		_debug("ACL changed?");
+		goto out_unlock;
+	}
+
+	/* always update the anonymous mask */
+	_debug("anon access %x", vnode->status.anon_access);
+	auth_vnode->status.anon_access = vnode->status.anon_access;
+	if (key == vnode->volume->cell->anonymous_key)
+		goto out_unlock;
+
+	xpermits = auth_vnode->permits;
+	count = 0;
+	if (xpermits) {
+		/* see if the permit is already in the list
+		 * - if it is then we just amend the list
+		 */
+		count = xpermits->count;
+		permit = xpermits->permits;
+		for (loop = count; loop > 0; loop--) {
+			if (permit->key == key) {
+				permit->access_mask =
+					vnode->status.caller_access;
+				goto out_unlock;
+			}
+			permit++;
+		}
+	}
+
+	permits = kmalloc(sizeof(*permits) + sizeof(*permit) * (count + 1),
+			  GFP_NOFS);
+	if (!permits)
+		goto out_unlock;
+
+	memcpy(permits->permits, xpermits->permits,
+	       count * sizeof(struct afs_permit));
+
+	_debug("key %x access %x",
+	       key_serial(key), vnode->status.caller_access);
+	permits->permits[count].access_mask = vnode->status.caller_access;
+	permits->permits[count].key = key_get(key);
+	permits->count = count + 1;
+
+	rcu_assign_pointer(auth_vnode->permits, permits);
+	if (xpermits)
+		call_rcu(&xpermits->rcu, afs_dispose_of_permits);
+
+out_unlock:
+	mutex_unlock(&auth_vnode->permits_lock);
+	iput(&auth_vnode->vfs_inode);
+	_leave("");
+}
+
+/*
+ * check with the fileserver to see if the directory or parent directory is
+ * permitted to be accessed with this authorisation, and if so, what access it
+ * is granted
+ */
+static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
+			    afs_access_t *_access)
+{
+	struct afs_permits *permits;
+	struct afs_permit *permit;
+	struct afs_vnode *auth_vnode;
+	bool valid;
+	int loop, ret;
+
+	_enter("");
+
+	auth_vnode = afs_get_auth_inode(vnode, key);
+	if (IS_ERR(auth_vnode)) {
+		*_access = 0;
+		_leave(" = %ld", PTR_ERR(auth_vnode));
+		return PTR_ERR(auth_vnode);
+	}
+
+	ASSERT(S_ISDIR(auth_vnode->vfs_inode.i_mode));
+
+	/* check the permits to see if we've got one yet */
+	if (key == auth_vnode->volume->cell->anonymous_key) {
+		_debug("anon");
+		*_access = auth_vnode->status.anon_access;
+		valid = true;
+	} else {
+		valid = false;
+		rcu_read_lock();
+		permits = rcu_dereference(auth_vnode->permits);
+		if (permits) {
+			permit = permits->permits;
+			for (loop = permits->count; loop > 0; loop--) {
+				if (permit->key == key) {
+					_debug("found in cache");
+					*_access = permit->access_mask;
+					valid = true;
+					break;
+				}
+				permit++;
+			}
+		}
+		rcu_read_unlock();
+	}
+
+	if (!valid) {
+		/* check the status on the file we're actually interested in
+		 * (the post-processing will cache the result on auth_vnode) */
+		_debug("no valid permit");
+
+		set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
+		ret = afs_vnode_fetch_status(vnode, auth_vnode, key);
+		if (ret < 0) {
+			iput(&auth_vnode->vfs_inode);
+			*_access = 0;
+			_leave(" = %d", ret);
+			return ret;
+		}
+	}
+
+	*_access = vnode->status.caller_access;
+	iput(&auth_vnode->vfs_inode);
+	_leave(" = 0 [access %x]", *_access);
+	return 0;
+}
+
+/*
+ * check the permissions on an AFS file
+ * - AFS ACLs are attached to directories only, and a file is controlled by its
+ *   parent directory's ACL
+ */
+int afs_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	afs_access_t access;
+	struct key *key;
+	int ret;
+
+	_enter("{%x:%x},%x,", vnode->fid.vid, vnode->fid.vnode, mask);
+
+	key = afs_request_key(vnode->volume->cell);
+	if (IS_ERR(key)) {
+		_leave(" = %ld [key]", PTR_ERR(key));
+		return PTR_ERR(key);
+	}
+
+	/* check the permits to see if we've got one yet */
+	ret = afs_check_permit(vnode, key, &access);
+	if (ret < 0) {
+		key_put(key);
+		_leave(" = %d [check]", ret);
+		return ret;
+	}
+
+	/* interpret the access mask */
+	_debug("REQ %x ACC %x on %s",
+	       mask, access, S_ISDIR(inode->i_mode) ? "dir" : "file");
+
+	if (S_ISDIR(inode->i_mode)) {
+		if (mask & MAY_EXEC) {
+			if (!(access & AFS_ACE_LOOKUP))
+				goto permission_denied;
+		} else if (mask & MAY_READ) {
+			if (!(access & AFS_ACE_READ))
+				goto permission_denied;
+		} else if (mask & MAY_WRITE) {
+			if (!(access & (AFS_ACE_DELETE | /* rmdir, unlink, rename from */
+					AFS_ACE_INSERT | /* create, mkdir, symlink, rename to */
+					AFS_ACE_WRITE))) /* chmod */
+				goto permission_denied;
+		} else {
+			BUG();
+		}
+	} else {
+		if (!(access & AFS_ACE_LOOKUP))
+			goto permission_denied;
+		if (mask & (MAY_EXEC | MAY_READ)) {
+			if (!(access & AFS_ACE_READ))
+				goto permission_denied;
+		} else if (mask & MAY_WRITE) {
+			if (!(access & AFS_ACE_WRITE))
+				goto permission_denied;
+		}
+	}
+
+	key_put(key);
+	return generic_permission(inode, mask, NULL);
+
+permission_denied:
+	key_put(key);
+	_leave(" = -EACCES");
+	return -EACCES;
+}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 77e68759788..497350a5463 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -24,12 +24,6 @@
 
 #define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
 
-struct afs_mount_params {
-	int			rwpath;
-	struct afs_cell		*default_cell;
-	struct afs_volume	*volume;
-};
-
 static void afs_i_init_once(void *foo, struct kmem_cache *cachep,
 			    unsigned long flags);
 
@@ -150,8 +144,8 @@ static int want_no_value(char *const *_value, const char *option)
  * - this function has been shamelessly adapted from the ext3 fs which
  *   shamelessly adapted it from the msdos fs
  */
-static int afs_super_parse_options(struct afs_mount_params *params,
-				   char *options, const char **devname)
+static int afs_parse_options(struct afs_mount_params *params,
+			     char *options, const char **devname)
 {
 	struct afs_cell *cell;
 	char *key, *value;
@@ -183,8 +177,8 @@ static int afs_super_parse_options(struct afs_mount_params *params,
 			cell = afs_cell_lookup(value, strlen(value));
 			if (IS_ERR(cell))
 				return PTR_ERR(cell);
-			afs_put_cell(params->default_cell);
-			params->default_cell = cell;
+			afs_put_cell(params->cell);
+			params->cell = cell;
 		} else {
 			printk("kAFS: Unknown mount option: '%s'\n",  key);
 			ret = -EINVAL;
@@ -198,6 +192,99 @@ error:
 	return ret;
 }
 
+/*
+ * parse a device name to get cell name, volume name, volume type and R/W
+ * selector
+ * - this can be one of the following:
+ *	"%[cell:]volume[.]"		R/W volume
+ *	"#[cell:]volume[.]"		R/O or R/W volume (rwpath=0),
+ *					 or R/W (rwpath=1) volume
+ *	"%[cell:]volume.readonly"	R/O volume
+ *	"#[cell:]volume.readonly"	R/O volume
+ *	"%[cell:]volume.backup"		Backup volume
+ *	"#[cell:]volume.backup"		Backup volume
+ */
+static int afs_parse_device_name(struct afs_mount_params *params,
+				 const char *name)
+{
+	struct afs_cell *cell;
+	const char *cellname, *suffix;
+	int cellnamesz;
+
+	_enter(",%s", name);
+
+	if (!name) {
+		printk(KERN_ERR "kAFS: no volume name specified\n");
+		return -EINVAL;
+	}
+
+	if ((name[0] != '%' && name[0] != '#') || !name[1]) {
+		printk(KERN_ERR "kAFS: unparsable volume name\n");
+		return -EINVAL;
+	}
+
+	/* determine the type of volume we're looking for */
+	params->type = AFSVL_ROVOL;
+	params->force = false;
+	if (params->rwpath || name[0] == '%') {
+		params->type = AFSVL_RWVOL;
+		params->force = true;
+	}
+	name++;
+
+	/* split the cell name out if there is one */
+	params->volname = strchr(name, ':');
+	if (params->volname) {
+		cellname = name;
+		cellnamesz = params->volname - name;
+		params->volname++;
+	} else {
+		params->volname = name;
+		cellname = NULL;
+		cellnamesz = 0;
+	}
+
+	/* the volume type is further affected by a possible suffix */
+	suffix = strrchr(params->volname, '.');
+	if (suffix) {
+		if (strcmp(suffix, ".readonly") == 0) {
+			params->type = AFSVL_ROVOL;
+			params->force = true;
+		} else if (strcmp(suffix, ".backup") == 0) {
+			params->type = AFSVL_BACKVOL;
+			params->force = true;
+		} else if (suffix[1] == 0) {
+		} else {
+			suffix = NULL;
+		}
+	}
+
+	params->volnamesz = suffix ?
+		suffix - params->volname : strlen(params->volname);
+
+	_debug("cell %*.*s [%p]",
+	       cellnamesz, cellnamesz, cellname ?: "", params->cell);
+
+	/* lookup the cell record */
+	if (cellname || !params->cell) {
+		cell = afs_cell_lookup(cellname, cellnamesz);
+		if (IS_ERR(cell)) {
+			printk(KERN_ERR "kAFS: unable to lookup cell '%s'\n",
+			       cellname ?: "");
+			return PTR_ERR(cell);
+		}
+		afs_put_cell(params->cell);
+		params->cell = cell;
+	}
+
+	_debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s",
+	       params->cell->name, params->cell,
+	       params->volnamesz, params->volnamesz, params->volname,
+	       suffix ?: "-", params->type, params->force ? " FORCE" : "");
+
+	return 0;
+}
+
 /*
  * check a superblock to see if it's the one we're looking for
  */
@@ -244,7 +331,7 @@ static int afs_fill_super(struct super_block *sb, void *data)
 	fid.vid		= as->volume->vid;
 	fid.vnode	= 1;
 	fid.unique	= 1;
-	inode = afs_iget(sb, &fid);
+	inode = afs_iget(sb, params->key, &fid);
 	if (IS_ERR(inode))
 		goto error_inode;
 
@@ -285,31 +372,40 @@ static int afs_get_sb(struct file_system_type *fs_type,
 	struct afs_mount_params params;
 	struct super_block *sb;
 	struct afs_volume *vol;
+	struct key *key;
 	int ret;
 
 	_enter(",,%s,%p", dev_name, options);
 
 	memset(&params, 0, sizeof(params));
 
-	/* parse the options */
+	/* parse the options and device name */
 	if (options) {
-		ret = afs_super_parse_options(&params, options, &dev_name);
+		ret = afs_parse_options(&params, options, &dev_name);
 		if (ret < 0)
 			goto error;
-		if (!dev_name) {
-			printk("kAFS: no volume name specified\n");
-			ret = -EINVAL;
-			goto error;
-		}
 	}
 
+
+	ret = afs_parse_device_name(&params, dev_name);
+	if (ret < 0)
+		goto error;
+
+	/* try and do the mount securely */
+	key = afs_request_key(params.cell);
+	if (IS_ERR(key)) {
+		_leave(" = %ld [key]", PTR_ERR(key));
+		ret = PTR_ERR(key);
+		goto error;
+	}
+	params.key = key;
+
 	/* parse the device name */
-	vol = afs_volume_lookup(dev_name, params.default_cell, params.rwpath);
+	vol = afs_volume_lookup(&params);
 	if (IS_ERR(vol)) {
 		ret = PTR_ERR(vol);
 		goto error;
 	}
-
 	params.volume = vol;
 
 	/* allocate a deviceless superblock */
@@ -337,13 +433,14 @@ static int afs_get_sb(struct file_system_type *fs_type,
 
 	simple_set_mnt(mnt, sb);
 	afs_put_volume(params.volume);
-	afs_put_cell(params.default_cell);
+	afs_put_cell(params.cell);
 	_leave(" = 0 [%p]", sb);
 	return 0;
 
 error:
 	afs_put_volume(params.volume);
-	afs_put_cell(params.default_cell);
+	afs_put_cell(params.cell);
+	key_put(params.key);
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -375,6 +472,7 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep,
 		memset(vnode, 0, sizeof(*vnode));
 		inode_init_once(&vnode->vfs_inode);
 		init_waitqueue_head(&vnode->update_waitq);
+		mutex_init(&vnode->permits_lock);
 		spin_lock_init(&vnode->lock);
 		INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work);
 		mutex_init(&vnode->cb_broken_lock);
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 0c7eba17483..36c1306e09e 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -127,6 +127,7 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call,
  * VL.GetEntryByName operation type
  */
 static const struct afs_call_type afs_RXVLGetEntryByName = {
+	.name		= "VL.GetEntryByName",
 	.deliver	= afs_deliver_vl_get_entry_by_xxx,
 	.abort_to_error	= afs_vl_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
@@ -136,6 +137,7 @@ static const struct afs_call_type afs_RXVLGetEntryByName = {
  * VL.GetEntryById operation type
  */
 static const struct afs_call_type afs_RXVLGetEntryById = {
+	.name		= "VL.GetEntryById",
 	.deliver	= afs_deliver_vl_get_entry_by_xxx,
 	.abort_to_error	= afs_vl_abort_to_error,
 	.destructor	= afs_flat_call_destructor,
@@ -145,6 +147,7 @@ static const struct afs_call_type afs_RXVLGetEntryById = {
  * dispatch a get volume entry by name operation
  */
 int afs_vl_get_entry_by_name(struct in_addr *addr,
+			     struct key *key,
 			     const char *volname,
 			     struct afs_cache_vlocation *entry,
 			     const struct afs_wait_mode *wait_mode)
@@ -163,6 +166,7 @@ int afs_vl_get_entry_by_name(struct in_addr *addr,
 	if (!call)
 		return -ENOMEM;
 
+	call->key = key;
 	call->reply = entry;
 	call->service_id = VL_SERVICE;
 	call->port = htons(AFS_VL_PORT);
@@ -183,6 +187,7 @@ int afs_vl_get_entry_by_name(struct in_addr *addr,
  * dispatch a get volume entry by ID operation
  */
 int afs_vl_get_entry_by_id(struct in_addr *addr,
+			   struct key *key,
 			   afs_volid_t volid,
 			   afs_voltype_t voltype,
 			   struct afs_cache_vlocation *entry,
@@ -197,6 +202,7 @@ int afs_vl_get_entry_by_id(struct in_addr *addr,
 	if (!call)
 		return -ENOMEM;
 
+	call->key = key;
 	call->reply = entry;
 	call->service_id = VL_SERVICE;
 	call->port = htons(AFS_VL_PORT);
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 60cb2f408c7..7d9815e9ae0 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -33,6 +33,7 @@ static struct workqueue_struct *afs_vlocation_update_worker;
  * about the volume in question
  */
 static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
+					   struct key *key,
 					   struct afs_cache_vlocation *vldb)
 {
 	struct afs_cell *cell = vl->cell;
@@ -49,7 +50,7 @@ static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
 		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
 
 		/* attempt to access the VL server */
-		ret = afs_vl_get_entry_by_name(&addr, vl->vldb.name, vldb,
+		ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb,
 					       &afs_sync_call);
 		switch (ret) {
 		case 0:
@@ -86,6 +87,7 @@ out:
  * about the volume in question
  */
 static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
+					 struct key *key,
 					 afs_volid_t volid,
 					 afs_voltype_t voltype,
 					 struct afs_cache_vlocation *vldb)
@@ -104,7 +106,7 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
 		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
 
 		/* attempt to access the VL server */
-		ret = afs_vl_get_entry_by_id(&addr, volid, voltype, vldb,
+		ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb,
 					     &afs_sync_call);
 		switch (ret) {
 		case 0:
@@ -188,6 +190,7 @@ static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
  * update record if we found it in the cache
  */
 static int afs_vlocation_update_record(struct afs_vlocation *vl,
+				       struct key *key,
 				       struct afs_cache_vlocation *vldb)
 {
 	afs_voltype_t voltype;
@@ -228,7 +231,7 @@ static int afs_vlocation_update_record(struct afs_vlocation *vl,
 	/* contact the server to make sure the volume is still available
 	 * - TODO: need to handle disconnected operation here
 	 */
-	ret = afs_vlocation_access_vl_by_id(vl, vid, voltype, vldb);
+	ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb);
 	switch (ret) {
 		/* net error */
 	default:
@@ -287,7 +290,8 @@ static void afs_vlocation_apply_update(struct afs_vlocation *vl,
  * fill in a volume location record, consulting the cache and the VL server
  * both
  */
-static int afs_vlocation_fill_in_record(struct afs_vlocation *vl)
+static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
+					struct key *key)
 {
 	struct afs_cache_vlocation vldb;
 	int ret;
@@ -310,11 +314,11 @@ static int afs_vlocation_fill_in_record(struct afs_vlocation *vl)
 		/* try to update a known volume in the cell VL databases by
 		 * ID as the name may have changed */
 		_debug("found in cache");
-		ret = afs_vlocation_update_record(vl, &vldb);
+		ret = afs_vlocation_update_record(vl, key, &vldb);
 	} else {
 		/* try to look up an unknown volume in the cell VL databases by
 		 * name */
-		ret = afs_vlocation_access_vl_by_name(vl, &vldb);
+		ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
 		if (ret < 0) {
 			printk("kAFS: failed to locate '%s' in cell '%s'\n",
 			       vl->vldb.name, vl->cell->name);
@@ -366,14 +370,16 @@ void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
  * - insert/update in the local cache if did get a VL response
  */
 struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
+					   struct key *key,
 					   const char *name,
 					   size_t namesz)
 {
 	struct afs_vlocation *vl;
 	int ret;
 
-	_enter("{%s},%*.*s,%zu",
-	       cell->name, (int) namesz, (int) namesz, name, namesz);
+	_enter("{%s},{%x},%*.*s,%zu",
+	       cell->name, key_serial(key),
+	       (int) namesz, (int) namesz, name, namesz);
 
 	if (namesz > sizeof(vl->vldb.name)) {
 		_leave(" = -ENAMETOOLONG");
@@ -405,7 +411,7 @@ struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
 	up_write(&cell->vl_sem);
 
 fill_in_record:
-	ret = afs_vlocation_fill_in_record(vl);
+	ret = afs_vlocation_fill_in_record(vl, key);
 	if (ret < 0)
 		goto error_abandon;
 	vl->state = AFS_VL_VALID;
@@ -656,7 +662,7 @@ static void afs_vlocation_updater(struct work_struct *work)
 	vl->upd_rej_cnt = 0;
 	vl->upd_busy_cnt = 0;
 
-	ret = afs_vlocation_update_record(vl, &vldb);
+	ret = afs_vlocation_update_record(vl, NULL, &vldb);
 	switch (ret) {
 	case 0:
 		afs_vlocation_apply_update(vl, &vldb);
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index d2ca1398474..160097619ec 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -238,9 +238,11 @@ static void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
  *   - there are any outstanding ops that will fetch the status
  * - TODO implement local caching
  */
-int afs_vnode_fetch_status(struct afs_vnode *vnode)
+int afs_vnode_fetch_status(struct afs_vnode *vnode,
+			   struct afs_vnode *auth_vnode, struct key *key)
 {
 	struct afs_server *server;
+	unsigned long acl_order;
 	int ret;
 
 	DECLARE_WAITQUEUE(myself, current);
@@ -260,6 +262,10 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode)
 		return -ENOENT;
 	}
 
+	acl_order = 0;
+	if (auth_vnode)
+		acl_order = auth_vnode->acl_order;
+
 	spin_lock(&vnode->lock);
 
 	if (!test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
@@ -324,12 +330,14 @@ get_anyway:
 		_debug("USING SERVER: %p{%08x}",
 		       server, ntohl(server->addr.s_addr));
 
-		ret = afs_fs_fetch_file_status(server, vnode, NULL,
+		ret = afs_fs_fetch_file_status(server, key, vnode, NULL,
 					       &afs_sync_call);
 
 	} while (!afs_volume_release_fileserver(vnode, server, ret));
 
 	/* adjust the flags */
+	if (ret == 0 && auth_vnode)
+		afs_cache_permit(vnode, key, acl_order);
 	afs_vnode_finalise_status_update(vnode, server, ret);
 
 	_leave(" = %d", ret);
@@ -340,17 +348,18 @@ get_anyway:
  * fetch file data from the volume
  * - TODO implement caching and server failover
  */
-int afs_vnode_fetch_data(struct afs_vnode *vnode, off_t offset, size_t length,
-			 struct page *page)
+int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key,
+			 off_t offset, size_t length, struct page *page)
 {
 	struct afs_server *server;
 	int ret;
 
-	_enter("%s,{%u,%u,%u}",
+	_enter("%s{%u,%u,%u},%x,,,",
 	       vnode->volume->vlocation->vldb.name,
 	       vnode->fid.vid,
 	       vnode->fid.vnode,
-	       vnode->fid.unique);
+	       vnode->fid.unique,
+	       key_serial(key));
 
 	/* this op will fetch the status */
 	spin_lock(&vnode->lock);
@@ -367,8 +376,8 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode, off_t offset, size_t length,
 
 		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
 
-		ret = afs_fs_fetch_data(server, vnode, offset, length, page,
-					NULL, &afs_sync_call);
+		ret = afs_fs_fetch_data(server, key, vnode, offset, length,
+					page, NULL, &afs_sync_call);
 
 	} while (!afs_volume_release_fileserver(vnode, server, ret));
 
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 45491cfd4f4..15e13678c21 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -41,83 +41,20 @@ static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
  * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
  *           explicitly told otherwise
  */
-struct afs_volume *afs_volume_lookup(const char *name, struct afs_cell *cell,
-				     int rwpath)
+struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
 {
 	struct afs_vlocation *vlocation = NULL;
 	struct afs_volume *volume = NULL;
 	struct afs_server *server = NULL;
-	afs_voltype_t type;
-	const char *cellname, *volname, *suffix;
 	char srvtmask;
-	int force, ret, loop, cellnamesz, volnamesz;
+	int ret, loop;
 
-	_enter("%s,,%d,", name, rwpath);
-
-	if (!name || (name[0] != '%' && name[0] != '#') || !name[1]) {
-		printk("kAFS: unparsable volume name\n");
-		return ERR_PTR(-EINVAL);
-	}
-
-	/* determine the type of volume we're looking for */
-	force = 0;
-	type = AFSVL_ROVOL;
-
-	if (rwpath || name[0] == '%') {
-		type = AFSVL_RWVOL;
-		force = 1;
-	}
-
-	suffix = strrchr(name, '.');
-	if (suffix) {
-		if (strcmp(suffix, ".readonly") == 0) {
-			type = AFSVL_ROVOL;
-			force = 1;
-		} else if (strcmp(suffix, ".backup") == 0) {
-			type = AFSVL_BACKVOL;
-			force = 1;
-		} else if (suffix[1] == 0) {
-		} else {
-			suffix = NULL;
-		}
-	}
-
-	/* split the cell and volume names */
-	name++;
-	volname = strchr(name, ':');
-	if (volname) {
-		cellname = name;
-		cellnamesz = volname - name;
-		volname++;
-	} else {
-		volname = name;
-		cellname = NULL;
-		cellnamesz = 0;
-	}
-
-	volnamesz = suffix ? suffix - volname : strlen(volname);
-
-	_debug("CELL:%*.*s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s",
-	       cellnamesz, cellnamesz, cellname ?: "", cell,
-	       volnamesz, volnamesz, volname, suffix ?: "-",
-	       type,
-	       force ? " FORCE" : "");
-
-	/* lookup the cell record */
-	if (cellname || !cell) {
-		cell = afs_cell_lookup(cellname, cellnamesz);
-		if (IS_ERR(cell)) {
-			ret = PTR_ERR(cell);
-			printk("kAFS: unable to lookup cell '%s'\n",
-			       cellname ?: "");
-			goto error;
-		}
-	} else {
-		afs_get_cell(cell);
-	}
+	_enter("{%*.*s,%d}",
+	       params->volnamesz, params->volnamesz, params->volname, params->rwpath);
 
 	/* lookup the volume location record */
-	vlocation = afs_vlocation_lookup(cell, volname, volnamesz);
+	vlocation = afs_vlocation_lookup(params->cell, params->key,
+					 params->volname, params->volnamesz);
 	if (IS_ERR(vlocation)) {
 		ret = PTR_ERR(vlocation);
 		vlocation = NULL;
@@ -126,30 +63,30 @@ struct afs_volume *afs_volume_lookup(const char *name, struct afs_cell *cell,
 
 	/* make the final decision on the type we want */
 	ret = -ENOMEDIUM;
-	if (force && !(vlocation->vldb.vidmask & (1 << type)))
+	if (params->force && !(vlocation->vldb.vidmask & (1 << params->type)))
 		goto error;
 
 	srvtmask = 0;
 	for (loop = 0; loop < vlocation->vldb.nservers; loop++)
 		srvtmask |= vlocation->vldb.srvtmask[loop];
 
-	if (force) {
-		if (!(srvtmask & (1 << type)))
+	if (params->force) {
+		if (!(srvtmask & (1 << params->type)))
 			goto error;
 	} else if (srvtmask & AFS_VOL_VTM_RO) {
-		type = AFSVL_ROVOL;
+		params->type = AFSVL_ROVOL;
 	} else if (srvtmask & AFS_VOL_VTM_RW) {
-		type = AFSVL_RWVOL;
+		params->type = AFSVL_RWVOL;
 	} else {
 		goto error;
 	}
 
-	down_write(&cell->vl_sem);
+	down_write(&params->cell->vl_sem);
 
 	/* is the volume already active? */
-	if (vlocation->vols[type]) {
+	if (vlocation->vols[params->type]) {
 		/* yes - re-use it */
-		volume = vlocation->vols[type];
+		volume = vlocation->vols[params->type];
 		afs_get_volume(volume);
 		goto success;
 	}
@@ -163,10 +100,10 @@ struct afs_volume *afs_volume_lookup(const char *name, struct afs_cell *cell,
 		goto error_up;
 
 	atomic_set(&volume->usage, 1);
-	volume->type		= type;
-	volume->type_force	= force;
-	volume->cell		= cell;
-	volume->vid		= vlocation->vldb.vid[type];
+	volume->type		= params->type;
+	volume->type_force	= params->force;
+	volume->cell		= params->cell;
+	volume->vid		= vlocation->vldb.vid[params->type];
 
 	init_rwsem(&volume->server_sem);
 
@@ -196,28 +133,26 @@ struct afs_volume *afs_volume_lookup(const char *name, struct afs_cell *cell,
 	afs_get_vlocation(vlocation);
 	volume->vlocation = vlocation;
 
-	vlocation->vols[type] = volume;
+	vlocation->vols[volume->type] = volume;
 
 success:
 	_debug("kAFS selected %s volume %08x",
 	       afs_voltypes[volume->type], volume->vid);
-	up_write(&cell->vl_sem);
+	up_write(&params->cell->vl_sem);
 	afs_put_vlocation(vlocation);
-	afs_put_cell(cell);
 	_leave(" = %p", volume);
 	return volume;
 
 	/* clean up */
 error_up:
-	up_write(&cell->vl_sem);
+	up_write(&params->cell->vl_sem);
 error:
 	afs_put_vlocation(vlocation);
-	afs_put_cell(cell);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
 
 error_discard:
-	up_write(&cell->vl_sem);
+	up_write(&params->cell->vl_sem);
 
 	for (loop = volume->nservers - 1; loop >= 0; loop--)
 		afs_put_server(volume->servers[loop]);
-- 
cgit v1.2.3


From 0795e7c031c4bda46fbdde678adf29de19bef7f4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 26 Apr 2007 15:57:43 -0700
Subject: [AFS]: Update the AFS fs documentation.

Update the AFS fs documentation.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/filesystems/afs.txt | 214 +++++++++++++++++++++++++++-----------
 1 file changed, 154 insertions(+), 60 deletions(-)

diff --git a/Documentation/filesystems/afs.txt b/Documentation/filesystems/afs.txt
index 2f4237dfb8c..12ad6c7f4e5 100644
--- a/Documentation/filesystems/afs.txt
+++ b/Documentation/filesystems/afs.txt
@@ -1,31 +1,82 @@
+			     ====================
 			     kAFS: AFS FILESYSTEM
 			     ====================
 
-ABOUT
-=====
+Contents:
+
+ - Overview.
+ - Usage.
+ - Mountpoints.
+ - Proc filesystem.
+ - The cell database.
+ - Security.
+ - Examples.
+
+
+========
+OVERVIEW
+========
 
-This filesystem provides a fairly simple AFS filesystem driver. It is under
-development and only provides very basic facilities. It does not yet support
-the following AFS features:
+This filesystem provides a fairly simple secure AFS filesystem driver. It is
+under development and does not yet provide the full feature set.  The features
+it does support include:
 
-	(*) Write support.
-	(*) Communications security.
-	(*) Local caching.
-	(*) pioctl() system call.
-	(*) Automatic mounting of embedded mountpoints.
+ (*) Security (currently only AFS kaserver and KerberosIV tickets).
 
+ (*) File reading.
 
+ (*) Automounting.
+
+It does not yet support the following AFS features:
+
+ (*) Write support.
+
+ (*) Local caching.
+
+ (*) pioctl() system call.
+
+
+===========
+COMPILATION
+===========
+
+The filesystem should be enabled by turning on the kernel configuration
+options:
+
+	CONFIG_AF_RXRPC		- The RxRPC protocol transport
+	CONFIG_RXKAD		- The RxRPC Kerberos security handler
+	CONFIG_AFS		- The AFS filesystem
+
+Additionally, the following can be turned on to aid debugging:
+
+	CONFIG_AF_RXRPC_DEBUG	- Permit AF_RXRPC debugging to be enabled
+	CONFIG_AFS_DEBUG	- Permit AFS debugging to be enabled
+
+They permit the debugging messages to be turned on dynamically by manipulating
+the masks in the following files:
+
+	/sys/module/af_rxrpc/parameters/debug
+	/sys/module/afs/parameters/debug
+
+
+=====
 USAGE
 =====
 
 When inserting the driver modules the root cell must be specified along with a
 list of volume location server IP addresses:
 
-	insmod rxrpc.o
+	insmod af_rxrpc.o
+	insmod rxkad.o
 	insmod kafs.o rootcell=cambridge.redhat.com:172.16.18.73:172.16.18.91
 
-The first module is a driver for the RxRPC remote operation protocol, and the
-second is the actual filesystem driver for the AFS filesystem.
+The first module is the AF_RXRPC network protocol driver.  This provides the
+RxRPC remote operation protocol and may also be accessed from userspace.  See:
+
+	Documentation/networking/rxrpc.txt
+
+The second module is the kerberos RxRPC security driver, and the third module
+is the actual filesystem driver for the AFS filesystem.
 
 Once the module has been loaded, more modules can be added by the following
 procedure:
@@ -33,7 +84,7 @@ procedure:
 	echo add grand.central.org 18.7.14.88:128.2.191.224 >/proc/fs/afs/cells
 
 Where the parameters to the "add" command are the name of a cell and a list of
-volume location servers within that cell.
+volume location servers within that cell, with the latter separated by colons.
 
 Filesystems can be mounted anywhere by commands similar to the following:
 
@@ -42,11 +93,6 @@ Filesystems can be mounted anywhere by commands similar to the following:
 	mount -t afs "#root.afs." /afs
 	mount -t afs "#root.cell." /afs/cambridge
 
-  NB: When using this on Linux 2.4, the mount command has to be different,
-      since the filesystem doesn't have access to the device name argument:
-
-	mount -t afs none /afs -ovol="#root.afs."
-
 Where the initial character is either a hash or a percent symbol depending on
 whether you definitely want a R/W volume (hash) or whether you'd prefer a R/O
 volume, but are willing to use a R/W volume instead (percent).
@@ -60,55 +106,66 @@ named volume will be looked up in the cell specified during insmod.
 Additional cells can be added through /proc (see later section).
 
 
+===========
 MOUNTPOINTS
 ===========
 
-AFS has a concept of mountpoints. These are specially formatted symbolic links
-(of the same form as the "device name" passed to mount). kAFS presents these
-to the user as directories that have special properties:
+AFS has a concept of mountpoints. In AFS terms, these are specially formatted
+symbolic links (of the same form as the "device name" passed to mount).  kAFS
+presents these to the user as directories that have a follow-link capability
+(ie: symbolic link semantics).  If anyone attempts to access them, they will
+automatically cause the target volume to be mounted (if possible) on that site.
 
-  (*) They cannot be listed. Running a program like "ls" on them will incur an
-      EREMOTE error (Object is remote).
+Automatically mounted filesystems will be automatically unmounted approximately
+twenty minutes after they were last used.  Alternatively they can be unmounted
+directly with the umount() system call.
 
-  (*) Other objects can't be looked up inside of them. This also incurs an
-      EREMOTE error.
+Manually unmounting an AFS volume will cause any idle submounts upon it to be
+culled first.  If all are culled, then the requested volume will also be
+unmounted, otherwise error EBUSY will be returned.
 
-  (*) They can be queried with the readlink() system call, which will return
-      the name of the mountpoint to which they point. The "readlink" program
-      will also work.
+This can be used by the administrator to attempt to unmount the whole AFS tree
+mounted on /afs in one go by doing:
 
-  (*) They can be mounted on (which symbolic links can't).
+	umount /afs
 
 
+===============
 PROC FILESYSTEM
 ===============
 
-The rxrpc module creates a number of files in various places in the /proc
-filesystem:
-
-  (*) Firstly, some information files are made available in a directory called
-      "/proc/net/rxrpc/". These list the extant transport endpoint, peer,
-      connection and call records.
-
-  (*) Secondly, some control files are made available in a directory called
-      "/proc/sys/rxrpc/". Currently, all these files can be used for is to
-      turn on various levels of tracing.
-
 The AFS modules creates a "/proc/fs/afs/" directory and populates it:
 
-  (*) A "cells" file that lists cells currently known to the afs module.
+  (*) A "cells" file that lists cells currently known to the afs module and
+      their usage counts:
+
+	[root@andromeda ~]# cat /proc/fs/afs/cells
+	USE NAME
+	  3 cambridge.redhat.com
 
   (*) A directory per cell that contains files that list volume location
       servers, volumes, and active servers known within that cell.
 
+	[root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/servers
+	USE ADDR            STATE
+	  4 172.16.18.91        0
+	[root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/vlservers
+	ADDRESS
+	172.16.18.91
+	[root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/volumes
+	USE STT VLID[0]  VLID[1]  VLID[2]  NAME
+	  1 Val 20000000 20000001 20000002 root.afs
 
+
+=================
 THE CELL DATABASE
 =================
 
-The filesystem maintains an internal database of all the cells it knows and
-the IP addresses of the volume location servers for those cells. The cell to
-which the computer belongs is added to the database when insmod is performed
-by the "rootcell=" argument.
+The filesystem maintains an internal database of all the cells it knows and the
+IP addresses of the volume location servers for those cells.  The cell to which
+the system belongs is added to the database when insmod is performed by the
+"rootcell=" argument or, if compiled in, using a "kafs.rootcell=" argument on
+the kernel command line.
 
 Further cells can be added by commands similar to the following:
 
@@ -118,20 +175,65 @@ Further cells can be added by commands similar to the following:
 No other cell database operations are available at this time.
 
 
+========
+SECURITY
+========
+
+Secure operations are initiated by acquiring a key using the klog program.  A
+very primitive klog program is available at:
+
+	http://people.redhat.com/~dhowells/rxrpc/klog.c
+
+This should be compiled by:
+
+	make klog LDLIBS="-lcrypto -lcrypt -lkrb4 -lkeyutils"
+
+And then run as:
+
+	./klog
+
+Assuming it's successful, this adds a key of type RxRPC, named for the service
+and cell, eg: "afs@<cellname>".  This can be viewed with the keyctl program or
+by cat'ing /proc/keys:
+
+	[root@andromeda ~]# keyctl show
+	Session Keyring
+	       -3 --alswrv      0     0  keyring: _ses.3268
+		2 --alswrv      0     0   \_ keyring: _uid.0
+	111416553 --als--v      0     0   \_ rxrpc: afs@CAMBRIDGE.REDHAT.COM
+
+Currently the username, realm, password and proposed ticket lifetime are
+compiled in to the program.
+
+It is not required to acquire a key before using AFS facilities, but if one is
+not acquired then all operations will be governed by the anonymous user parts
+of the ACLs.
+
+If a key is acquired, then all AFS operations, including mounts and automounts,
+made by a possessor of that key will be secured with that key.
+
+If a file is opened with a particular key and then the file descriptor is
+passed to a process that doesn't have that key (perhaps over an AF_UNIX
+socket), then the operations on the file will be made with key that was used to
+open the file.
+
+
+========
 EXAMPLES
 ========
 
-Here's what I use to test this. Some of the names and IP addresses are local
-to my internal DNS. My "root.afs" partition has a mount point within it for
+Here's what I use to test this.  Some of the names and IP addresses are local
+to my internal DNS.  My "root.afs" partition has a mount point within it for
 some public volumes volumes.
 
-insmod -S /tmp/rxrpc.o 
-insmod -S /tmp/kafs.o rootcell=cambridge.redhat.com:172.16.18.73:172.16.18.91
+insmod /tmp/rxrpc.o
+insmod /tmp/rxkad.o
+insmod /tmp/kafs.o rootcell=cambridge.redhat.com:172.16.18.91
 
 mount -t afs \%root.afs. /afs
 mount -t afs \%cambridge.redhat.com:root.cell. /afs/cambridge.redhat.com/
 
-echo add grand.central.org 18.7.14.88:128.2.191.224 > /proc/fs/afs/cells 
+echo add grand.central.org 18.7.14.88:128.2.191.224 > /proc/fs/afs/cells
 mount -t afs "#grand.central.org:root.cell." /afs/grand.central.org/
 mount -t afs "#grand.central.org:root.archive." /afs/grand.central.org/archive
 mount -t afs "#grand.central.org:root.contrib." /afs/grand.central.org/contrib
@@ -141,15 +243,7 @@ mount -t afs "#grand.central.org:root.service." /afs/grand.central.org/service
 mount -t afs "#grand.central.org:root.software." /afs/grand.central.org/software
 mount -t afs "#grand.central.org:root.user." /afs/grand.central.org/user
 
-umount /afs/grand.central.org/user
-umount /afs/grand.central.org/software
-umount /afs/grand.central.org/service
-umount /afs/grand.central.org/project
-umount /afs/grand.central.org/doc
-umount /afs/grand.central.org/contrib
-umount /afs/grand.central.org/archive
-umount /afs/grand.central.org
-umount /afs/cambridge.redhat.com
 umount /afs
 rmmod kafs
+rmmod rxkad
 rmmod rxrpc
-- 
cgit v1.2.3


From b908fe6b2d1294d93b0d0badf6bf4f9a2cd7d729 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 26 Apr 2007 15:58:17 -0700
Subject: [AFS]: Add support for the CB.GetCapabilities operation.

Add support for the CB.GetCapabilities operation with which the fileserver can
ask the client for the following information:

 (1) The list of network interfaces it has available as IPv4 address + netmask
     plus the MTUs.

 (2) The client's UUID.

 (3) The extended capabilities of the client, for which the only current one
     is unified error mapping (abort code interpretation).

To support this, the patch adds the following routines to AFS:

 (1) A function to iterate through all the network interfaces using RTNETLINK
     to extract IPv4 addresses and MTUs.

 (2) A function to iterate through all the network interfaces using RTNETLINK
     to pull out the MAC address of the lowest index interface to use in UUID
     construction.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/afs/Makefile        |   1 +
 fs/afs/afs_cm.h        |   3 +
 fs/afs/cmservice.c     |  98 ++++++++++
 fs/afs/internal.h      |  42 +++++
 fs/afs/main.c          |  49 +++++
 fs/afs/rxrpc.c         |  39 ++++
 fs/afs/use-rtnetlink.c | 473 +++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 705 insertions(+)
 create mode 100644 fs/afs/use-rtnetlink.c

diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index cca198b2cae..01545eb1d87 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -18,6 +18,7 @@ kafs-objs := \
 	security.o \
 	server.o \
 	super.o \
+	use-rtnetlink.o \
 	vlclient.o \
 	vlocation.o \
 	vnode.o \
diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h
index 7c8e3d43c8e..d4bd201cc31 100644
--- a/fs/afs/afs_cm.h
+++ b/fs/afs/afs_cm.h
@@ -23,6 +23,9 @@ enum AFS_CM_Operations {
 	CBGetCE			= 208,	/* get cache file description */
 	CBGetXStatsVersion	= 209,	/* get version of extended statistics */
 	CBGetXStats		= 210,	/* get contents of extended statistics data */
+	CBGetCapabilities	= 65538, /* get client capabilities */
 };
 
+#define AFS_CAP_ERROR_TRANSLATION	0x1
+
 #endif /* AFS_FS_H */
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index c3ec57a237b..a6af3acf016 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -22,6 +22,8 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *,
 					       struct sk_buff *, bool);
 static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool);
 static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool);
+static int afs_deliver_cb_get_capabilities(struct afs_call *, struct sk_buff *,
+					   bool);
 static void afs_cm_destructor(struct afs_call *);
 
 /*
@@ -54,6 +56,16 @@ static const struct afs_call_type afs_SRXCBProbe = {
 	.destructor	= afs_cm_destructor,
 };
 
+/*
+ * CB.GetCapabilities operation type
+ */
+static const struct afs_call_type afs_SRXCBGetCapabilites = {
+	.name		= "CB.GetCapabilities",
+	.deliver	= afs_deliver_cb_get_capabilities,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_cm_destructor,
+};
+
 /*
  * route an incoming cache manager call
  * - return T if supported, F if not
@@ -74,6 +86,9 @@ bool afs_cm_incoming_call(struct afs_call *call)
 	case CBProbe:
 		call->type = &afs_SRXCBProbe;
 		return true;
+	case CBGetCapabilities:
+		call->type = &afs_SRXCBGetCapabilites;
+		return true;
 	default:
 		return false;
 	}
@@ -328,3 +343,86 @@ static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
 	schedule_work(&call->work);
 	return 0;
 }
+
+/*
+ * allow the fileserver to ask about the cache manager's capabilities
+ */
+static void SRXAFSCB_GetCapabilities(struct work_struct *work)
+{
+	struct afs_interface *ifs;
+	struct afs_call *call = container_of(work, struct afs_call, work);
+	int loop, nifs;
+
+	struct {
+		struct /* InterfaceAddr */ {
+			__be32 nifs;
+			__be32 uuid[11];
+			__be32 ifaddr[32];
+			__be32 netmask[32];
+			__be32 mtu[32];
+		} ia;
+		struct /* Capabilities */ {
+			__be32 capcount;
+			__be32 caps[1];
+		} cap;
+	} reply;
+
+	_enter("");
+
+	nifs = 0;
+	ifs = kcalloc(32, sizeof(*ifs), GFP_KERNEL);
+	if (ifs) {
+		nifs = afs_get_ipv4_interfaces(ifs, 32, false);
+		if (nifs < 0) {
+			kfree(ifs);
+			ifs = NULL;
+			nifs = 0;
+		}
+	}
+
+	memset(&reply, 0, sizeof(reply));
+	reply.ia.nifs = htonl(nifs);
+
+	reply.ia.uuid[0] = htonl(afs_uuid.time_low);
+	reply.ia.uuid[1] = htonl(afs_uuid.time_mid);
+	reply.ia.uuid[2] = htonl(afs_uuid.time_hi_and_version);
+	reply.ia.uuid[3] = htonl((s8) afs_uuid.clock_seq_hi_and_reserved);
+	reply.ia.uuid[4] = htonl((s8) afs_uuid.clock_seq_low);
+	for (loop = 0; loop < 6; loop++)
+		reply.ia.uuid[loop + 5] = htonl((s8) afs_uuid.node[loop]);
+
+	if (ifs) {
+		for (loop = 0; loop < nifs; loop++) {
+			reply.ia.ifaddr[loop] = ifs[loop].address.s_addr;
+			reply.ia.netmask[loop] = ifs[loop].netmask.s_addr;
+			reply.ia.mtu[loop] = htonl(ifs[loop].mtu);
+		}
+	}
+
+	reply.cap.capcount = htonl(1);
+	reply.cap.caps[0] = htonl(AFS_CAP_ERROR_TRANSLATION);
+	afs_send_simple_reply(call, &reply, sizeof(reply));
+
+	_leave("");
+}
+
+/*
+ * deliver request data to a CB.GetCapabilities call
+ */
+static int afs_deliver_cb_get_capabilities(struct afs_call *call,
+					   struct sk_buff *skb, bool last)
+{
+	_enter(",{%u},%d", skb->len, last);
+
+	if (skb->len > 0)
+		return -EBADMSG;
+	if (!last)
+		return 0;
+
+	/* no unmarshalling required */
+	call->state = AFS_CALL_REPLYING;
+
+	INIT_WORK(&call->work, SRXAFSCB_GetCapabilities);
+	schedule_work(&call->work);
+	return 0;
+}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 8bed2429d01..6120d4bd19e 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -346,6 +346,40 @@ struct afs_permits {
 	struct afs_permit	permits[0];	/* the permits so far examined */
 };
 
+/*
+ * record of one of a system's set of network interfaces
+ */
+struct afs_interface {
+	unsigned	index;		/* interface index */
+	struct in_addr	address;	/* IPv4 address bound to interface */
+	struct in_addr	netmask;	/* netmask applied to address */
+	unsigned	mtu;		/* MTU of interface */
+};
+
+/*
+ * UUID definition [internet draft]
+ * - the timestamp is a 60-bit value, split 32/16/12, and goes in 100ns
+ *   increments since midnight 15th October 1582
+ *   - add AFS_UUID_TO_UNIX_TIME to convert unix time in 100ns units to UUID
+ *     time
+ * - the clock sequence is a 14-bit counter to avoid duplicate times
+ */
+struct afs_uuid {
+	u32		time_low;			/* low part of timestamp */
+	u16		time_mid;			/* mid part of timestamp */
+	u16		time_hi_and_version;		/* high part of timestamp and version  */
+#define AFS_UUID_TO_UNIX_TIME	0x01b21dd213814000
+#define AFS_UUID_TIMEHI_MASK	0x0fff
+#define AFS_UUID_VERSION_TIME	0x1000	/* time-based UUID */
+#define AFS_UUID_VERSION_NAME	0x3000	/* name-based UUID */
+#define AFS_UUID_VERSION_RANDOM	0x4000	/* (pseudo-)random generated UUID */
+	u8		clock_seq_hi_and_reserved;	/* clock seq hi and variant */
+#define AFS_UUID_CLOCKHI_MASK	0x3f
+#define AFS_UUID_VARIANT_STD	0x80
+	u8		clock_seq_low;			/* clock seq low */
+	u8		node[6];			/* spatially unique node ID (MAC addr) */
+};
+
 /*****************************************************************************/
 /*
  * callback.c
@@ -430,6 +464,7 @@ extern void afs_clear_inode(struct inode *);
 /*
  * main.c
  */
+extern struct afs_uuid afs_uuid;
 #ifdef AFS_CACHING_SUPPORT
 extern struct cachefs_netfs afs_cache_netfs;
 #endif
@@ -470,6 +505,7 @@ extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
 extern void afs_flat_call_destructor(struct afs_call *);
 extern void afs_transfer_reply(struct afs_call *, struct sk_buff *);
 extern void afs_send_empty_reply(struct afs_call *);
+extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
 extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
 			    size_t);
 
@@ -500,6 +536,12 @@ extern void __exit afs_purge_servers(void);
 extern int afs_fs_init(void);
 extern void afs_fs_exit(void);
 
+/*
+ * use-rtnetlink.c
+ */
+extern int afs_get_ipv4_interfaces(struct afs_interface *, size_t, bool);
+extern int afs_get_MAC_address(u8 [6]);
+
 /*
  * vlclient.c
  */
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 0cf1b021ad5..40c2704e755 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -40,6 +40,51 @@ struct cachefs_netfs afs_cache_netfs = {
 };
 #endif
 
+struct afs_uuid afs_uuid;
+
+/*
+ * get a client UUID
+ */
+static int __init afs_get_client_UUID(void)
+{
+	struct timespec ts;
+	u64 uuidtime;
+	u16 clockseq;
+	int ret;
+
+	/* read the MAC address of one of the external interfaces and construct
+	 * a UUID from it */
+	ret = afs_get_MAC_address(afs_uuid.node);
+	if (ret < 0)
+		return ret;
+
+	getnstimeofday(&ts);
+	uuidtime = (u64) ts.tv_sec * 1000 * 1000 * 10;
+	uuidtime += ts.tv_nsec / 100;
+	uuidtime += AFS_UUID_TO_UNIX_TIME;
+	afs_uuid.time_low = uuidtime;
+	afs_uuid.time_mid = uuidtime >> 32;
+	afs_uuid.time_hi_and_version = (uuidtime >> 48) & AFS_UUID_TIMEHI_MASK;
+	afs_uuid.time_hi_and_version = AFS_UUID_VERSION_TIME;
+
+	get_random_bytes(&clockseq, 2);
+	afs_uuid.clock_seq_low = clockseq;
+	afs_uuid.clock_seq_hi_and_reserved =
+		(clockseq >> 8) & AFS_UUID_CLOCKHI_MASK;
+	afs_uuid.clock_seq_hi_and_reserved = AFS_UUID_VARIANT_STD;
+
+	_debug("AFS UUID: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+	       afs_uuid.time_low,
+	       afs_uuid.time_mid,
+	       afs_uuid.time_hi_and_version,
+	       afs_uuid.clock_seq_hi_and_reserved,
+	       afs_uuid.clock_seq_low,
+	       afs_uuid.node[0], afs_uuid.node[1], afs_uuid.node[2],
+	       afs_uuid.node[3], afs_uuid.node[4], afs_uuid.node[5]);
+
+	return 0;
+}
+
 /*
  * initialise the AFS client FS module
  */
@@ -49,6 +94,10 @@ static int __init afs_init(void)
 
 	printk(KERN_INFO "kAFS: Red Hat AFS client v0.1 registering.\n");
 
+	ret = afs_get_client_UUID();
+	if (ret < 0)
+		return ret;
+
 	/* register the /proc stuff */
 	ret = afs_proc_init();
 	if (ret < 0)
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index e86c527d87a..e7b047328a3 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -713,6 +713,45 @@ void afs_send_empty_reply(struct afs_call *call)
 	}
 }
 
+/*
+ * send a simple reply
+ */
+void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
+{
+	struct msghdr msg;
+	struct iovec iov[1];
+
+	_enter("");
+
+	iov[0].iov_base		= (void *) buf;
+	iov[0].iov_len		= len;
+	msg.msg_name		= NULL;
+	msg.msg_namelen		= 0;
+	msg.msg_iov		= iov;
+	msg.msg_iovlen		= 1;
+	msg.msg_control		= NULL;
+	msg.msg_controllen	= 0;
+	msg.msg_flags		= 0;
+
+	call->state = AFS_CALL_AWAIT_ACK;
+	switch (rxrpc_kernel_send_data(call->rxcall, &msg, len)) {
+	case 0:
+		_leave(" [replied]");
+		return;
+
+	case -ENOMEM:
+		_debug("oom");
+		rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
+	default:
+		rxrpc_kernel_end_call(call->rxcall);
+		call->rxcall = NULL;
+		call->type->destructor(call);
+		afs_free_call(call);
+		_leave(" [error]");
+		return;
+	}
+}
+
 /*
  * extract a piece of data from the received data socket buffers
  */
diff --git a/fs/afs/use-rtnetlink.c b/fs/afs/use-rtnetlink.c
new file mode 100644
index 00000000000..82f0daa2897
--- /dev/null
+++ b/fs/afs/use-rtnetlink.c
@@ -0,0 +1,473 @@
+/* RTNETLINK client
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_addr.h>
+#include <linux/if_arp.h>
+#include <linux/inetdevice.h>
+#include <net/netlink.h>
+#include "internal.h"
+
+struct afs_rtm_desc {
+	struct socket		*nlsock;
+	struct afs_interface	*bufs;
+	u8			*mac;
+	size_t			nbufs;
+	size_t			maxbufs;
+	void			*data;
+	ssize_t			datalen;
+	size_t			datamax;
+	int			msg_seq;
+	unsigned		mac_index;
+	bool			wantloopback;
+	int (*parse)(struct afs_rtm_desc *, struct nlmsghdr *);
+};
+
+/*
+ * parse an RTM_GETADDR response
+ */
+static int afs_rtm_getaddr_parse(struct afs_rtm_desc *desc,
+				 struct nlmsghdr *nlhdr)
+{
+	struct afs_interface *this;
+	struct ifaddrmsg *ifa;
+	struct rtattr *rtattr;
+	const char *name;
+	size_t len;
+
+	ifa = (struct ifaddrmsg *) NLMSG_DATA(nlhdr);
+
+	_enter("{ix=%d,af=%d}", ifa->ifa_index, ifa->ifa_family);
+
+	if (ifa->ifa_family != AF_INET) {
+		_leave(" = 0 [family %d]", ifa->ifa_family);
+		return 0;
+	}
+	if (desc->nbufs >= desc->maxbufs) {
+		_leave(" = 0 [max %zu/%zu]", desc->nbufs, desc->maxbufs);
+		return 0;
+	}
+
+	this = &desc->bufs[desc->nbufs];
+
+	this->index = ifa->ifa_index;
+	this->netmask.s_addr = inet_make_mask(ifa->ifa_prefixlen);
+	this->mtu = 0;
+
+	rtattr = NLMSG_DATA(nlhdr) + NLMSG_ALIGN(sizeof(struct ifaddrmsg));
+	len = NLMSG_PAYLOAD(nlhdr, sizeof(struct ifaddrmsg));
+
+	name = "unknown";
+	for (; RTA_OK(rtattr, len); rtattr = RTA_NEXT(rtattr, len)) {
+		switch (rtattr->rta_type) {
+		case IFA_ADDRESS:
+			memcpy(&this->address, RTA_DATA(rtattr), 4);
+			break;
+		case IFA_LABEL:
+			name = RTA_DATA(rtattr);
+			break;
+		}
+	}
+
+	_debug("%s: "NIPQUAD_FMT"/"NIPQUAD_FMT,
+	       name, NIPQUAD(this->address), NIPQUAD(this->netmask));
+
+	desc->nbufs++;
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * parse an RTM_GETLINK response for MTUs
+ */
+static int afs_rtm_getlink_if_parse(struct afs_rtm_desc *desc,
+				    struct nlmsghdr *nlhdr)
+{
+	struct afs_interface *this;
+	struct ifinfomsg *ifi;
+	struct rtattr *rtattr;
+	const char *name;
+	size_t len, loop;
+
+	ifi = (struct ifinfomsg *) NLMSG_DATA(nlhdr);
+
+	_enter("{ix=%d}", ifi->ifi_index);
+
+	for (loop = 0; loop < desc->nbufs; loop++) {
+		this = &desc->bufs[loop];
+		if (this->index == ifi->ifi_index)
+			goto found;
+	}
+
+	_leave(" = 0 [no match]");
+	return 0;
+
+found:
+	if (ifi->ifi_type == ARPHRD_LOOPBACK && !desc->wantloopback) {
+		_leave(" = 0 [loopback]");
+		return 0;
+	}
+
+	rtattr = NLMSG_DATA(nlhdr) + NLMSG_ALIGN(sizeof(struct ifinfomsg));
+	len = NLMSG_PAYLOAD(nlhdr, sizeof(struct ifinfomsg));
+
+	name = "unknown";
+	for (; RTA_OK(rtattr, len); rtattr = RTA_NEXT(rtattr, len)) {
+		switch (rtattr->rta_type) {
+		case IFLA_MTU:
+			memcpy(&this->mtu, RTA_DATA(rtattr), 4);
+			break;
+		case IFLA_IFNAME:
+			name = RTA_DATA(rtattr);
+			break;
+		}
+	}
+
+	_debug("%s: "NIPQUAD_FMT"/"NIPQUAD_FMT" mtu %u",
+	       name, NIPQUAD(this->address), NIPQUAD(this->netmask),
+	       this->mtu);
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * parse an RTM_GETLINK response for the MAC address belonging to the lowest
+ * non-internal interface
+ */
+static int afs_rtm_getlink_mac_parse(struct afs_rtm_desc *desc,
+				     struct nlmsghdr *nlhdr)
+{
+	struct ifinfomsg *ifi;
+	struct rtattr *rtattr;
+	const char *name;
+	size_t remain, len;
+	bool set;
+
+	ifi = (struct ifinfomsg *) NLMSG_DATA(nlhdr);
+
+	_enter("{ix=%d}", ifi->ifi_index);
+
+	if (ifi->ifi_index >= desc->mac_index) {
+		_leave(" = 0 [high]");
+		return 0;
+	}
+	if (ifi->ifi_type == ARPHRD_LOOPBACK) {
+		_leave(" = 0 [loopback]");
+		return 0;
+	}
+
+	rtattr = NLMSG_DATA(nlhdr) + NLMSG_ALIGN(sizeof(struct ifinfomsg));
+	remain = NLMSG_PAYLOAD(nlhdr, sizeof(struct ifinfomsg));
+
+	name = "unknown";
+	set = false;
+	for (; RTA_OK(rtattr, remain); rtattr = RTA_NEXT(rtattr, remain)) {
+		switch (rtattr->rta_type) {
+		case IFLA_ADDRESS:
+			len = RTA_PAYLOAD(rtattr);
+			memcpy(desc->mac, RTA_DATA(rtattr),
+			       min_t(size_t, len, 6));
+			desc->mac_index = ifi->ifi_index;
+			set = true;
+			break;
+		case IFLA_IFNAME:
+			name = RTA_DATA(rtattr);
+			break;
+		}
+	}
+
+	if (set)
+		_debug("%s: %02x:%02x:%02x:%02x:%02x:%02x",
+		       name,
+		       desc->mac[0], desc->mac[1], desc->mac[2],
+		       desc->mac[3], desc->mac[4], desc->mac[5]);
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * read the rtnetlink response and pass to parsing routine
+ */
+static int afs_read_rtm(struct afs_rtm_desc *desc)
+{
+	struct nlmsghdr *nlhdr, tmphdr;
+	struct msghdr msg;
+	struct kvec iov[1];
+	void *data;
+	bool last = false;
+	int len, ret, remain;
+
+	_enter("");
+
+	do {
+		/* first of all peek to see how big the packet is */
+		memset(&msg, 0, sizeof(msg));
+		iov[0].iov_base = &tmphdr;
+		iov[0].iov_len = sizeof(tmphdr);
+		len = kernel_recvmsg(desc->nlsock, &msg, iov, 1,
+				     sizeof(tmphdr), MSG_PEEK | MSG_TRUNC);
+		if (len < 0) {
+			_leave(" = %d [peek]", len);
+			return len;
+		}
+		if (len == 0)
+			continue;
+		if (len < sizeof(tmphdr) || len < NLMSG_PAYLOAD(&tmphdr, 0)) {
+			_leave(" = -EMSGSIZE");
+			return -EMSGSIZE;
+		}
+
+		if (desc->datamax < len) {
+			kfree(desc->data);
+			desc->data = NULL;
+			data = kmalloc(len, GFP_KERNEL);
+			if (!data)
+				return -ENOMEM;
+			desc->data = data;
+		}
+		desc->datamax = len;
+
+		/* read all the data from this packet */
+		iov[0].iov_base = desc->data;
+		iov[0].iov_len = desc->datamax;
+		desc->datalen = kernel_recvmsg(desc->nlsock, &msg, iov, 1,
+					       desc->datamax, 0);
+		if (desc->datalen < 0) {
+			_leave(" = %ld [recv]", desc->datalen);
+			return desc->datalen;
+		}
+
+		nlhdr = desc->data;
+
+		/* check if the header is valid */
+		if (!NLMSG_OK(nlhdr, desc->datalen) ||
+		    nlhdr->nlmsg_type == NLMSG_ERROR) {
+			_leave(" = -EIO");
+			return -EIO;
+		}
+
+		/* see if this is the last message */
+		if (nlhdr->nlmsg_type == NLMSG_DONE ||
+		    !(nlhdr->nlmsg_flags & NLM_F_MULTI))
+			last = true;
+
+		/* parse the bits we got this time */
+		nlmsg_for_each_msg(nlhdr, desc->data, desc->datalen, remain) {
+			ret = desc->parse(desc, nlhdr);
+			if (ret < 0) {
+				_leave(" = %d [parse]", ret);
+				return ret;
+			}
+		}
+
+	} while (!last);
+
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * list the interface bound addresses to get the address and netmask
+ */
+static int afs_rtm_getaddr(struct afs_rtm_desc *desc)
+{
+	struct msghdr msg;
+	struct kvec iov[1];
+	int ret;
+
+	struct {
+		struct nlmsghdr nl_msg __attribute__((aligned(NLMSG_ALIGNTO)));
+		struct ifaddrmsg addr_msg __attribute__((aligned(NLMSG_ALIGNTO)));
+	} request;
+
+	_enter("");
+
+	memset(&request, 0, sizeof(request));
+
+	request.nl_msg.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
+	request.nl_msg.nlmsg_type = RTM_GETADDR;
+	request.nl_msg.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+	request.nl_msg.nlmsg_seq = desc->msg_seq++;
+	request.nl_msg.nlmsg_pid = 0;
+
+	memset(&msg, 0, sizeof(msg));
+	iov[0].iov_base = &request;
+	iov[0].iov_len = sizeof(request);
+
+	ret = kernel_sendmsg(desc->nlsock, &msg, iov, 1, iov[0].iov_len);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * list the interface link statuses to get the MTUs
+ */
+static int afs_rtm_getlink(struct afs_rtm_desc *desc)
+{
+	struct msghdr msg;
+	struct kvec iov[1];
+	int ret;
+
+	struct {
+		struct nlmsghdr nl_msg __attribute__((aligned(NLMSG_ALIGNTO)));
+		struct ifinfomsg link_msg __attribute__((aligned(NLMSG_ALIGNTO)));
+	} request;
+
+	_enter("");
+
+	memset(&request, 0, sizeof(request));
+
+	request.nl_msg.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	request.nl_msg.nlmsg_type = RTM_GETLINK;
+	request.nl_msg.nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
+	request.nl_msg.nlmsg_seq = desc->msg_seq++;
+	request.nl_msg.nlmsg_pid = 0;
+
+	memset(&msg, 0, sizeof(msg));
+	iov[0].iov_base = &request;
+	iov[0].iov_len = sizeof(request);
+
+	ret = kernel_sendmsg(desc->nlsock, &msg, iov, 1, iov[0].iov_len);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * cull any interface records for which there isn't an MTU value
+ */
+static void afs_cull_interfaces(struct afs_rtm_desc *desc)
+{
+	struct afs_interface *bufs = desc->bufs;
+	size_t nbufs = desc->nbufs;
+	int loop, point = 0;
+
+	_enter("{%zu}", nbufs);
+
+	for (loop = 0; loop < nbufs; loop++) {
+		if (desc->bufs[loop].mtu != 0) {
+			if (loop != point) {
+				ASSERTCMP(loop, >, point);
+				bufs[point] = bufs[loop];
+			}
+			point++;
+		}
+	}
+
+	desc->nbufs = point;
+	_leave(" [%zu/%zu]", desc->nbufs, nbufs);
+}
+
+/*
+ * get a list of this system's interface IPv4 addresses, netmasks and MTUs
+ * - returns the number of interface records in the buffer
+ */
+int afs_get_ipv4_interfaces(struct afs_interface *bufs, size_t maxbufs,
+			    bool wantloopback)
+{
+	struct afs_rtm_desc desc;
+	int ret, loop;
+
+	_enter("");
+
+	memset(&desc, 0, sizeof(desc));
+	desc.bufs = bufs;
+	desc.maxbufs = maxbufs;
+	desc.wantloopback = wantloopback;
+
+	ret = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE,
+			       &desc.nlsock);
+	if (ret < 0) {
+		_leave(" = %d [sock]", ret);
+		return ret;
+	}
+
+	/* issue RTM_GETADDR */
+	desc.parse = afs_rtm_getaddr_parse;
+	ret = afs_rtm_getaddr(&desc);
+	if (ret < 0)
+		goto error;
+	ret = afs_read_rtm(&desc);
+	if (ret < 0)
+		goto error;
+
+	/* issue RTM_GETLINK */
+	desc.parse = afs_rtm_getlink_if_parse;
+	ret = afs_rtm_getlink(&desc);
+	if (ret < 0)
+		goto error;
+	ret = afs_read_rtm(&desc);
+	if (ret < 0)
+		goto error;
+
+	afs_cull_interfaces(&desc);
+	ret = desc.nbufs;
+
+	for (loop = 0; loop < ret; loop++)
+		_debug("[%d] "NIPQUAD_FMT"/"NIPQUAD_FMT" mtu %u",
+		       bufs[loop].index,
+		       NIPQUAD(bufs[loop].address),
+		       NIPQUAD(bufs[loop].netmask),
+		       bufs[loop].mtu);
+
+error:
+	kfree(desc.data);
+	sock_release(desc.nlsock);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * get a MAC address from a random ethernet interface that has a real one
+ * - the buffer should be 6 bytes in size
+ */
+int afs_get_MAC_address(u8 mac[6])
+{
+	struct afs_rtm_desc desc;
+	int ret;
+
+	_enter("");
+
+	memset(&desc, 0, sizeof(desc));
+	desc.mac = mac;
+	desc.mac_index = UINT_MAX;
+
+	ret = sock_create_kern(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE,
+			       &desc.nlsock);
+	if (ret < 0) {
+		_leave(" = %d [sock]", ret);
+		return ret;
+	}
+
+	/* issue RTM_GETLINK */
+	desc.parse = afs_rtm_getlink_mac_parse;
+	ret = afs_rtm_getlink(&desc);
+	if (ret < 0)
+		goto error;
+	ret = afs_read_rtm(&desc);
+	if (ret < 0)
+		goto error;
+
+	if (desc.mac_index < UINT_MAX) {
+		/* got a MAC address */
+		_debug("[%d] %02x:%02x:%02x:%02x:%02x:%02x",
+		       desc.mac_index,
+		       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+	} else {
+		ret = -ENONET;
+	}
+
+error:
+	sock_release(desc.nlsock);
+	_leave(" = %d", ret);
+	return ret;
+}
-- 
cgit v1.2.3


From c35eccb1f614954b10cba3f74b7c301993b2f42e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 26 Apr 2007 15:58:49 -0700
Subject: [AFS]: Implement the CB.InitCallBackState3 operation.

Implement the CB.InitCallBackState3 operation for the fileserver to
call.  This reduces the amount of network traffic because if this op
is aborted, the fileserver will then attempt an CB.InitCallBackState
operation.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/afs/afs_cm.h    |  1 +
 fs/afs/cmservice.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/fs/afs/afs_cm.h b/fs/afs/afs_cm.h
index d4bd201cc31..7b4d4fab4c8 100644
--- a/fs/afs/afs_cm.h
+++ b/fs/afs/afs_cm.h
@@ -23,6 +23,7 @@ enum AFS_CM_Operations {
 	CBGetCE			= 208,	/* get cache file description */
 	CBGetXStatsVersion	= 209,	/* get version of extended statistics */
 	CBGetXStats		= 210,	/* get contents of extended statistics data */
+	CBInitCallBackState3	= 213,	/* initialise callback state, version 3 */
 	CBGetCapabilities	= 65538, /* get client capabilities */
 };
 
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index a6af3acf016..6685f4cbccb 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -20,6 +20,8 @@ struct workqueue_struct *afs_cm_workqueue;
 
 static int afs_deliver_cb_init_call_back_state(struct afs_call *,
 					       struct sk_buff *, bool);
+static int afs_deliver_cb_init_call_back_state3(struct afs_call *,
+						struct sk_buff *, bool);
 static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool);
 static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool);
 static int afs_deliver_cb_get_capabilities(struct afs_call *, struct sk_buff *,
@@ -46,6 +48,16 @@ static const struct afs_call_type afs_SRXCBInitCallBackState = {
 	.destructor	= afs_cm_destructor,
 };
 
+/*
+ * CB.InitCallBackState3 operation type
+ */
+static const struct afs_call_type afs_SRXCBInitCallBackState3 = {
+	.name		= "CB.InitCallBackState3",
+	.deliver	= afs_deliver_cb_init_call_back_state3,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_cm_destructor,
+};
+
 /*
  * CB.Probe operation type
  */
@@ -83,6 +95,9 @@ bool afs_cm_incoming_call(struct afs_call *call)
 	case CBInitCallBackState:
 		call->type = &afs_SRXCBInitCallBackState;
 		return true;
+	case CBInitCallBackState3:
+		call->type = &afs_SRXCBInitCallBackState3;
+		return true;
 	case CBProbe:
 		call->type = &afs_SRXCBProbe;
 		return true;
@@ -311,6 +326,37 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
 	return 0;
 }
 
+/*
+ * deliver request data to a CB.InitCallBackState3 call
+ */
+static int afs_deliver_cb_init_call_back_state3(struct afs_call *call,
+						struct sk_buff *skb,
+						bool last)
+{
+	struct afs_server *server;
+	struct in_addr addr;
+
+	_enter(",{%u},%d", skb->len, last);
+
+	if (!last)
+		return 0;
+
+	/* no unmarshalling required */
+	call->state = AFS_CALL_REPLYING;
+
+	/* we'll need the file server record as that tells us which set of
+	 * vnodes to operate upon */
+	memcpy(&addr, &ip_hdr(skb)->saddr, 4);
+	server = afs_find_server(&addr);
+	if (!server)
+		return -ENOTCONN;
+	call->server = server;
+
+	INIT_WORK(&call->work, SRXAFSCB_InitCallBackState);
+	schedule_work(&call->work);
+	return 0;
+}
+
 /*
  * allow the fileserver to see if the cache manager is still alive
  */
-- 
cgit v1.2.3


From 260a980317dac80182dd76140cf67c6e81d6d3dd Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 26 Apr 2007 15:59:35 -0700
Subject: [AFS]: Add "directory write" support.

Add support for the create, link, symlink, unlink, mkdir, rmdir and
rename VFS operations to the in-kernel AFS filesystem.

Also:

 (1) Fix dentry and inode revalidation.  d_revalidate should only look at
     state of the dentry.  Revalidation of the contents of an inode pointed to
     by a dentry is now separate.

 (2) Fix afs_lookup() to hash negative dentries as well as positive ones.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/afs/afs.h      |  26 ++-
 fs/afs/afs_fs.h   |  11 +-
 fs/afs/callback.c |  36 ++-
 fs/afs/dir.c      | 676 +++++++++++++++++++++++++++++++++++++++++++-----------
 fs/afs/file.c     |   7 +
 fs/afs/fsclient.c | 625 ++++++++++++++++++++++++++++++++++++++++++++++----
 fs/afs/inode.c    | 115 +++++++++-
 fs/afs/internal.h |  54 ++++-
 fs/afs/misc.c     |  21 +-
 fs/afs/security.c |  31 ++-
 fs/afs/server.c   |   2 +
 fs/afs/super.c    |   6 +-
 fs/afs/vnode.c    | 422 +++++++++++++++++++++++++++++++---
 fs/afs/volume.c   |  14 +-
 14 files changed, 1778 insertions(+), 268 deletions(-)

diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index d959092aaf4..52d0752265b 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -106,18 +106,36 @@ struct afs_file_status {
 
 	afs_file_type_t		type;		/* file type */
 	unsigned		nlink;		/* link count */
-	size_t			size;		/* file size */
+	u64			size;		/* file size */
 	afs_dataversion_t	data_version;	/* current data version */
-	unsigned		author;		/* author ID */
-	unsigned		owner;		/* owner ID */
+	u32			author;		/* author ID */
+	u32			owner;		/* owner ID */
+	u32			group;		/* group ID */
 	afs_access_t		caller_access;	/* access rights for authenticated caller */
 	afs_access_t		anon_access;	/* access rights for unauthenticated caller */
 	umode_t			mode;		/* UNIX mode */
-	struct afs_fid		parent;		/* parent file ID */
+	struct afs_fid		parent;		/* parent dir ID for non-dirs only */
 	time_t			mtime_client;	/* last time client changed data */
 	time_t			mtime_server;	/* last time server changed data */
 };
 
+/*
+ * AFS file status change request
+ */
+struct afs_store_status {
+	u32			mask;		/* which bits of the struct are set */
+	u32			mtime_client;	/* last time client changed data */
+	u32			owner;		/* owner ID */
+	u32			group;		/* group ID */
+	umode_t			mode;		/* UNIX mode */
+};
+
+#define AFS_SET_MTIME		0x01		/* set the mtime */
+#define AFS_SET_OWNER		0x02		/* set the owner ID */
+#define AFS_SET_GROUP		0x04		/* set the group ID (unsupported?) */
+#define AFS_SET_MODE		0x08		/* set the UNIX mode */
+#define AFS_SET_SEG_SIZE	0x10		/* set the segment size (unsupported) */
+
 /*
  * AFS volume synchronisation information
  */
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index fd385954f21..89e0d1650a7 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -16,12 +16,19 @@
 #define FS_SERVICE		1	/* AFS File Service ID */
 
 enum AFS_FS_Operations {
-	FSFETCHSTATUS		= 132,	/* AFS Fetch file status */
 	FSFETCHDATA		= 130,	/* AFS Fetch file data */
+	FSFETCHSTATUS		= 132,	/* AFS Fetch file status */
+	FSREMOVEFILE		= 136,	/* AFS Remove a file */
+	FSCREATEFILE		= 137,	/* AFS Create a file */
+	FSRENAME		= 138,	/* AFS Rename or move a file or directory */
+	FSSYMLINK		= 139,	/* AFS Create a symbolic link */
+	FSLINK			= 140,	/* AFS Create a hard link */
+	FSMAKEDIR		= 141,	/* AFS Create a directory */
+	FSREMOVEDIR		= 142,	/* AFS Remove a directory */
 	FSGIVEUPCALLBACKS	= 147,	/* AFS Discard callback promises */
 	FSGETVOLUMEINFO		= 148,	/* AFS Get root volume information */
 	FSGETROOTVOLUME		= 151,	/* AFS Get root volume name */
-	FSLOOKUP		= 161	/* AFS lookup file in directory */
+	FSLOOKUP		= 161,	/* AFS lookup file in directory */
 };
 
 enum AFS_FS_Errors {
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index e674bebbb8b..639399f0ab6 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -44,7 +44,8 @@ void afs_init_callback_state(struct afs_server *server)
 	while (!RB_EMPTY_ROOT(&server->cb_promises)) {
 		vnode = rb_entry(server->cb_promises.rb_node,
 				 struct afs_vnode, cb_promise);
-		printk("\nUNPROMISE on %p\n", vnode);
+		_debug("UNPROMISE { vid=%x vn=%u uq=%u}",
+		       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
 		rb_erase(&vnode->cb_promise, &server->cb_promises);
 		vnode->cb_promised = false;
 	}
@@ -68,7 +69,7 @@ void afs_broken_callback_work(struct work_struct *work)
 
 	/* we're only interested in dealing with a broken callback on *this*
 	 * vnode and only if no-one else has dealt with it yet */
-	if (!mutex_trylock(&vnode->cb_broken_lock))
+	if (!mutex_trylock(&vnode->validate_lock))
 		return; /* someone else is dealing with it */
 
 	if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
@@ -84,13 +85,14 @@ void afs_broken_callback_work(struct work_struct *work)
 		/* if the vnode's data version number changed then its contents
 		 * are different */
 		if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
-			_debug("zap data");
+			_debug("zap data {%x:%u}",
+			       vnode->fid.vid, vnode->fid.vnode);
 			invalidate_remote_inode(&vnode->vfs_inode);
 		}
 	}
 
 out:
-	mutex_unlock(&vnode->cb_broken_lock);
+	mutex_unlock(&vnode->validate_lock);
 
 	/* avoid the potential race whereby the mutex_trylock() in this
 	 * function happens again between the clear_bit() and the
@@ -251,6 +253,32 @@ static void afs_do_give_up_callback(struct afs_server *server,
 	_leave("");
 }
 
+/*
+ * discard the callback on a deleted item
+ */
+void afs_discard_callback_on_delete(struct afs_vnode *vnode)
+{
+	struct afs_server *server = vnode->server;
+
+	_enter("%d", vnode->cb_promised);
+
+	if (!vnode->cb_promised) {
+		_leave(" [not promised]");
+		return;
+	}
+
+	ASSERT(server != NULL);
+
+	spin_lock(&server->cb_lock);
+	if (vnode->cb_promised) {
+		ASSERT(server->cb_promises.rb_node != NULL);
+		rb_erase(&vnode->cb_promise, &server->cb_promises);
+		vnode->cb_promised = false;
+	}
+	spin_unlock(&server->cb_lock);
+	_leave("");
+}
+
 /*
  * give up the callback registered for a vnode on the file server when the
  * inode is being cleared
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 87368417e4d..dbbe75d6023 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -18,40 +18,50 @@
 #include <linux/ctype.h>
 #include "internal.h"
 
-static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry,
-				     struct nameidata *nd);
+static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
+				 struct nameidata *nd);
 static int afs_dir_open(struct inode *inode, struct file *file);
-static int afs_dir_readdir(struct file *file, void *dirent, filldir_t filldir);
+static int afs_readdir(struct file *file, void *dirent, filldir_t filldir);
 static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd);
 static int afs_d_delete(struct dentry *dentry);
-static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen,
+static void afs_d_release(struct dentry *dentry);
+static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
 				  loff_t fpos, u64 ino, unsigned dtype);
+static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
+		      struct nameidata *nd);
+static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+static int afs_rmdir(struct inode *dir, struct dentry *dentry);
+static int afs_unlink(struct inode *dir, struct dentry *dentry);
+static int afs_link(struct dentry *from, struct inode *dir,
+		    struct dentry *dentry);
+static int afs_symlink(struct inode *dir, struct dentry *dentry,
+		       const char *content);
+static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
+		      struct inode *new_dir, struct dentry *new_dentry);
 
 const struct file_operations afs_dir_file_operations = {
 	.open		= afs_dir_open,
 	.release	= afs_release,
-	.readdir	= afs_dir_readdir,
+	.readdir	= afs_readdir,
 };
 
 const struct inode_operations afs_dir_inode_operations = {
-	.lookup		= afs_dir_lookup,
+	.create		= afs_create,
+	.lookup		= afs_lookup,
+	.link		= afs_link,
+	.unlink		= afs_unlink,
+	.symlink	= afs_symlink,
+	.mkdir		= afs_mkdir,
+	.rmdir		= afs_rmdir,
+	.rename		= afs_rename,
 	.permission	= afs_permission,
 	.getattr	= afs_inode_getattr,
-#if 0 /* TODO */
-	.create		= afs_dir_create,
-	.link		= afs_dir_link,
-	.unlink		= afs_dir_unlink,
-	.symlink	= afs_dir_symlink,
-	.mkdir		= afs_dir_mkdir,
-	.rmdir		= afs_dir_rmdir,
-	.mknod		= afs_dir_mknod,
-	.rename		= afs_dir_rename,
-#endif
 };
 
 static struct dentry_operations afs_fs_dentry_operations = {
 	.d_revalidate	= afs_d_revalidate,
 	.d_delete	= afs_d_delete,
+	.d_release	= afs_d_release,
 };
 
 #define AFS_DIR_HASHTBL_SIZE	128
@@ -103,7 +113,7 @@ struct afs_dir_page {
 	union afs_dir_block blocks[PAGE_SIZE / sizeof(union afs_dir_block)];
 };
 
-struct afs_dir_lookup_cookie {
+struct afs_lookup_cookie {
 	struct afs_fid	fid;
 	const char	*name;
 	size_t		nlen;
@@ -299,7 +309,7 @@ static int afs_dir_iterate_block(unsigned *fpos,
 			      nlen,
 			      blkoff + offset * sizeof(union afs_dirent),
 			      ntohl(dire->u.vnode),
-			      filldir == afs_dir_lookup_filldir ?
+			      filldir == afs_lookup_filldir ?
 			      ntohl(dire->u.unique) : DT_UNKNOWN);
 		if (ret < 0) {
 			_leave(" = 0 [full]");
@@ -379,7 +389,7 @@ out:
 /*
  * read an AFS directory
  */
-static int afs_dir_readdir(struct file *file, void *cookie, filldir_t filldir)
+static int afs_readdir(struct file *file, void *cookie, filldir_t filldir)
 {
 	unsigned fpos;
 	int ret;
@@ -403,10 +413,10 @@ static int afs_dir_readdir(struct file *file, void *cookie, filldir_t filldir)
  * - if afs_dir_iterate_block() spots this function, it'll pass the FID
  *   uniquifier through dtype
  */
-static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen,
-				  loff_t fpos, u64 ino, unsigned dtype)
+static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
+			      loff_t fpos, u64 ino, unsigned dtype)
 {
-	struct afs_dir_lookup_cookie *cookie = _cookie;
+	struct afs_lookup_cookie *cookie = _cookie;
 
 	_enter("{%s,%Zu},%s,%u,,%llu,%u",
 	       cookie->name, cookie->nlen, name, nlen, ino, dtype);
@@ -430,11 +440,12 @@ static int afs_dir_lookup_filldir(void *_cookie, const char *name, int nlen,
 
 /*
  * do a lookup in a directory
+ * - just returns the FID the dentry name maps to if found
  */
 static int afs_do_lookup(struct inode *dir, struct dentry *dentry,
 			 struct afs_fid *fid, struct key *key)
 {
-	struct afs_dir_lookup_cookie cookie;
+	struct afs_lookup_cookie cookie;
 	struct afs_super_info *as;
 	unsigned fpos;
 	int ret;
@@ -450,7 +461,7 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry,
 	cookie.found	= 0;
 
 	fpos = 0;
-	ret = afs_dir_iterate(dir, &fpos, &cookie, afs_dir_lookup_filldir,
+	ret = afs_dir_iterate(dir, &fpos, &cookie, afs_lookup_filldir,
 			      key);
 	if (ret < 0) {
 		_leave(" = %d [iter]", ret);
@@ -471,8 +482,8 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry,
 /*
  * look up an entry in a directory
  */
-static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry,
-				     struct nameidata *nd)
+static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
+				 struct nameidata *nd)
 {
 	struct afs_vnode *vnode;
 	struct afs_fid fid;
@@ -480,14 +491,18 @@ static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry,
 	struct key *key;
 	int ret;
 
-	_enter("{%lu},%p{%s}", dir->i_ino, dentry, dentry->d_name.name);
+	vnode = AFS_FS_I(dir);
+
+	_enter("{%x:%d},%p{%s},",
+	       vnode->fid.vid, vnode->fid.vnode, dentry, dentry->d_name.name);
+
+	ASSERTCMP(dentry->d_inode, ==, NULL);
 
 	if (dentry->d_name.len > 255) {
 		_leave(" = -ENAMETOOLONG");
 		return ERR_PTR(-ENAMETOOLONG);
 	}
 
-	vnode = AFS_FS_I(dir);
 	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
 		_leave(" = -ESTALE");
 		return ERR_PTR(-ESTALE);
@@ -499,15 +514,28 @@ static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry,
 		return ERR_PTR(PTR_ERR(key));
 	}
 
+	ret = afs_validate(vnode, key);
+	if (ret < 0) {
+		key_put(key);
+		_leave(" = %d [val]", ret);
+		return ERR_PTR(ret);
+	}
+
 	ret = afs_do_lookup(dir, dentry, &fid, key);
 	if (ret < 0) {
 		key_put(key);
+		if (ret == -ENOENT) {
+			d_add(dentry, NULL);
+			_leave(" = NULL [negative]");
+			return NULL;
+		}
 		_leave(" = %d [do]", ret);
 		return ERR_PTR(ret);
 	}
+	dentry->d_fsdata = (void *)(unsigned long) vnode->status.data_version;
 
 	/* instantiate the dentry */
-	inode = afs_iget(dir->i_sb, key, &fid);
+	inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL);
 	key_put(key);
 	if (IS_ERR(inode)) {
 		_leave(" = %ld", PTR_ERR(inode));
@@ -526,106 +554,65 @@ static struct dentry *afs_dir_lookup(struct inode *dir, struct dentry *dentry,
 	return NULL;
 }
 
-/*
- * propagate changed and modified flags on a directory to all the children of
- * that directory as they may indicate that the ACL on the dir has changed,
- * potentially rendering the child inaccessible or that a file has been deleted
- * or renamed
- */
-static void afs_propagate_dir_changes(struct dentry *dir)
-{
-	struct dentry *child;
-	bool c, m;
-
-	c = test_bit(AFS_VNODE_CHANGED, &AFS_FS_I(dir->d_inode)->flags);
-	m = test_bit(AFS_VNODE_MODIFIED, &AFS_FS_I(dir->d_inode)->flags);
-
-	_enter("{%d,%d}", c, m);
-
-	spin_lock(&dir->d_lock);
-
-	list_for_each_entry(child, &dir->d_subdirs, d_u.d_child) {
-		if (child->d_inode) {
-			struct afs_vnode *vnode;
-
-			_debug("tag %s", child->d_name.name);
-			vnode = AFS_FS_I(child->d_inode);
-			if (c)
-				set_bit(AFS_VNODE_DIR_CHANGED, &vnode->flags);
-			if (m)
-				set_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags);
-		}
-	}
-
-	spin_unlock(&dir->d_lock);
-}
-
 /*
  * check that a dentry lookup hit has found a valid entry
  * - NOTE! the hit can be a negative hit too, so we can't assume we have an
  *   inode
- * - there are several things we need to check
- *   - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
- *     symlink)
- *   - parent dir metadata changed (security changes)
- *   - dentry data changed (write, truncate)
- *   - dentry metadata changed (security changes)
  */
 static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct afs_vnode *vnode;
+	struct afs_vnode *vnode, *dir;
 	struct afs_fid fid;
 	struct dentry *parent;
-	struct inode *inode, *dir;
 	struct key *key;
+	void *dir_version;
 	int ret;
 
 	vnode = AFS_FS_I(dentry->d_inode);
 
-	_enter("{sb=%p n=%s fl=%lx},",
-	       dentry->d_sb, dentry->d_name.name, vnode->flags);
+	if (dentry->d_inode)
+		_enter("{v={%x:%u} n=%s fl=%lx},",
+		       vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
+		       vnode->flags);
+	else
+		_enter("{neg n=%s}", dentry->d_name.name);
 
-	key = afs_request_key(vnode->volume->cell);
+	key = afs_request_key(AFS_FS_S(dentry->d_sb)->volume->cell);
 	if (IS_ERR(key))
 		key = NULL;
 
 	/* lock down the parent dentry so we can peer at it */
 	parent = dget_parent(dentry);
-
-	dir = parent->d_inode;
-	inode = dentry->d_inode;
-
-	/* handle a negative dentry */
-	if (!inode)
+	if (!parent->d_inode)
 		goto out_bad;
 
-	/* handle a bad inode */
-	if (is_bad_inode(inode)) {
-		printk("kAFS: afs_d_revalidate: %s/%s has bad inode\n",
-		       parent->d_name.name, dentry->d_name.name);
-		goto out_bad;
-	}
+	dir = AFS_FS_I(parent->d_inode);
 
-	/* check that this dirent still exists if the directory's contents were
-	 * modified */
-	if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) {
+	/* validate the parent directory */
+	if (test_bit(AFS_VNODE_MODIFIED, &dir->flags))
+		afs_validate(dir, key);
+
+	if (test_bit(AFS_VNODE_DELETED, &dir->flags)) {
 		_debug("%s: parent dir deleted", dentry->d_name.name);
 		goto out_bad;
 	}
 
-	if (test_and_clear_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags)) {
-		/* rm/rmdir/rename may have occurred */
-		_debug("dir modified");
+	dir_version = (void *) (unsigned long) dir->status.data_version;
+	if (dentry->d_fsdata == dir_version)
+		goto out_valid; /* the dir contents are unchanged */
 
-		/* search the directory for this vnode */
-		ret = afs_do_lookup(dir, dentry, &fid, key);
-		if (ret == -ENOENT) {
-			_debug("%s: dirent not found", dentry->d_name.name);
-			goto not_found;
-		}
-		if (ret < 0) {
-			_debug("failed to iterate dir %s: %d",
-			       parent->d_name.name, ret);
+	_debug("dir modified");
+
+	/* search the directory for this vnode */
+	ret = afs_do_lookup(&dir->vfs_inode, dentry, &fid, key);
+	switch (ret) {
+	case 0:
+		/* the filename maps to something */
+		if (!dentry->d_inode)
+			goto out_bad;
+		if (is_bad_inode(dentry->d_inode)) {
+			printk("kAFS: afs_d_revalidate: %s/%s has bad inode\n",
+			       parent->d_name.name, dentry->d_name.name);
 			goto out_bad;
 		}
 
@@ -639,56 +626,35 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 		}
 
 		/* if the vnode ID uniqifier has changed, then the file has
-		 * been deleted */
+		 * been deleted and replaced, and the original vnode ID has
+		 * been reused */
 		if (fid.unique != vnode->fid.unique) {
 			_debug("%s: file deleted (uq %u -> %u I:%lu)",
 			       dentry->d_name.name, fid.unique,
-			       vnode->fid.unique, inode->i_version);
+			       vnode->fid.unique, dentry->d_inode->i_version);
 			spin_lock(&vnode->lock);
 			set_bit(AFS_VNODE_DELETED, &vnode->flags);
 			spin_unlock(&vnode->lock);
-			invalidate_remote_inode(inode);
-			goto out_bad;
+			goto not_found;
 		}
-	}
+		goto out_valid;
 
-	/* if the directory's metadata were changed then the security may be
-	 * different and we may no longer have access */
-	mutex_lock(&vnode->cb_broken_lock);
-
-	if (test_and_clear_bit(AFS_VNODE_DIR_CHANGED, &vnode->flags) ||
-	    test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
-		_debug("%s: changed", dentry->d_name.name);
-		set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-		if (afs_vnode_fetch_status(vnode, NULL, key) < 0) {
-			mutex_unlock(&vnode->cb_broken_lock);
-			goto out_bad;
-		}
-	}
+	case -ENOENT:
+		/* the filename is unknown */
+		_debug("%s: dirent not found", dentry->d_name.name);
+		if (dentry->d_inode)
+			goto not_found;
+		goto out_valid;
 
-	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
-		_debug("%s: file already deleted", dentry->d_name.name);
-		mutex_unlock(&vnode->cb_broken_lock);
+	default:
+		_debug("failed to iterate dir %s: %d",
+		       parent->d_name.name, ret);
 		goto out_bad;
 	}
 
-	/* if the vnode's data version number changed then its contents are
-	 * different */
-	if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
-		_debug("zap data");
-		invalidate_remote_inode(inode);
-	}
-
-	if (S_ISDIR(inode->i_mode) &&
-	    (test_bit(AFS_VNODE_CHANGED, &vnode->flags) ||
-	     test_bit(AFS_VNODE_MODIFIED, &vnode->flags)))
-		afs_propagate_dir_changes(dentry);
-
-	clear_bit(AFS_VNODE_CHANGED, &vnode->flags);
-	clear_bit(AFS_VNODE_MODIFIED, &vnode->flags);
-	mutex_unlock(&vnode->cb_broken_lock);
-
 out_valid:
+	dentry->d_fsdata = dir_version;
+out_skip:
 	dput(parent);
 	key_put(key);
 	_leave(" = 1 [valid]");
@@ -701,10 +667,10 @@ not_found:
 	spin_unlock(&dentry->d_lock);
 
 out_bad:
-	if (inode) {
+	if (dentry->d_inode) {
 		/* don't unhash if we have submounts */
 		if (have_submounts(dentry))
-			goto out_valid;
+			goto out_skip;
 	}
 
 	_debug("dropping dentry %s/%s",
@@ -742,3 +708,433 @@ zap:
 	_leave(" = 1 [zap]");
 	return 1;
 }
+
+/*
+ * handle dentry release
+ */
+static void afs_d_release(struct dentry *dentry)
+{
+	_enter("%s", dentry->d_name.name);
+}
+
+/*
+ * create a directory on an AFS filesystem
+ */
+static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	struct afs_file_status status;
+	struct afs_callback cb;
+	struct afs_server *server;
+	struct afs_vnode *dvnode, *vnode;
+	struct afs_fid fid;
+	struct inode *inode;
+	struct key *key;
+	int ret;
+
+	dvnode = AFS_FS_I(dir);
+
+	_enter("{%x:%d},{%s},%o",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
+
+	ret = -ENAMETOOLONG;
+	if (dentry->d_name.len > 255)
+		goto error;
+
+	key = afs_request_key(dvnode->volume->cell);
+	if (IS_ERR(key)) {
+		ret = PTR_ERR(key);
+		goto error;
+	}
+
+	mode |= S_IFDIR;
+	ret = afs_vnode_create(dvnode, key, dentry->d_name.name,
+			       mode, &fid, &status, &cb, &server);
+	if (ret < 0)
+		goto mkdir_error;
+
+	inode = afs_iget(dir->i_sb, key, &fid, &status, &cb);
+	if (IS_ERR(inode)) {
+		/* ENOMEM at a really inconvenient time - just abandon the new
+		 * directory on the server */
+		ret = PTR_ERR(inode);
+		goto iget_error;
+	}
+
+	/* apply the status report we've got for the new vnode */
+	vnode = AFS_FS_I(inode);
+	spin_lock(&vnode->lock);
+	vnode->update_cnt++;
+	spin_unlock(&vnode->lock);
+	afs_vnode_finalise_status_update(vnode, server);
+	afs_put_server(server);
+
+	d_instantiate(dentry, inode);
+	if (d_unhashed(dentry)) {
+		_debug("not hashed");
+		d_rehash(dentry);
+	}
+	key_put(key);
+	_leave(" = 0");
+	return 0;
+
+iget_error:
+	afs_put_server(server);
+mkdir_error:
+	key_put(key);
+error:
+	d_drop(dentry);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * remove a directory from an AFS filesystem
+ */
+static int afs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct afs_vnode *dvnode, *vnode;
+	struct key *key;
+	int ret;
+
+	dvnode = AFS_FS_I(dir);
+
+	_enter("{%x:%d},{%s}",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
+
+	ret = -ENAMETOOLONG;
+	if (dentry->d_name.len > 255)
+		goto error;
+
+	key = afs_request_key(dvnode->volume->cell);
+	if (IS_ERR(key)) {
+		ret = PTR_ERR(key);
+		goto error;
+	}
+
+	ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, true);
+	if (ret < 0)
+		goto rmdir_error;
+
+	if (dentry->d_inode) {
+		vnode = AFS_FS_I(dentry->d_inode);
+		clear_nlink(&vnode->vfs_inode);
+		set_bit(AFS_VNODE_DELETED, &vnode->flags);
+		afs_discard_callback_on_delete(vnode);
+	}
+
+	key_put(key);
+	_leave(" = 0");
+	return 0;
+
+rmdir_error:
+	key_put(key);
+error:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * remove a file from an AFS filesystem
+ */
+static int afs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct afs_vnode *dvnode, *vnode;
+	struct key *key;
+	int ret;
+
+	dvnode = AFS_FS_I(dir);
+
+	_enter("{%x:%d},{%s}",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
+
+	ret = -ENAMETOOLONG;
+	if (dentry->d_name.len > 255)
+		goto error;
+
+	key = afs_request_key(dvnode->volume->cell);
+	if (IS_ERR(key)) {
+		ret = PTR_ERR(key);
+		goto error;
+	}
+
+	if (dentry->d_inode) {
+		vnode = AFS_FS_I(dentry->d_inode);
+
+		/* make sure we have a callback promise on the victim */
+		ret = afs_validate(vnode, key);
+		if (ret < 0)
+			goto error;
+	}
+
+	ret = afs_vnode_remove(dvnode, key, dentry->d_name.name, false);
+	if (ret < 0)
+		goto remove_error;
+
+	if (dentry->d_inode) {
+		/* if the file wasn't deleted due to excess hard links, the
+		 * fileserver will break the callback promise on the file - if
+		 * it had one - before it returns to us, and if it was deleted,
+		 * it won't
+		 *
+		 * however, if we didn't have a callback promise outstanding,
+		 * or it was outstanding on a different server, then it won't
+		 * break it either...
+		 */
+		vnode = AFS_FS_I(dentry->d_inode);
+		if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+			_debug("AFS_VNODE_DELETED");
+		if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
+			_debug("AFS_VNODE_CB_BROKEN");
+		set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
+		ret = afs_validate(vnode, key);
+		_debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
+	}
+
+	key_put(key);
+	_leave(" = 0");
+	return 0;
+
+remove_error:
+	key_put(key);
+error:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * create a regular file on an AFS filesystem
+ */
+static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
+		      struct nameidata *nd)
+{
+	struct afs_file_status status;
+	struct afs_callback cb;
+	struct afs_server *server;
+	struct afs_vnode *dvnode, *vnode;
+	struct afs_fid fid;
+	struct inode *inode;
+	struct key *key;
+	int ret;
+
+	dvnode = AFS_FS_I(dir);
+
+	_enter("{%x:%d},{%s},%o,",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
+
+	ret = -ENAMETOOLONG;
+	if (dentry->d_name.len > 255)
+		goto error;
+
+	key = afs_request_key(dvnode->volume->cell);
+	if (IS_ERR(key)) {
+		ret = PTR_ERR(key);
+		goto error;
+	}
+
+	mode |= S_IFREG;
+	ret = afs_vnode_create(dvnode, key, dentry->d_name.name,
+			       mode, &fid, &status, &cb, &server);
+	if (ret < 0)
+		goto create_error;
+
+	inode = afs_iget(dir->i_sb, key, &fid, &status, &cb);
+	if (IS_ERR(inode)) {
+		/* ENOMEM at a really inconvenient time - just abandon the new
+		 * directory on the server */
+		ret = PTR_ERR(inode);
+		goto iget_error;
+	}
+
+	/* apply the status report we've got for the new vnode */
+	vnode = AFS_FS_I(inode);
+	spin_lock(&vnode->lock);
+	vnode->update_cnt++;
+	spin_unlock(&vnode->lock);
+	afs_vnode_finalise_status_update(vnode, server);
+	afs_put_server(server);
+
+	d_instantiate(dentry, inode);
+	if (d_unhashed(dentry)) {
+		_debug("not hashed");
+		d_rehash(dentry);
+	}
+	key_put(key);
+	_leave(" = 0");
+	return 0;
+
+iget_error:
+	afs_put_server(server);
+create_error:
+	key_put(key);
+error:
+	d_drop(dentry);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * create a hard link between files in an AFS filesystem
+ */
+static int afs_link(struct dentry *from, struct inode *dir,
+		    struct dentry *dentry)
+{
+	struct afs_vnode *dvnode, *vnode;
+	struct key *key;
+	int ret;
+
+	vnode = AFS_FS_I(from->d_inode);
+	dvnode = AFS_FS_I(dir);
+
+	_enter("{%x:%d},{%x:%d},{%s}",
+	       vnode->fid.vid, vnode->fid.vnode,
+	       dvnode->fid.vid, dvnode->fid.vnode,
+	       dentry->d_name.name);
+
+	ret = -ENAMETOOLONG;
+	if (dentry->d_name.len > 255)
+		goto error;
+
+	key = afs_request_key(dvnode->volume->cell);
+	if (IS_ERR(key)) {
+		ret = PTR_ERR(key);
+		goto error;
+	}
+
+	ret = afs_vnode_link(dvnode, vnode, key, dentry->d_name.name);
+	if (ret < 0)
+		goto link_error;
+
+	atomic_inc(&vnode->vfs_inode.i_count);
+	d_instantiate(dentry, &vnode->vfs_inode);
+	key_put(key);
+	_leave(" = 0");
+	return 0;
+
+link_error:
+	key_put(key);
+error:
+	d_drop(dentry);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * create a symlink in an AFS filesystem
+ */
+static int afs_symlink(struct inode *dir, struct dentry *dentry,
+		       const char *content)
+{
+	struct afs_file_status status;
+	struct afs_server *server;
+	struct afs_vnode *dvnode, *vnode;
+	struct afs_fid fid;
+	struct inode *inode;
+	struct key *key;
+	int ret;
+
+	dvnode = AFS_FS_I(dir);
+
+	_enter("{%x:%d},{%s},%s",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name,
+	       content);
+
+	ret = -ENAMETOOLONG;
+	if (dentry->d_name.len > 255)
+		goto error;
+
+	ret = -EINVAL;
+	if (strlen(content) > 1023)
+		goto error;
+
+	key = afs_request_key(dvnode->volume->cell);
+	if (IS_ERR(key)) {
+		ret = PTR_ERR(key);
+		goto error;
+	}
+
+	ret = afs_vnode_symlink(dvnode, key, dentry->d_name.name, content,
+				&fid, &status, &server);
+	if (ret < 0)
+		goto create_error;
+
+	inode = afs_iget(dir->i_sb, key, &fid, &status, NULL);
+	if (IS_ERR(inode)) {
+		/* ENOMEM at a really inconvenient time - just abandon the new
+		 * directory on the server */
+		ret = PTR_ERR(inode);
+		goto iget_error;
+	}
+
+	/* apply the status report we've got for the new vnode */
+	vnode = AFS_FS_I(inode);
+	spin_lock(&vnode->lock);
+	vnode->update_cnt++;
+	spin_unlock(&vnode->lock);
+	afs_vnode_finalise_status_update(vnode, server);
+	afs_put_server(server);
+
+	d_instantiate(dentry, inode);
+	if (d_unhashed(dentry)) {
+		_debug("not hashed");
+		d_rehash(dentry);
+	}
+	key_put(key);
+	_leave(" = 0");
+	return 0;
+
+iget_error:
+	afs_put_server(server);
+create_error:
+	key_put(key);
+error:
+	d_drop(dentry);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * rename a file in an AFS filesystem and/or move it between directories
+ */
+static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
+		      struct inode *new_dir, struct dentry *new_dentry)
+{
+	struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
+	struct key *key;
+	int ret;
+
+	vnode = AFS_FS_I(old_dentry->d_inode);
+	orig_dvnode = AFS_FS_I(old_dir);
+	new_dvnode = AFS_FS_I(new_dir);
+
+	_enter("{%x:%d},{%x:%d},{%x:%d},{%s}",
+	       orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
+	       vnode->fid.vid, vnode->fid.vnode,
+	       new_dvnode->fid.vid, new_dvnode->fid.vnode,
+	       new_dentry->d_name.name);
+
+	ret = -ENAMETOOLONG;
+	if (new_dentry->d_name.len > 255)
+		goto error;
+
+	key = afs_request_key(orig_dvnode->volume->cell);
+	if (IS_ERR(key)) {
+		ret = PTR_ERR(key);
+		goto error;
+	}
+
+	ret = afs_vnode_rename(orig_dvnode, new_dvnode, key,
+			       old_dentry->d_name.name,
+			       new_dentry->d_name.name);
+	if (ret < 0)
+		goto rename_error;
+	key_put(key);
+	_leave(" = 0");
+	return 0;
+
+rename_error:
+	key_put(key);
+error:
+	d_drop(new_dentry);
+	_leave(" = %d", ret);
+	return ret;
+}
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 101bbb8c0d8..ae256498f4f 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -50,6 +50,7 @@ int afs_open(struct inode *inode, struct file *file)
 {
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 	struct key *key;
+	int ret;
 
 	_enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode);
 
@@ -59,6 +60,12 @@ int afs_open(struct inode *inode, struct file *file)
 		return PTR_ERR(key);
 	}
 
+	ret = afs_validate(vnode, key);
+	if (ret < 0) {
+		_leave(" = %d [val]", ret);
+		return ret;
+	}
+
 	file->private_data = key;
 	_leave(" = 0");
 	return 0;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 321b489aa90..f036b4cc51a 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -15,15 +15,29 @@
 #include "internal.h"
 #include "afs_fs.h"
 
+/*
+ * decode an AFSFid block
+ */
+static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid)
+{
+	const __be32 *bp = *_bp;
+
+	fid->vid		= ntohl(*bp++);
+	fid->vnode		= ntohl(*bp++);
+	fid->unique		= ntohl(*bp++);
+	*_bp = bp;
+}
+
 /*
  * decode an AFSFetchStatus block
  */
 static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
+				      struct afs_file_status *status,
 				      struct afs_vnode *vnode)
 {
 	const __be32 *bp = *_bp;
 	umode_t mode;
-	u64 data_version;
+	u64 data_version, size;
 	u32 changed = 0; /* becomes non-zero if ctime-type changes seen */
 
 #define EXTRACT(DST)				\
@@ -33,55 +47,68 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
 		DST = x;			\
 	} while (0)
 
-	vnode->status.if_version = ntohl(*bp++);
-	EXTRACT(vnode->status.type);
-	vnode->status.nlink = ntohl(*bp++);
-	EXTRACT(vnode->status.size);
+	status->if_version = ntohl(*bp++);
+	EXTRACT(status->type);
+	EXTRACT(status->nlink);
+	size = ntohl(*bp++);
 	data_version = ntohl(*bp++);
-	EXTRACT(vnode->status.author);
-	EXTRACT(vnode->status.owner);
-	EXTRACT(vnode->status.caller_access); /* call ticket dependent */
-	EXTRACT(vnode->status.anon_access);
-	EXTRACT(vnode->status.mode);
-	vnode->status.parent.vid = vnode->fid.vid;
-	EXTRACT(vnode->status.parent.vnode);
-	EXTRACT(vnode->status.parent.unique);
+	EXTRACT(status->author);
+	EXTRACT(status->owner);
+	EXTRACT(status->caller_access); /* call ticket dependent */
+	EXTRACT(status->anon_access);
+	EXTRACT(status->mode);
+	EXTRACT(status->parent.vnode);
+	EXTRACT(status->parent.unique);
 	bp++; /* seg size */
-	vnode->status.mtime_client = ntohl(*bp++);
-	vnode->status.mtime_server = ntohl(*bp++);
-	bp++; /* group */
+	status->mtime_client = ntohl(*bp++);
+	status->mtime_server = ntohl(*bp++);
+	EXTRACT(status->group);
 	bp++; /* sync counter */
 	data_version |= (u64) ntohl(*bp++) << 32;
-	bp++; /* spare2 */
-	bp++; /* spare3 */
-	bp++; /* spare4 */
+	bp++; /* lock count */
+	size |= (u64) ntohl(*bp++) << 32;
+	bp++; /* spare 4 */
 	*_bp = bp;
 
-	if (changed) {
-		_debug("vnode changed");
-		set_bit(AFS_VNODE_CHANGED, &vnode->flags);
-		vnode->vfs_inode.i_uid		= vnode->status.owner;
-		vnode->vfs_inode.i_size		= vnode->status.size;
-		vnode->vfs_inode.i_version	= vnode->fid.unique;
-
-		vnode->status.mode &= S_IALLUGO;
-		mode = vnode->vfs_inode.i_mode;
-		mode &= ~S_IALLUGO;
-		mode |= vnode->status.mode;
-		vnode->vfs_inode.i_mode = mode;
+	if (size != status->size) {
+		status->size = size;
+		changed |= true;
 	}
+	status->mode &= S_IALLUGO;
 
 	_debug("vnode time %lx, %lx",
-	       vnode->status.mtime_client, vnode->status.mtime_server);
-	vnode->vfs_inode.i_ctime.tv_sec	= vnode->status.mtime_server;
-	vnode->vfs_inode.i_mtime	= vnode->vfs_inode.i_ctime;
-	vnode->vfs_inode.i_atime	= vnode->vfs_inode.i_ctime;
-
-	if (vnode->status.data_version != data_version) {
-		_debug("vnode modified %llx", data_version);
-		vnode->status.data_version = data_version;
-		set_bit(AFS_VNODE_MODIFIED, &vnode->flags);
-		set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
+	       status->mtime_client, status->mtime_server);
+
+	if (vnode) {
+		status->parent.vid = vnode->fid.vid;
+		if (changed && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
+			_debug("vnode changed");
+			i_size_write(&vnode->vfs_inode, size);
+			vnode->vfs_inode.i_uid = status->owner;
+			vnode->vfs_inode.i_gid = status->group;
+			vnode->vfs_inode.i_version = vnode->fid.unique;
+			vnode->vfs_inode.i_nlink = status->nlink;
+
+			mode = vnode->vfs_inode.i_mode;
+			mode &= ~S_IALLUGO;
+			mode |= status->mode;
+			barrier();
+			vnode->vfs_inode.i_mode = mode;
+		}
+
+		vnode->vfs_inode.i_ctime.tv_sec	= status->mtime_server;
+		vnode->vfs_inode.i_mtime	= vnode->vfs_inode.i_ctime;
+		vnode->vfs_inode.i_atime	= vnode->vfs_inode.i_ctime;
+	}
+
+	if (status->data_version != data_version) {
+		status->data_version = data_version;
+		if (vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
+			_debug("vnode modified %llx on {%x:%u}",
+			       data_version, vnode->fid.vid, vnode->fid.vnode);
+			set_bit(AFS_VNODE_MODIFIED, &vnode->flags);
+			set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
+		}
 	}
 }
 
@@ -99,6 +126,17 @@ static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode)
 	*_bp = bp;
 }
 
+static void xdr_decode_AFSCallBack_raw(const __be32 **_bp,
+				       struct afs_callback *cb)
+{
+	const __be32 *bp = *_bp;
+
+	cb->version	= ntohl(*bp++);
+	cb->expiry	= ntohl(*bp++);
+	cb->type	= ntohl(*bp++);
+	*_bp = bp;
+}
+
 /*
  * decode an AFSVolSync block
  */
@@ -122,6 +160,7 @@ static void xdr_decode_AFSVolSync(const __be32 **_bp,
 static int afs_deliver_fs_fetch_status(struct afs_call *call,
 				       struct sk_buff *skb, bool last)
 {
+	struct afs_vnode *vnode = call->reply;
 	const __be32 *bp;
 
 	_enter(",,%u", last);
@@ -135,8 +174,8 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call,
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	xdr_decode_AFSFetchStatus(&bp, call->reply);
-	xdr_decode_AFSCallBack(&bp, call->reply);
+	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+	xdr_decode_AFSCallBack(&bp, vnode);
 	if (call->reply2)
 		xdr_decode_AFSVolSync(&bp, call->reply2);
 
@@ -166,9 +205,10 @@ int afs_fs_fetch_file_status(struct afs_server *server,
 	struct afs_call *call;
 	__be32 *bp;
 
-	_enter(",%x,,,", key_serial(key));
+	_enter(",%x,{%x:%d},,",
+	       key_serial(key), vnode->fid.vid, vnode->fid.vnode);
 
-	call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, 120);
+	call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
 	if (!call)
 		return -ENOMEM;
 
@@ -194,6 +234,7 @@ int afs_fs_fetch_file_status(struct afs_server *server,
 static int afs_deliver_fs_fetch_data(struct afs_call *call,
 				     struct sk_buff *skb, bool last)
 {
+	struct afs_vnode *vnode = call->reply;
 	const __be32 *bp;
 	struct page *page;
 	void *buffer;
@@ -248,7 +289,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
 
 		/* extract the metadata */
 	case 3:
-		ret = afs_extract_data(call, skb, last, call->buffer, 120);
+		ret = afs_extract_data(call, skb, last, call->buffer,
+				       (21 + 3 + 6) * 4);
 		switch (ret) {
 		case 0:		break;
 		case -EAGAIN:	return 0;
@@ -256,8 +298,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
 		}
 
 		bp = call->buffer;
-		xdr_decode_AFSFetchStatus(&bp, call->reply);
-		xdr_decode_AFSCallBack(&bp, call->reply);
+		xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+		xdr_decode_AFSCallBack(&bp, vnode);
 		if (call->reply2)
 			xdr_decode_AFSVolSync(&bp, call->reply2);
 
@@ -296,7 +338,6 @@ int afs_fs_fetch_data(struct afs_server *server,
 		      struct afs_vnode *vnode,
 		      off_t offset, size_t length,
 		      struct page *buffer,
-		      struct afs_volsync *volsync,
 		      const struct afs_wait_mode *wait_mode)
 {
 	struct afs_call *call;
@@ -304,13 +345,13 @@ int afs_fs_fetch_data(struct afs_server *server,
 
 	_enter("");
 
-	call = afs_alloc_flat_call(&afs_RXFSFetchData, 24, 120);
+	call = afs_alloc_flat_call(&afs_RXFSFetchData, 24, (21 + 3 + 6) * 4);
 	if (!call)
 		return -ENOMEM;
 
 	call->key = key;
 	call->reply = vnode;
-	call->reply2 = volsync;
+	call->reply2 = NULL; /* volsync */
 	call->reply3 = buffer;
 	call->service_id = FS_SERVICE;
 	call->port = htons(AFS_FS_PORT);
@@ -411,3 +452,485 @@ int afs_fs_give_up_callbacks(struct afs_server *server,
 
 	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
 }
+
+/*
+ * deliver reply data to an FS.CreateFile or an FS.MakeDir
+ */
+static int afs_deliver_fs_create_vnode(struct afs_call *call,
+				       struct sk_buff *skb, bool last)
+{
+	struct afs_vnode *vnode = call->reply;
+	const __be32 *bp;
+
+	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+
+	afs_transfer_reply(call, skb);
+	if (!last)
+		return 0;
+
+	if (call->reply_size != call->reply_max)
+		return -EBADMSG;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	xdr_decode_AFSFid(&bp, call->reply2);
+	xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL);
+	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+	xdr_decode_AFSCallBack_raw(&bp, call->reply4);
+	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.CreateFile and FS.MakeDir operation type
+ */
+static const struct afs_call_type afs_RXFSCreateXXXX = {
+	.name		= "FS.CreateXXXX",
+	.deliver	= afs_deliver_fs_create_vnode,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * create a file or make a directory
+ */
+int afs_fs_create(struct afs_server *server,
+		  struct key *key,
+		  struct afs_vnode *vnode,
+		  const char *name,
+		  umode_t mode,
+		  struct afs_fid *newfid,
+		  struct afs_file_status *newstatus,
+		  struct afs_callback *newcb,
+		  const struct afs_wait_mode *wait_mode)
+{
+	struct afs_call *call;
+	size_t namesz, reqsz, padsz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = strlen(name);
+	padsz = (4 - (namesz & 3)) & 3;
+	reqsz = (5 * 4) + namesz + padsz + (6 * 4);
+
+	call = afs_alloc_flat_call(&afs_RXFSCreateXXXX, reqsz,
+				   (3 + 21 + 21 + 3 + 6) * 4);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = key;
+	call->reply = vnode;
+	call->reply2 = newfid;
+	call->reply3 = newstatus;
+	call->reply4 = newcb;
+	call->service_id = FS_SERVICE;
+	call->port = htons(AFS_FS_PORT);
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(S_ISDIR(mode) ? FSMAKEDIR : FSCREATEFILE);
+	*bp++ = htonl(vnode->fid.vid);
+	*bp++ = htonl(vnode->fid.vnode);
+	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(namesz);
+	memcpy(bp, name, namesz);
+	bp = (void *) bp + namesz;
+	if (padsz > 0) {
+		memset(bp, 0, padsz);
+		bp = (void *) bp + padsz;
+	}
+	*bp++ = htonl(AFS_SET_MODE);
+	*bp++ = 0; /* mtime */
+	*bp++ = 0; /* owner */
+	*bp++ = 0; /* group */
+	*bp++ = htonl(mode & S_IALLUGO); /* unix mode */
+	*bp++ = 0; /* segment size */
+
+	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
+
+/*
+ * deliver reply data to an FS.RemoveFile or FS.RemoveDir
+ */
+static int afs_deliver_fs_remove(struct afs_call *call,
+				 struct sk_buff *skb, bool last)
+{
+	struct afs_vnode *vnode = call->reply;
+	const __be32 *bp;
+
+	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+
+	afs_transfer_reply(call, skb);
+	if (!last)
+		return 0;
+
+	if (call->reply_size != call->reply_max)
+		return -EBADMSG;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.RemoveDir/FS.RemoveFile operation type
+ */
+static const struct afs_call_type afs_RXFSRemoveXXXX = {
+	.name		= "FS.RemoveXXXX",
+	.deliver	= afs_deliver_fs_remove,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * remove a file or directory
+ */
+int afs_fs_remove(struct afs_server *server,
+		  struct key *key,
+		  struct afs_vnode *vnode,
+		  const char *name,
+		  bool isdir,
+		  const struct afs_wait_mode *wait_mode)
+{
+	struct afs_call *call;
+	size_t namesz, reqsz, padsz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = strlen(name);
+	padsz = (4 - (namesz & 3)) & 3;
+	reqsz = (5 * 4) + namesz + padsz;
+
+	call = afs_alloc_flat_call(&afs_RXFSRemoveXXXX, reqsz, (21 + 6) * 4);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = key;
+	call->reply = vnode;
+	call->service_id = FS_SERVICE;
+	call->port = htons(AFS_FS_PORT);
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(isdir ? FSREMOVEDIR : FSREMOVEFILE);
+	*bp++ = htonl(vnode->fid.vid);
+	*bp++ = htonl(vnode->fid.vnode);
+	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(namesz);
+	memcpy(bp, name, namesz);
+	bp = (void *) bp + namesz;
+	if (padsz > 0) {
+		memset(bp, 0, padsz);
+		bp = (void *) bp + padsz;
+	}
+
+	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
+
+/*
+ * deliver reply data to an FS.Link
+ */
+static int afs_deliver_fs_link(struct afs_call *call,
+			       struct sk_buff *skb, bool last)
+{
+	struct afs_vnode *dvnode = call->reply, *vnode = call->reply2;
+	const __be32 *bp;
+
+	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+
+	afs_transfer_reply(call, skb);
+	if (!last)
+		return 0;
+
+	if (call->reply_size != call->reply_max)
+		return -EBADMSG;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+	xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode);
+	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.Link operation type
+ */
+static const struct afs_call_type afs_RXFSLink = {
+	.name		= "FS.Link",
+	.deliver	= afs_deliver_fs_link,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * make a hard link
+ */
+int afs_fs_link(struct afs_server *server,
+		struct key *key,
+		struct afs_vnode *dvnode,
+		struct afs_vnode *vnode,
+		const char *name,
+		const struct afs_wait_mode *wait_mode)
+{
+	struct afs_call *call;
+	size_t namesz, reqsz, padsz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = strlen(name);
+	padsz = (4 - (namesz & 3)) & 3;
+	reqsz = (5 * 4) + namesz + padsz + (3 * 4);
+
+	call = afs_alloc_flat_call(&afs_RXFSLink, reqsz, (21 + 21 + 6) * 4);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = key;
+	call->reply = dvnode;
+	call->reply2 = vnode;
+	call->service_id = FS_SERVICE;
+	call->port = htons(AFS_FS_PORT);
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSLINK);
+	*bp++ = htonl(dvnode->fid.vid);
+	*bp++ = htonl(dvnode->fid.vnode);
+	*bp++ = htonl(dvnode->fid.unique);
+	*bp++ = htonl(namesz);
+	memcpy(bp, name, namesz);
+	bp = (void *) bp + namesz;
+	if (padsz > 0) {
+		memset(bp, 0, padsz);
+		bp = (void *) bp + padsz;
+	}
+	*bp++ = htonl(vnode->fid.vid);
+	*bp++ = htonl(vnode->fid.vnode);
+	*bp++ = htonl(vnode->fid.unique);
+
+	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
+
+/*
+ * deliver reply data to an FS.Symlink
+ */
+static int afs_deliver_fs_symlink(struct afs_call *call,
+				  struct sk_buff *skb, bool last)
+{
+	struct afs_vnode *vnode = call->reply;
+	const __be32 *bp;
+
+	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+
+	afs_transfer_reply(call, skb);
+	if (!last)
+		return 0;
+
+	if (call->reply_size != call->reply_max)
+		return -EBADMSG;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	xdr_decode_AFSFid(&bp, call->reply2);
+	xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL);
+	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.Symlink operation type
+ */
+static const struct afs_call_type afs_RXFSSymlink = {
+	.name		= "FS.Symlink",
+	.deliver	= afs_deliver_fs_symlink,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * create a symbolic link
+ */
+int afs_fs_symlink(struct afs_server *server,
+		   struct key *key,
+		   struct afs_vnode *vnode,
+		   const char *name,
+		   const char *contents,
+		   struct afs_fid *newfid,
+		   struct afs_file_status *newstatus,
+		   const struct afs_wait_mode *wait_mode)
+{
+	struct afs_call *call;
+	size_t namesz, reqsz, padsz, c_namesz, c_padsz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = strlen(name);
+	padsz = (4 - (namesz & 3)) & 3;
+
+	c_namesz = strlen(contents);
+	c_padsz = (4 - (c_namesz & 3)) & 3;
+
+	reqsz = (6 * 4) + namesz + padsz + c_namesz + c_padsz + (6 * 4);
+
+	call = afs_alloc_flat_call(&afs_RXFSSymlink, reqsz,
+				   (3 + 21 + 21 + 6) * 4);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = key;
+	call->reply = vnode;
+	call->reply2 = newfid;
+	call->reply3 = newstatus;
+	call->service_id = FS_SERVICE;
+	call->port = htons(AFS_FS_PORT);
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSSYMLINK);
+	*bp++ = htonl(vnode->fid.vid);
+	*bp++ = htonl(vnode->fid.vnode);
+	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(namesz);
+	memcpy(bp, name, namesz);
+	bp = (void *) bp + namesz;
+	if (padsz > 0) {
+		memset(bp, 0, padsz);
+		bp = (void *) bp + padsz;
+	}
+	*bp++ = htonl(c_namesz);
+	memcpy(bp, contents, c_namesz);
+	bp = (void *) bp + c_namesz;
+	if (c_padsz > 0) {
+		memset(bp, 0, c_padsz);
+		bp = (void *) bp + c_padsz;
+	}
+	*bp++ = htonl(AFS_SET_MODE);
+	*bp++ = 0; /* mtime */
+	*bp++ = 0; /* owner */
+	*bp++ = 0; /* group */
+	*bp++ = htonl(S_IRWXUGO); /* unix mode */
+	*bp++ = 0; /* segment size */
+
+	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
+
+/*
+ * deliver reply data to an FS.Rename
+ */
+static int afs_deliver_fs_rename(struct afs_call *call,
+				  struct sk_buff *skb, bool last)
+{
+	struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2;
+	const __be32 *bp;
+
+	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+
+	afs_transfer_reply(call, skb);
+	if (!last)
+		return 0;
+
+	if (call->reply_size != call->reply_max)
+		return -EBADMSG;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode);
+	if (new_dvnode != orig_dvnode)
+		xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode);
+	/* xdr_decode_AFSVolSync(&bp, call->replyX); */
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.Rename operation type
+ */
+static const struct afs_call_type afs_RXFSRename = {
+	.name		= "FS.Rename",
+	.deliver	= afs_deliver_fs_rename,
+	.abort_to_error	= afs_abort_to_error,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * create a symbolic link
+ */
+int afs_fs_rename(struct afs_server *server,
+		  struct key *key,
+		  struct afs_vnode *orig_dvnode,
+		  const char *orig_name,
+		  struct afs_vnode *new_dvnode,
+		  const char *new_name,
+		  const struct afs_wait_mode *wait_mode)
+{
+	struct afs_call *call;
+	size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz;
+	__be32 *bp;
+
+	_enter("");
+
+	o_namesz = strlen(orig_name);
+	o_padsz = (4 - (o_namesz & 3)) & 3;
+
+	n_namesz = strlen(new_name);
+	n_padsz = (4 - (n_namesz & 3)) & 3;
+
+	reqsz = (4 * 4) +
+		4 + o_namesz + o_padsz +
+		(3 * 4) +
+		4 + n_namesz + n_padsz;
+
+	call = afs_alloc_flat_call(&afs_RXFSRename, reqsz, (21 + 21 + 6) * 4);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = key;
+	call->reply = orig_dvnode;
+	call->reply2 = new_dvnode;
+	call->service_id = FS_SERVICE;
+	call->port = htons(AFS_FS_PORT);
+
+	/* marshall the parameters */
+	bp = call->request;
+	*bp++ = htonl(FSRENAME);
+	*bp++ = htonl(orig_dvnode->fid.vid);
+	*bp++ = htonl(orig_dvnode->fid.vnode);
+	*bp++ = htonl(orig_dvnode->fid.unique);
+	*bp++ = htonl(o_namesz);
+	memcpy(bp, orig_name, o_namesz);
+	bp = (void *) bp + o_namesz;
+	if (o_padsz > 0) {
+		memset(bp, 0, o_padsz);
+		bp = (void *) bp + o_padsz;
+	}
+
+	*bp++ = htonl(new_dvnode->fid.vid);
+	*bp++ = htonl(new_dvnode->fid.vnode);
+	*bp++ = htonl(new_dvnode->fid.unique);
+	*bp++ = htonl(n_namesz);
+	memcpy(bp, new_name, n_namesz);
+	bp = (void *) bp + n_namesz;
+	if (n_padsz > 0) {
+		memset(bp, 0, n_padsz);
+		bp = (void *) bp + n_padsz;
+	}
+
+	return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 22733622829..56ca8581b37 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -33,7 +33,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 {
 	struct inode *inode = AFS_VNODE_TO_I(vnode);
 
-	_debug("FS: ft=%d lk=%d sz=%Zu ver=%Lu mod=%hu",
+	_debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu",
 	       vnode->status.type,
 	       vnode->status.nlink,
 	       vnode->status.size,
@@ -115,8 +115,9 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
 /*
  * inode retrieval
  */
-inline struct inode *afs_iget(struct super_block *sb, struct key *key,
-			      struct afs_fid *fid)
+struct inode *afs_iget(struct super_block *sb, struct key *key,
+		       struct afs_fid *fid, struct afs_file_status *status,
+		       struct afs_callback *cb)
 {
 	struct afs_iget_data data = { .fid = *fid };
 	struct afs_super_info *as;
@@ -156,16 +157,37 @@ inline struct inode *afs_iget(struct super_block *sb, struct key *key,
 			       &vnode->cache);
 #endif
 
-	/* okay... it's a new inode */
-	set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
-	ret = afs_vnode_fetch_status(vnode, NULL, key);
-	if (ret < 0)
-		goto bad_inode;
+	if (!status) {
+		/* it's a remotely extant inode */
+		set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
+		ret = afs_vnode_fetch_status(vnode, NULL, key);
+		if (ret < 0)
+			goto bad_inode;
+	} else {
+		/* it's an inode we just created */
+		memcpy(&vnode->status, status, sizeof(vnode->status));
+
+		if (!cb) {
+			/* it's a symlink we just created (the fileserver
+			 * didn't give us a callback) */
+			vnode->cb_version = 0;
+			vnode->cb_expiry = 0;
+			vnode->cb_type = 0;
+			vnode->cb_expires = get_seconds();
+		} else {
+			vnode->cb_version = cb->version;
+			vnode->cb_expiry = cb->expiry;
+			vnode->cb_type = cb->type;
+			vnode->cb_expires = vnode->cb_expiry + get_seconds();
+		}
+	}
+
 	ret = afs_inode_map_status(vnode, key);
 	if (ret < 0)
 		goto bad_inode;
 
 	/* success */
+	clear_bit(AFS_VNODE_UNSET, &vnode->flags);
 	inode->i_flags |= S_NOATIME;
 	unlock_new_inode(inode);
 	_leave(" = %p [CB { v=%u t=%u }]", inode, vnode->cb_version, vnode->cb_type);
@@ -181,6 +203,78 @@ bad_inode:
 	return ERR_PTR(ret);
 }
 
+/*
+ * validate a vnode/inode
+ * - there are several things we need to check
+ *   - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
+ *     symlink)
+ *   - parent dir metadata changed (security changes)
+ *   - dentry data changed (write, truncate)
+ *   - dentry metadata changed (security changes)
+ */
+int afs_validate(struct afs_vnode *vnode, struct key *key)
+{
+	int ret;
+
+	_enter("{v={%x:%u} fl=%lx},%x",
+	       vnode->fid.vid, vnode->fid.vnode, vnode->flags,
+	       key_serial(key));
+
+	if (vnode->cb_promised &&
+	    !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) &&
+	    !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) &&
+	    !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
+		if (vnode->cb_expires < get_seconds() + 10) {
+			_debug("callback expired");
+			set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
+		} else {
+			goto valid;
+		}
+	}
+
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+		goto valid;
+
+	mutex_lock(&vnode->validate_lock);
+
+	/* if the promise has expired, we need to check the server again to get
+	 * a new promise - note that if the (parent) directory's metadata was
+	 * changed then the security may be different and we may no longer have
+	 * access */
+	if (!vnode->cb_promised ||
+	    test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags)) {
+		_debug("not promised");
+		ret = afs_vnode_fetch_status(vnode, NULL, key);
+		if (ret < 0)
+			goto error_unlock;
+		_debug("new promise [fl=%lx]", vnode->flags);
+	}
+
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+		_debug("file already deleted");
+		ret = -ESTALE;
+		goto error_unlock;
+	}
+
+	/* if the vnode's data version number changed then its contents are
+	 * different */
+	if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
+		_debug("zap data {%x:%d}", vnode->fid.vid, vnode->fid.vnode);
+		invalidate_remote_inode(&vnode->vfs_inode);
+	}
+
+	clear_bit(AFS_VNODE_MODIFIED, &vnode->flags);
+	mutex_unlock(&vnode->validate_lock);
+valid:
+	_leave(" = 0");
+	return 0;
+
+error_unlock:
+	mutex_unlock(&vnode->validate_lock);
+	_leave(" = %d", ret);
+	return ret;
+}
+
 /*
  * read the attributes of an inode
  */
@@ -207,9 +301,10 @@ void afs_clear_inode(struct inode *inode)
 
 	vnode = AFS_FS_I(inode);
 
-	_enter("ino=%lu { vn=%08x v=%u x=%u t=%u }",
-	       inode->i_ino,
+	_enter("{%x:%d.%d} v=%u x=%u t=%u }",
+	       vnode->fid.vid,
 	       vnode->fid.vnode,
+	       vnode->fid.unique,
 	       vnode->cb_version,
 	       vnode->cb_expiry,
 	       vnode->cb_type);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 6120d4bd19e..73bfa0b2d99 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -80,6 +80,7 @@ struct afs_call {
 	void			*reply;		/* reply buffer (first part) */
 	void			*reply2;	/* reply buffer (second part) */
 	void			*reply3;	/* reply buffer (third part) */
+	void			*reply4;	/* reply buffer (fourth part) */
 	enum {					/* call state */
 		AFS_CALL_REQUESTING,	/* request is being sent for outgoing call */
 		AFS_CALL_AWAIT_REPLY,	/* awaiting reply to outgoing call */
@@ -300,19 +301,18 @@ struct afs_vnode {
 #endif
 	struct afs_permits	*permits;	/* cache of permits so far obtained */
 	struct mutex		permits_lock;	/* lock for altering permits list */
+	struct mutex		validate_lock;	/* lock for validating this vnode */
 	wait_queue_head_t	update_waitq;	/* status fetch waitqueue */
-	unsigned		update_cnt;	/* number of outstanding ops that will update the
+	int			update_cnt;	/* number of outstanding ops that will update the
 						 * status */
 	spinlock_t		lock;		/* waitqueue/flags lock */
 	unsigned long		flags;
 #define AFS_VNODE_CB_BROKEN	0		/* set if vnode's callback was broken */
-#define AFS_VNODE_CHANGED	1		/* set if vnode's metadata changed */
+#define AFS_VNODE_UNSET		1		/* set if vnode attributes not yet set */
 #define AFS_VNODE_MODIFIED	2		/* set if vnode's data modified */
 #define AFS_VNODE_ZAP_DATA	3		/* set if vnode's data should be invalidated */
 #define AFS_VNODE_DELETED	4		/* set if vnode deleted on server */
 #define AFS_VNODE_MOUNTPOINT	5		/* set if vnode is a mountpoint symlink */
-#define AFS_VNODE_DIR_CHANGED	6		/* set if vnode's parent dir metadata changed */
-#define AFS_VNODE_DIR_MODIFIED	7		/* set if vnode's parent dir data modified */
 
 	long			acl_order;	/* ACL check count (callback break count) */
 
@@ -320,7 +320,6 @@ struct afs_vnode {
 	struct rb_node		server_rb;	/* link in server->fs_vnodes */
 	struct rb_node		cb_promise;	/* link in server->cb_promises */
 	struct work_struct	cb_broken_work;	/* work to be done on callback break */
-	struct mutex		cb_broken_lock;	/* lock against multiple attempts to fix break */
 	time_t			cb_expires;	/* time at which callback expires */
 	time_t			cb_expires_at;	/* time used to order cb_promise */
 	unsigned		cb_version;	/* callback version */
@@ -388,6 +387,7 @@ extern void afs_init_callback_state(struct afs_server *);
 extern void afs_broken_callback_work(struct work_struct *);
 extern void afs_break_callbacks(struct afs_server *, size_t,
 				struct afs_callback[]);
+extern void afs_discard_callback_on_delete(struct afs_vnode *);
 extern void afs_give_up_callback(struct afs_vnode *);
 extern void afs_dispatch_give_up_callbacks(struct work_struct *);
 extern void afs_flush_callback_breaks(struct afs_server *);
@@ -448,14 +448,34 @@ extern int afs_fs_give_up_callbacks(struct afs_server *,
 				    const struct afs_wait_mode *);
 extern int afs_fs_fetch_data(struct afs_server *, struct key *,
 			     struct afs_vnode *, off_t, size_t, struct page *,
-			     struct afs_volsync *,
 			     const struct afs_wait_mode *);
+extern int afs_fs_create(struct afs_server *, struct key *,
+			 struct afs_vnode *, const char *, umode_t,
+			 struct afs_fid *, struct afs_file_status *,
+			 struct afs_callback *,
+			 const struct afs_wait_mode *);
+extern int afs_fs_remove(struct afs_server *, struct key *,
+			 struct afs_vnode *, const char *, bool,
+			 const struct afs_wait_mode *);
+extern int afs_fs_link(struct afs_server *, struct key *, struct afs_vnode *,
+		       struct afs_vnode *, const char *,
+		       const struct afs_wait_mode *);
+extern int afs_fs_symlink(struct afs_server *, struct key *,
+			  struct afs_vnode *, const char *, const char *,
+			  struct afs_fid *, struct afs_file_status *,
+			  const struct afs_wait_mode *);
+extern int afs_fs_rename(struct afs_server *, struct key *,
+			 struct afs_vnode *, const char *,
+			 struct afs_vnode *, const char *,
+			 const struct afs_wait_mode *);
 
 /*
  * inode.c
  */
 extern struct inode *afs_iget(struct super_block *, struct key *,
-			      struct afs_fid *);
+			      struct afs_fid *, struct afs_file_status *,
+			      struct afs_callback *);
+extern int afs_validate(struct afs_vnode *, struct key *);
 extern int afs_inode_getattr(struct vfsmount *, struct dentry *,
 			     struct kstat *);
 extern void afs_zap_permits(struct rcu_head *);
@@ -522,7 +542,11 @@ extern int afs_permission(struct inode *, int, struct nameidata *);
  */
 extern spinlock_t afs_server_peer_lock;
 
-#define afs_get_server(S) do { atomic_inc(&(S)->usage); } while(0)
+#define afs_get_server(S)					\
+do {								\
+	_debug("GET SERVER %d", atomic_read(&(S)->usage));	\
+	atomic_inc(&(S)->usage);				\
+} while(0)
 
 extern struct afs_server *afs_lookup_server(struct afs_cell *,
 					    const struct in_addr *);
@@ -588,10 +612,24 @@ static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
 	return &vnode->vfs_inode;
 }
 
+extern void afs_vnode_finalise_status_update(struct afs_vnode *,
+					     struct afs_server *);
 extern int afs_vnode_fetch_status(struct afs_vnode *, struct afs_vnode *,
 				  struct key *);
 extern int afs_vnode_fetch_data(struct afs_vnode *, struct key *,
 				off_t, size_t, struct page *);
+extern int afs_vnode_create(struct afs_vnode *, struct key *, const char *,
+			    umode_t, struct afs_fid *, struct afs_file_status *,
+			    struct afs_callback *, struct afs_server **);
+extern int afs_vnode_remove(struct afs_vnode *, struct key *, const char *,
+			    bool);
+extern int afs_vnode_link(struct afs_vnode *, struct afs_vnode *, struct key *,
+			  const char *);
+extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *,
+			     const char *, struct afs_fid *,
+			     struct afs_file_status *, struct afs_server **);
+extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *,
+			    struct key *, const char *, const char *);
 
 /*
  * volume.c
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 98e9276c46a..cdb9792d816 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -22,6 +22,7 @@ int afs_abort_to_error(u32 abort_code)
 {
 	switch (abort_code) {
 	case 13:		return -EACCES;
+	case 30:		return -EROFS;
 	case VSALVAGE:		return -EIO;
 	case VNOVNODE:		return -ENOENT;
 	case VNOVOL:		return -ENOMEDIUM;
@@ -33,6 +34,24 @@ int afs_abort_to_error(u32 abort_code)
 	case VOVERQUOTA:	return -EDQUOT;
 	case VBUSY:		return -EBUSY;
 	case VMOVED:		return -ENXIO;
-	default:		return -EIO;
+	case 0x2f6df0c:		return -EACCES;
+	case 0x2f6df0f:		return -EBUSY;
+	case 0x2f6df10:		return -EEXIST;
+	case 0x2f6df11:		return -EXDEV;
+	case 0x2f6df13:		return -ENOTDIR;
+	case 0x2f6df14:		return -EISDIR;
+	case 0x2f6df15:		return -EINVAL;
+	case 0x2f6df1a:		return -EFBIG;
+	case 0x2f6df1b:		return -ENOSPC;
+	case 0x2f6df1d:		return -EROFS;
+	case 0x2f6df1e:		return -EMLINK;
+	case 0x2f6df20:		return -EDOM;
+	case 0x2f6df21:		return -ERANGE;
+	case 0x2f6df22:		return -EDEADLK;
+	case 0x2f6df23:		return -ENAMETOOLONG;
+	case 0x2f6df24:		return -ENOLCK;
+	case 0x2f6df26:		return -ENOTEMPTY;
+	case 0x2f6df78:		return -EDQUOT;
+	default:		return -EREMOTEIO;
 	}
 }
diff --git a/fs/afs/security.c b/fs/afs/security.c
index cbdd7f7162f..f9f424d8045 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -92,7 +92,7 @@ static struct afs_vnode *afs_get_auth_inode(struct afs_vnode *vnode,
 		ASSERT(auth_inode != NULL);
 	} else {
 		auth_inode = afs_iget(vnode->vfs_inode.i_sb, key,
-				      &vnode->status.parent);
+				      &vnode->status.parent, NULL, NULL);
 		if (IS_ERR(auth_inode))
 			return ERR_PTR(PTR_ERR(auth_inode));
 	}
@@ -288,7 +288,8 @@ int afs_permission(struct inode *inode, int mask, struct nameidata *nd)
 	struct key *key;
 	int ret;
 
-	_enter("{%x:%x},%x,", vnode->fid.vid, vnode->fid.vnode, mask);
+	_enter("{{%x:%x},%lx},%x,",
+	       vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
 
 	key = afs_request_key(vnode->volume->cell);
 	if (IS_ERR(key)) {
@@ -296,13 +297,19 @@ int afs_permission(struct inode *inode, int mask, struct nameidata *nd)
 		return PTR_ERR(key);
 	}
 
+	/* if the promise has expired, we need to check the server again */
+	if (!vnode->cb_promised) {
+		_debug("not promised");
+		ret = afs_vnode_fetch_status(vnode, NULL, key);
+		if (ret < 0)
+			goto error;
+		_debug("new promise [fl=%lx]", vnode->flags);
+	}
+
 	/* check the permits to see if we've got one yet */
 	ret = afs_check_permit(vnode, key, &access);
-	if (ret < 0) {
-		key_put(key);
-		_leave(" = %d [check]", ret);
-		return ret;
-	}
+	if (ret < 0)
+		goto error;
 
 	/* interpret the access mask */
 	_debug("REQ %x ACC %x on %s",
@@ -336,10 +343,14 @@ int afs_permission(struct inode *inode, int mask, struct nameidata *nd)
 	}
 
 	key_put(key);
-	return generic_permission(inode, mask, NULL);
+	ret = generic_permission(inode, mask, NULL);
+	_leave(" = %d", ret);
+	return ret;
 
 permission_denied:
+	ret = -EACCES;
+error:
 	key_put(key);
-	_leave(" = -EACCES");
-	return -EACCES;
+	_leave(" = %d", ret);
+	return ret;
 }
diff --git a/fs/afs/server.c b/fs/afs/server.c
index bde6125c2f2..96bb23b476a 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -223,6 +223,8 @@ void afs_put_server(struct afs_server *server)
 
 	_enter("%p{%d}", server, atomic_read(&server->usage));
 
+	_debug("PUT SERVER %d", atomic_read(&server->usage));
+
 	ASSERTCMP(atomic_read(&server->usage), >, 0);
 
 	if (likely(!atomic_dec_and_test(&server->usage))) {
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 497350a5463..cebd03c91f5 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -331,7 +331,7 @@ static int afs_fill_super(struct super_block *sb, void *data)
 	fid.vid		= as->volume->vid;
 	fid.vnode	= 1;
 	fid.unique	= 1;
-	inode = afs_iget(sb, params->key, &fid);
+	inode = afs_iget(sb, params->key, &fid, NULL, NULL);
 	if (IS_ERR(inode))
 		goto error_inode;
 
@@ -473,9 +473,9 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep,
 		inode_init_once(&vnode->vfs_inode);
 		init_waitqueue_head(&vnode->update_waitq);
 		mutex_init(&vnode->permits_lock);
+		mutex_init(&vnode->validate_lock);
 		spin_lock_init(&vnode->lock);
 		INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work);
-		mutex_init(&vnode->cb_broken_lock);
 	}
 }
 
@@ -497,7 +497,7 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 
 	vnode->volume		= NULL;
 	vnode->update_cnt	= 0;
-	vnode->flags		= 0;
+	vnode->flags		= 1 << AFS_VNODE_UNSET;
 	vnode->cb_promised	= false;
 
 	return &vnode->vfs_inode;
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index 160097619ec..a1904ab8426 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -30,7 +30,7 @@ static noinline bool dump_tree_aux(struct rb_node *node, struct rb_node *parent,
 		bad = dump_tree_aux(node->rb_left, node, depth + 2, '/');
 
 	vnode = rb_entry(node, struct afs_vnode, cb_promise);
-	kdebug("%c %*.*s%c%p {%d}",
+	_debug("%c %*.*s%c%p {%d}",
 	       rb_is_red(node) ? 'R' : 'B',
 	       depth, depth, "", lr,
 	       vnode, vnode->cb_expires_at);
@@ -47,7 +47,7 @@ static noinline bool dump_tree_aux(struct rb_node *node, struct rb_node *parent,
 
 static noinline void dump_tree(const char *name, struct afs_server *server)
 {
-	kenter("%s", name);
+	_enter("%s", name);
 	if (dump_tree_aux(server->cb_promises.rb_node, NULL, 0, '-'))
 		BUG();
 }
@@ -187,47 +187,61 @@ static void afs_vnode_deleted_remotely(struct afs_vnode *vnode)
 		spin_unlock(&server->cb_lock);
 	}
 
+	spin_lock(&vnode->server->fs_lock);
+	rb_erase(&vnode->server_rb, &vnode->server->fs_vnodes);
+	spin_unlock(&vnode->server->fs_lock);
+
+	vnode->server = NULL;
 	afs_put_server(server);
 }
 
 /*
- * finish off updating the recorded status of a file
+ * finish off updating the recorded status of a file after a successful
+ * operation completion
  * - starts callback expiry timer
  * - adds to server's callback list
  */
-static void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
-					     struct afs_server *server,
-					     int ret)
+void afs_vnode_finalise_status_update(struct afs_vnode *vnode,
+				      struct afs_server *server)
 {
 	struct afs_server *oldserver = NULL;
 
-	_enter("%p,%p,%d", vnode, server, ret);
+	_enter("%p,%p", vnode, server);
+
+	spin_lock(&vnode->lock);
+	clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
+	afs_vnode_note_promise(vnode, server);
+	vnode->update_cnt--;
+	ASSERTCMP(vnode->update_cnt, >=, 0);
+	spin_unlock(&vnode->lock);
+
+	wake_up_all(&vnode->update_waitq);
+	afs_put_server(oldserver);
+	_leave("");
+}
+
+/*
+ * finish off updating the recorded status of a file after an operation failed
+ */
+static void afs_vnode_status_update_failed(struct afs_vnode *vnode, int ret)
+{
+	_enter("%p,%d", vnode, ret);
 
 	spin_lock(&vnode->lock);
 
 	clear_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
 
-	switch (ret) {
-	case 0:
-		afs_vnode_note_promise(vnode, server);
-		break;
-	case -ENOENT:
+	if (ret == -ENOENT) {
 		/* the file was deleted on the server */
 		_debug("got NOENT from server - marking file deleted");
 		afs_vnode_deleted_remotely(vnode);
-		break;
-	default:
-		break;
 	}
 
 	vnode->update_cnt--;
-
+	ASSERTCMP(vnode->update_cnt, >=, 0);
 	spin_unlock(&vnode->lock);
 
 	wake_up_all(&vnode->update_waitq);
-
-	afs_put_server(oldserver);
-
 	_leave("");
 }
 
@@ -275,8 +289,12 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode,
 		return 0;
 	}
 
+	ASSERTCMP(vnode->update_cnt, >=, 0);
+
 	if (vnode->update_cnt > 0) {
 		/* someone else started a fetch */
+		_debug("wait on fetch %d", vnode->update_cnt);
+
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		ASSERT(myself.func != NULL);
 		add_wait_queue(&vnode->update_waitq, &myself);
@@ -325,7 +343,7 @@ get_anyway:
 		/* pick a server to query */
 		server = afs_volume_pick_fileserver(vnode);
 		if (IS_ERR(server))
-			return PTR_ERR(server);
+			goto no_server;
 
 		_debug("USING SERVER: %p{%08x}",
 		       server, ntohl(server->addr.s_addr));
@@ -336,17 +354,34 @@ get_anyway:
 	} while (!afs_volume_release_fileserver(vnode, server, ret));
 
 	/* adjust the flags */
-	if (ret == 0 && auth_vnode)
-		afs_cache_permit(vnode, key, acl_order);
-	afs_vnode_finalise_status_update(vnode, server, ret);
+	if (ret == 0) {
+		_debug("adjust");
+		if (auth_vnode)
+			afs_cache_permit(vnode, key, acl_order);
+		afs_vnode_finalise_status_update(vnode, server);
+		afs_put_server(server);
+	} else {
+		_debug("failed [%d]", ret);
+		afs_vnode_status_update_failed(vnode, ret);
+	}
 
-	_leave(" = %d", ret);
+	ASSERTCMP(vnode->update_cnt, >=, 0);
+
+	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
 	return ret;
+
+no_server:
+	spin_lock(&vnode->lock);
+	vnode->update_cnt--;
+	ASSERTCMP(vnode->update_cnt, >=, 0);
+	spin_unlock(&vnode->lock);
+	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
+	return PTR_ERR(server);
 }
 
 /*
  * fetch file data from the volume
- * - TODO implement caching and server failover
+ * - TODO implement caching
  */
 int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key,
 			 off_t offset, size_t length, struct page *page)
@@ -372,18 +407,349 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key,
 		/* pick a server to query */
 		server = afs_volume_pick_fileserver(vnode);
 		if (IS_ERR(server))
-			return PTR_ERR(server);
+			goto no_server;
 
 		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
 
 		ret = afs_fs_fetch_data(server, key, vnode, offset, length,
-					page, NULL, &afs_sync_call);
+					page, &afs_sync_call);
 
 	} while (!afs_volume_release_fileserver(vnode, server, ret));
 
 	/* adjust the flags */
-	afs_vnode_finalise_status_update(vnode, server, ret);
+	if (ret == 0) {
+		afs_vnode_finalise_status_update(vnode, server);
+		afs_put_server(server);
+	} else {
+		afs_vnode_status_update_failed(vnode, ret);
+	}
 
 	_leave(" = %d", ret);
 	return ret;
+
+no_server:
+	spin_lock(&vnode->lock);
+	vnode->update_cnt--;
+	ASSERTCMP(vnode->update_cnt, >=, 0);
+	spin_unlock(&vnode->lock);
+	return PTR_ERR(server);
+}
+
+/*
+ * make a file or a directory
+ */
+int afs_vnode_create(struct afs_vnode *vnode, struct key *key,
+		     const char *name, umode_t mode, struct afs_fid *newfid,
+		     struct afs_file_status *newstatus,
+		     struct afs_callback *newcb, struct afs_server **_server)
+{
+	struct afs_server *server;
+	int ret;
+
+	_enter("%s{%u,%u,%u},%x,%s,,",
+	       vnode->volume->vlocation->vldb.name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(key),
+	       name);
+
+	/* this op will fetch the status on the directory we're creating in */
+	spin_lock(&vnode->lock);
+	vnode->update_cnt++;
+	spin_unlock(&vnode->lock);
+
+	do {
+		/* pick a server to query */
+		server = afs_volume_pick_fileserver(vnode);
+		if (IS_ERR(server))
+			goto no_server;
+
+		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+
+		ret = afs_fs_create(server, key, vnode, name, mode, newfid,
+				    newstatus, newcb, &afs_sync_call);
+
+	} while (!afs_volume_release_fileserver(vnode, server, ret));
+
+	/* adjust the flags */
+	if (ret == 0) {
+		afs_vnode_finalise_status_update(vnode, server);
+		*_server = server;
+	} else {
+		afs_vnode_status_update_failed(vnode, ret);
+		*_server = NULL;
+	}
+
+	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
+	return ret;
+
+no_server:
+	spin_lock(&vnode->lock);
+	vnode->update_cnt--;
+	ASSERTCMP(vnode->update_cnt, >=, 0);
+	spin_unlock(&vnode->lock);
+	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
+	return PTR_ERR(server);
+}
+
+/*
+ * remove a file or directory
+ */
+int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name,
+		     bool isdir)
+{
+	struct afs_server *server;
+	int ret;
+
+	_enter("%s{%u,%u,%u},%x,%s",
+	       vnode->volume->vlocation->vldb.name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(key),
+	       name);
+
+	/* this op will fetch the status on the directory we're removing from */
+	spin_lock(&vnode->lock);
+	vnode->update_cnt++;
+	spin_unlock(&vnode->lock);
+
+	do {
+		/* pick a server to query */
+		server = afs_volume_pick_fileserver(vnode);
+		if (IS_ERR(server))
+			goto no_server;
+
+		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+
+		ret = afs_fs_remove(server, key, vnode, name, isdir,
+				    &afs_sync_call);
+
+	} while (!afs_volume_release_fileserver(vnode, server, ret));
+
+	/* adjust the flags */
+	if (ret == 0) {
+		afs_vnode_finalise_status_update(vnode, server);
+		afs_put_server(server);
+	} else {
+		afs_vnode_status_update_failed(vnode, ret);
+	}
+
+	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
+	return ret;
+
+no_server:
+	spin_lock(&vnode->lock);
+	vnode->update_cnt--;
+	ASSERTCMP(vnode->update_cnt, >=, 0);
+	spin_unlock(&vnode->lock);
+	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
+	return PTR_ERR(server);
+}
+
+/*
+ * create a hard link
+ */
+extern int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode,
+			  struct key *key, const char *name)
+{
+	struct afs_server *server;
+	int ret;
+
+	_enter("%s{%u,%u,%u},%s{%u,%u,%u},%x,%s",
+	       dvnode->volume->vlocation->vldb.name,
+	       dvnode->fid.vid,
+	       dvnode->fid.vnode,
+	       dvnode->fid.unique,
+	       vnode->volume->vlocation->vldb.name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(key),
+	       name);
+
+	/* this op will fetch the status on the directory we're removing from */
+	spin_lock(&vnode->lock);
+	vnode->update_cnt++;
+	spin_unlock(&vnode->lock);
+	spin_lock(&dvnode->lock);
+	dvnode->update_cnt++;
+	spin_unlock(&dvnode->lock);
+
+	do {
+		/* pick a server to query */
+		server = afs_volume_pick_fileserver(dvnode);
+		if (IS_ERR(server))
+			goto no_server;
+
+		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+
+		ret = afs_fs_link(server, key, dvnode, vnode, name,
+				  &afs_sync_call);
+
+	} while (!afs_volume_release_fileserver(dvnode, server, ret));
+
+	/* adjust the flags */
+	if (ret == 0) {
+		afs_vnode_finalise_status_update(vnode, server);
+		afs_vnode_finalise_status_update(dvnode, server);
+		afs_put_server(server);
+	} else {
+		afs_vnode_status_update_failed(vnode, ret);
+		afs_vnode_status_update_failed(dvnode, ret);
+	}
+
+	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
+	return ret;
+
+no_server:
+	spin_lock(&vnode->lock);
+	vnode->update_cnt--;
+	ASSERTCMP(vnode->update_cnt, >=, 0);
+	spin_unlock(&vnode->lock);
+	spin_lock(&dvnode->lock);
+	dvnode->update_cnt--;
+	ASSERTCMP(dvnode->update_cnt, >=, 0);
+	spin_unlock(&dvnode->lock);
+	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
+	return PTR_ERR(server);
+}
+
+/*
+ * create a symbolic link
+ */
+int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key,
+		      const char *name, const char *content,
+		      struct afs_fid *newfid,
+		      struct afs_file_status *newstatus,
+		      struct afs_server **_server)
+{
+	struct afs_server *server;
+	int ret;
+
+	_enter("%s{%u,%u,%u},%x,%s,%s,,,",
+	       vnode->volume->vlocation->vldb.name,
+	       vnode->fid.vid,
+	       vnode->fid.vnode,
+	       vnode->fid.unique,
+	       key_serial(key),
+	       name, content);
+
+	/* this op will fetch the status on the directory we're creating in */
+	spin_lock(&vnode->lock);
+	vnode->update_cnt++;
+	spin_unlock(&vnode->lock);
+
+	do {
+		/* pick a server to query */
+		server = afs_volume_pick_fileserver(vnode);
+		if (IS_ERR(server))
+			goto no_server;
+
+		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+
+		ret = afs_fs_symlink(server, key, vnode, name, content,
+				     newfid, newstatus, &afs_sync_call);
+
+	} while (!afs_volume_release_fileserver(vnode, server, ret));
+
+	/* adjust the flags */
+	if (ret == 0) {
+		afs_vnode_finalise_status_update(vnode, server);
+		*_server = server;
+	} else {
+		afs_vnode_status_update_failed(vnode, ret);
+		*_server = NULL;
+	}
+
+	_leave(" = %d [cnt %d]", ret, vnode->update_cnt);
+	return ret;
+
+no_server:
+	spin_lock(&vnode->lock);
+	vnode->update_cnt--;
+	ASSERTCMP(vnode->update_cnt, >=, 0);
+	spin_unlock(&vnode->lock);
+	_leave(" = %ld [cnt %d]", PTR_ERR(server), vnode->update_cnt);
+	return PTR_ERR(server);
+}
+
+/*
+ * rename a file
+ */
+int afs_vnode_rename(struct afs_vnode *orig_dvnode,
+		     struct afs_vnode *new_dvnode,
+		     struct key *key,
+		     const char *orig_name,
+		     const char *new_name)
+{
+	struct afs_server *server;
+	int ret;
+
+	_enter("%s{%u,%u,%u},%s{%u,%u,%u},%x,%s,%s",
+	       orig_dvnode->volume->vlocation->vldb.name,
+	       orig_dvnode->fid.vid,
+	       orig_dvnode->fid.vnode,
+	       orig_dvnode->fid.unique,
+	       new_dvnode->volume->vlocation->vldb.name,
+	       new_dvnode->fid.vid,
+	       new_dvnode->fid.vnode,
+	       new_dvnode->fid.unique,
+	       key_serial(key),
+	       orig_name,
+	       new_name);
+
+	/* this op will fetch the status on both the directories we're dealing
+	 * with */
+	spin_lock(&orig_dvnode->lock);
+	orig_dvnode->update_cnt++;
+	spin_unlock(&orig_dvnode->lock);
+	if (new_dvnode != orig_dvnode) {
+		spin_lock(&new_dvnode->lock);
+		new_dvnode->update_cnt++;
+		spin_unlock(&new_dvnode->lock);
+	}
+
+	do {
+		/* pick a server to query */
+		server = afs_volume_pick_fileserver(orig_dvnode);
+		if (IS_ERR(server))
+			goto no_server;
+
+		_debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+
+		ret = afs_fs_rename(server, key, orig_dvnode, orig_name,
+				    new_dvnode, new_name, &afs_sync_call);
+
+	} while (!afs_volume_release_fileserver(orig_dvnode, server, ret));
+
+	/* adjust the flags */
+	if (ret == 0) {
+		afs_vnode_finalise_status_update(orig_dvnode, server);
+		if (new_dvnode != orig_dvnode)
+			afs_vnode_finalise_status_update(new_dvnode, server);
+		afs_put_server(server);
+	} else {
+		afs_vnode_status_update_failed(orig_dvnode, ret);
+		if (new_dvnode != orig_dvnode)
+			afs_vnode_status_update_failed(new_dvnode, ret);
+	}
+
+	_leave(" = %d [cnt %d]", ret, orig_dvnode->update_cnt);
+	return ret;
+
+no_server:
+	spin_lock(&orig_dvnode->lock);
+	orig_dvnode->update_cnt--;
+	ASSERTCMP(orig_dvnode->update_cnt, >=, 0);
+	spin_unlock(&orig_dvnode->lock);
+	if (new_dvnode != orig_dvnode) {
+		spin_lock(&new_dvnode->lock);
+		new_dvnode->update_cnt--;
+		ASSERTCMP(new_dvnode->update_cnt, >=, 0);
+		spin_unlock(&new_dvnode->lock);
+	}
+	_leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt);
+	return PTR_ERR(server);
 }
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 15e13678c21..dd160cada45 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -295,6 +295,7 @@ struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *vnode)
  * - releases the ref on the server struct that was acquired by picking
  * - records result of using a particular server to access a volume
  * - return 0 to try again, 1 if okay or to issue error
+ * - the caller must release the server struct if result was 0
  */
 int afs_volume_release_fileserver(struct afs_vnode *vnode,
 				  struct afs_server *server,
@@ -312,7 +313,8 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
 	case 0:
 		server->fs_act_jif = jiffies;
 		server->fs_state = 0;
-		break;
+		_leave("");
+		return 1;
 
 		/* the fileserver denied all knowledge of the volume */
 	case -ENOMEDIUM:
@@ -377,14 +379,12 @@ int afs_volume_release_fileserver(struct afs_vnode *vnode,
 		server->fs_act_jif = jiffies;
 	case -ENOMEM:
 	case -ENONET:
-		break;
+		/* tell the caller to accept the result */
+		afs_put_server(server);
+		_leave(" [local failure]");
+		return 1;
 	}
 
-	/* tell the caller to accept the result */
-	afs_put_server(server);
-	_leave("");
-	return 1;
-
 	/* tell the caller to loop around and try the next server */
 try_next_server_upw:
 	up_write(&volume->server_sem);
-- 
cgit v1.2.3


From ba3e0e1accd8d5bb12eaeb0977429d8dc04f6d1e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 26 Apr 2007 16:06:22 -0700
Subject: [AFS]: Fix u64 printing in debug logging.

Need 'unsigned long long' casts to quiet warnings on
64-bit platforms when using %ll on a u64.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/afs/dir.c      | 3 ++-
 fs/afs/fsclient.c | 3 ++-
 fs/afs/inode.c    | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index dbbe75d6023..dac5b990c0c 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -419,7 +419,8 @@ static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
 	struct afs_lookup_cookie *cookie = _cookie;
 
 	_enter("{%s,%Zu},%s,%u,,%llu,%u",
-	       cookie->name, cookie->nlen, name, nlen, ino, dtype);
+	       cookie->name, cookie->nlen, name, nlen,
+	       (unsigned long long) ino, dtype);
 
 	/* insanity checks first */
 	BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048);
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index f036b4cc51a..2393d2a08d7 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -105,7 +105,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
 		status->data_version = data_version;
 		if (vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
 			_debug("vnode modified %llx on {%x:%u}",
-			       data_version, vnode->fid.vid, vnode->fid.vnode);
+			       (unsigned long long) data_version,
+			       vnode->fid.vid, vnode->fid.vnode);
 			set_bit(AFS_VNODE_MODIFIED, &vnode->flags);
 			set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
 		}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 56ca8581b37..c184a4ee599 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -36,7 +36,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 	_debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu",
 	       vnode->status.type,
 	       vnode->status.nlink,
-	       vnode->status.size,
+	       (unsigned long long) vnode->status.size,
 	       vnode->status.data_version,
 	       vnode->status.mode);
 
-- 
cgit v1.2.3


From 411faf5810cdd0e4f5071a3805d8adb49d120a07 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 26 Apr 2007 20:18:17 -0700
Subject: [RXRPC]: Remove bogus atomic_* overrides.

These are done with CPP defines which several platforms
use for their atomic.h implementation, which floods the
build with warnings and breaks the build.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/ar-internal.h | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index cb1eb492ee4..58aaf892238 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -786,30 +786,6 @@ static inline void rxrpc_purge_queue(struct sk_buff_head *list)
 		rxrpc_free_skb(skb);
 }
 
-static inline void __rxrpc__atomic_inc(atomic_t *v)
-{
-	CHECK_SLAB_OKAY(v);
-	atomic_inc(v);
-}
-
-#define atomic_inc(v) __rxrpc__atomic_inc((v))
-
-static inline void __rxrpc__atomic_dec(atomic_t *v)
-{
-	CHECK_SLAB_OKAY(v);
-	atomic_dec(v);
-}
-
-#define atomic_dec(v) __rxrpc__atomic_dec((v))
-
-static inline int __rxrpc__atomic_dec_and_test(atomic_t *v)
-{
-	CHECK_SLAB_OKAY(v);
-	return atomic_dec_and_test(v);
-}
-
-#define atomic_dec_and_test(v) __rxrpc__atomic_dec_and_test((v))
-
 static inline void __rxrpc_get_local(struct rxrpc_local *local, const char *f)
 {
 	CHECK_SLAB_OKAY(&local->usage);
-- 
cgit v1.2.3


From 68c708fd5e90f6d178c84bb7e641589eb2842319 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 26 Apr 2007 20:20:21 -0700
Subject: [RXRPC]: Fix pointers passed to bitops.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

  CC [M]  net/rxrpc/ar-input.o
net/rxrpc/ar-input.c: In function ‘rxrpc_fast_process_data’:
net/rxrpc/ar-input.c:171: warning: passing argument 2 of ‘__test_and_set_bit’ from incompatible pointer type
net/rxrpc/ar-input.c:180: warning: passing argument 2 of ‘__clear_bit’ from incompatible pointer type
net/rxrpc/ar-input.c:218: warning: passing argument 2 of ‘__clear_bit’ from incompatible pointer type

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/ar-input.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index ceb5d619a1d..91b5bbb003e 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -168,7 +168,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
 	/* we may already have the packet in the out of sequence queue */
 	ackbit = seq - (call->rx_data_eaten + 1);
 	ASSERTCMP(ackbit, >=, 0);
-	if (__test_and_set_bit(ackbit, &call->ackr_window)) {
+	if (__test_and_set_bit(ackbit, call->ackr_window)) {
 		_debug("dup oos #%u [%u,%u]",
 		       seq, call->rx_data_eaten, call->rx_data_post);
 		ack = RXRPC_ACK_DUPLICATE;
@@ -177,7 +177,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
 
 	if (seq >= call->ackr_win_top) {
 		_debug("exceed #%u [%u]", seq, call->ackr_win_top);
-		__clear_bit(ackbit, &call->ackr_window);
+		__clear_bit(ackbit, call->ackr_window);
 		ack = RXRPC_ACK_EXCEEDS_WINDOW;
 		goto discard_and_ack;
 	}
@@ -215,7 +215,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call,
 	ret = rxrpc_queue_rcv_skb(call, skb, false, terminal);
 	if (ret < 0) {
 		if (ret == -ENOMEM || ret == -ENOBUFS) {
-			__clear_bit(ackbit, &call->ackr_window);
+			__clear_bit(ackbit, call->ackr_window);
 			ack = RXRPC_ACK_NOSPACE;
 			goto discard_and_ack;
 		}
-- 
cgit v1.2.3


From 39bf09493042200b967cdf2ee6e3f670b7963903 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Thu, 26 Apr 2007 20:39:14 -0700
Subject: [AFS]: Eliminate cmpxchg() usage in vlocation code.

cmpxchg() is not available on every processor so can't
be used in generic code.

Replace with spinlock protection on the ->state changes,
wakeups, and wait loops.

Add what appears to be a missing wakeup on transition
to AFS_VL_VALID state in afs_vlocation_updater().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/afs/internal.h  |  2 +-
 fs/afs/vlocation.c | 20 +++++++++++++++-----
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 73bfa0b2d99..6dd3197d1d8 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -219,7 +219,7 @@ struct afs_vlocation {
 	struct afs_volume	*vols[3];	/* volume access record pointer (index by type) */
 	wait_queue_head_t	waitq;		/* status change waitqueue */
 	time_t			update_at;	/* time at which record should be updated */
-	rwlock_t		lock;		/* access lock */
+	spinlock_t		lock;		/* access lock */
 	afs_vlocation_state_t	state;		/* volume location state */
 	unsigned short		upd_rej_cnt;	/* ENOMEDIUM count during update */
 	unsigned short		upd_busy_cnt;	/* EBUSY count during update */
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 7d9815e9ae0..74cce174882 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -178,7 +178,7 @@ static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
 		INIT_LIST_HEAD(&vl->grave);
 		INIT_LIST_HEAD(&vl->update);
 		init_waitqueue_head(&vl->waitq);
-		rwlock_init(&vl->lock);
+		spin_lock_init(&vl->lock);
 		memcpy(vl->vldb.name, name, namesz);
 	}
 
@@ -414,8 +414,10 @@ fill_in_record:
 	ret = afs_vlocation_fill_in_record(vl, key);
 	if (ret < 0)
 		goto error_abandon;
+	spin_lock(&vl->lock);
 	vl->state = AFS_VL_VALID;
 	wake_up(&vl->waitq);
+	spin_unlock(&vl->lock);
 
 	/* schedule for regular updates */
 	afs_vlocation_queue_for_updates(vl);
@@ -434,22 +436,23 @@ found_in_memory:
 	up_write(&cell->vl_sem);
 
 	/* see if it was an abandoned record that we might try filling in */
+	spin_lock(&vl->lock);
 	while (vl->state != AFS_VL_VALID) {
 		afs_vlocation_state_t state = vl->state;
 
 		_debug("invalid [state %d]", state);
 
 		if ((state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME)) {
-			if (cmpxchg(&vl->state, state, AFS_VL_CREATING) ==
-			    state)
-				goto fill_in_record;
-			continue;
+			vl->state = AFS_VL_CREATING;
+			spin_unlock(&vl->lock);
+			goto fill_in_record;
 		}
 
 		/* must now wait for creation or update by someone else to
 		 * complete */
 		_debug("wait");
 
+		spin_unlock(&vl->lock);
 		ret = wait_event_interruptible(
 			vl->waitq,
 			vl->state == AFS_VL_NEW ||
@@ -457,15 +460,19 @@ found_in_memory:
 			vl->state == AFS_VL_NO_VOLUME);
 		if (ret < 0)
 			goto error;
+		spin_lock(&vl->lock);
 	}
+	spin_unlock(&vl->lock);
 
 success:
 	_leave(" = %p",vl);
 	return vl;
 
 error_abandon:
+	spin_lock(&vl->lock);
 	vl->state = AFS_VL_NEW;
 	wake_up(&vl->waitq);
+	spin_unlock(&vl->lock);
 error:
 	ASSERT(vl != NULL);
 	afs_put_vlocation(vl);
@@ -663,10 +670,12 @@ static void afs_vlocation_updater(struct work_struct *work)
 	vl->upd_busy_cnt = 0;
 
 	ret = afs_vlocation_update_record(vl, NULL, &vldb);
+	spin_lock(&vl->lock);
 	switch (ret) {
 	case 0:
 		afs_vlocation_apply_update(vl, &vldb);
 		vl->state = AFS_VL_VALID;
+		wake_up(&vl->waitq);
 		break;
 	case -ENOMEDIUM:
 		vl->state = AFS_VL_VOLUME_DELETED;
@@ -675,6 +684,7 @@ static void afs_vlocation_updater(struct work_struct *work)
 		vl->state = AFS_VL_UNCERTAIN;
 		break;
 	}
+	spin_unlock(&vl->lock);
 
 	/* and then reschedule */
 	_debug("reschedule");
-- 
cgit v1.2.3


From 11433ee450eb4a320f46ce5ed51410b52803ffcc Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 26 Apr 2007 20:42:51 -0700
Subject: [WEXT]: Move to net/wireless

This patch moves dev/core/wireless.c to net/wireless/wext.c.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/Makefile     |    1 -
 net/core/dev.c        |    2 +-
 net/core/wireless.c   | 1633 -------------------------------------------------
 net/wireless/Makefile |    1 +
 net/wireless/wext.c   | 1633 +++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 1635 insertions(+), 1635 deletions(-)
 delete mode 100644 net/core/wireless.c
 create mode 100644 net/wireless/wext.c

diff --git a/net/core/Makefile b/net/core/Makefile
index 73272d506e9..4751613e1b5 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -13,7 +13,6 @@ obj-y		     += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
 obj-$(CONFIG_XFRM) += flow.o
 obj-$(CONFIG_SYSFS) += net-sysfs.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
-obj-$(CONFIG_WIRELESS_EXT) += wireless.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_NET_DMA) += user_dma.o
 obj-$(CONFIG_FIB_RULES) += fib_rules.o
diff --git a/net/core/dev.c b/net/core/dev.c
index d82d00f5451..700e4b5081b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2936,7 +2936,7 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
 				}
 				dev_load(ifr.ifr_name);
 				rtnl_lock();
-				/* Follow me in net/core/wireless.c */
+				/* Follow me in net/wireless/wext.c */
 				ret = wireless_process_ioctl(&ifr, cmd);
 				rtnl_unlock();
 				if (IW_IS_GET(cmd) &&
diff --git a/net/core/wireless.c b/net/core/wireless.c
deleted file mode 100644
index fba295e05e7..00000000000
--- a/net/core/wireless.c
+++ /dev/null
@@ -1,1633 +0,0 @@
-/*
- * This file implement the Wireless Extensions APIs.
- *
- * Authors :	Jean Tourrilhes - HPL - <jt@hpl.hp.com>
- * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved.
- *
- * (As all part of the Linux kernel, this file is GPL)
- */
-
-/************************** DOCUMENTATION **************************/
-/*
- * API definition :
- * --------------
- * See <linux/wireless.h> for details of the APIs and the rest.
- *
- * History :
- * -------
- *
- * v1 - 5.12.01 - Jean II
- *	o Created this file.
- *
- * v2 - 13.12.01 - Jean II
- *	o Move /proc/net/wireless stuff from net/core/dev.c to here
- *	o Make Wireless Extension IOCTLs go through here
- *	o Added iw_handler handling ;-)
- *	o Added standard ioctl description
- *	o Initial dumb commit strategy based on orinoco.c
- *
- * v3 - 19.12.01 - Jean II
- *	o Make sure we don't go out of standard_ioctl[] in ioctl_standard_call
- *	o Add event dispatcher function
- *	o Add event description
- *	o Propagate events as rtnetlink IFLA_WIRELESS option
- *	o Generate event on selected SET requests
- *
- * v4 - 18.04.02 - Jean II
- *	o Fix stupid off by one in iw_ioctl_description : IW_ESSID_MAX_SIZE + 1
- *
- * v5 - 21.06.02 - Jean II
- *	o Add IW_PRIV_TYPE_ADDR in priv_type_size (+cleanup)
- *	o Reshuffle IW_HEADER_TYPE_XXX to map IW_PRIV_TYPE_XXX changes
- *	o Add IWEVCUSTOM for driver specific event/scanning token
- *	o Turn on WE_STRICT_WRITE by default + kernel warning
- *	o Fix WE_STRICT_WRITE in ioctl_export_private() (32 => iw_num)
- *	o Fix off-by-one in test (extra_size <= IFNAMSIZ)
- *
- * v6 - 9.01.03 - Jean II
- *	o Add common spy support : iw_handler_set_spy(), wireless_spy_update()
- *	o Add enhanced spy support : iw_handler_set_thrspy() and event.
- *	o Add WIRELESS_EXT version display in /proc/net/wireless
- *
- * v6 - 18.06.04 - Jean II
- *	o Change get_spydata() method for added safety
- *	o Remove spy #ifdef, they are always on -> cleaner code
- *	o Allow any size GET request if user specifies length > max
- *		and if request has IW_DESCR_FLAG_NOMAX flag or is SIOCGIWPRIV
- *	o Start migrating get_wireless_stats to struct iw_handler_def
- *	o Add wmb() in iw_handler_set_spy() for non-coherent archs/cpus
- * Based on patch from Pavel Roskin <proski@gnu.org> :
- *	o Fix kernel data leak to user space in private handler handling
- *
- * v7 - 18.3.05 - Jean II
- *	o Remove (struct iw_point *)->pointer from events and streams
- *	o Remove spy_offset from struct iw_handler_def
- *	o Start deprecating dev->get_wireless_stats, output a warning
- *	o If IW_QUAL_DBM is set, show dBm values in /proc/net/wireless
- *	o Don't loose INVALID/DBM flags when clearing UPDATED flags (iwstats)
- *
- * v8 - 17.02.06 - Jean II
- *	o RtNetlink requests support (SET/GET)
- *
- * v8b - 03.08.06 - Herbert Xu
- *	o Fix Wireless Event locking issues.
- *
- * v9 - 14.3.06 - Jean II
- *	o Change length in ESSID and NICK to strlen() instead of strlen()+1
- *	o Make standard_ioctl_num and standard_event_num unsigned
- *	o Remove (struct net_device *)->get_wireless_stats()
- *
- * v10 - 16.3.07 - Jean II
- *	o Prevent leaking of kernel space in stream on 64 bits.
- */
-
-/***************************** INCLUDES *****************************/
-
-#include <linux/module.h>
-#include <linux/types.h>		/* off_t */
-#include <linux/netdevice.h>		/* struct ifreq, dev_get_by_name() */
-#include <linux/proc_fs.h>
-#include <linux/rtnetlink.h>		/* rtnetlink stuff */
-#include <linux/seq_file.h>
-#include <linux/init.h>			/* for __init */
-#include <linux/if_arp.h>		/* ARPHRD_ETHER */
-#include <linux/etherdevice.h>		/* compare_ether_addr */
-#include <linux/interrupt.h>
-
-#include <linux/wireless.h>		/* Pretty obvious */
-#include <net/iw_handler.h>		/* New driver API */
-#include <net/netlink.h>
-
-#include <asm/uaccess.h>		/* copy_to_user() */
-
-/**************************** CONSTANTS ****************************/
-
-/* Debugging stuff */
-#undef WE_IOCTL_DEBUG		/* Debug IOCTL API */
-#undef WE_EVENT_DEBUG		/* Debug Event dispatcher */
-#undef WE_SPY_DEBUG		/* Debug enhanced spy support */
-
-/* Options */
-#define WE_EVENT_RTNETLINK	/* Propagate events using RtNetlink */
-#define WE_SET_EVENT		/* Generate an event on some set commands */
-
-/************************* GLOBAL VARIABLES *************************/
-/*
- * You should not use global variables, because of re-entrancy.
- * On our case, it's only const, so it's OK...
- */
-/*
- * Meta-data about all the standard Wireless Extension request we
- * know about.
- */
-static const struct iw_ioctl_description standard_ioctl[] = {
-	[SIOCSIWCOMMIT	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_NULL,
-	},
-	[SIOCGIWNAME	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_CHAR,
-		.flags		= IW_DESCR_FLAG_DUMP,
-	},
-	[SIOCSIWNWID	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-		.flags		= IW_DESCR_FLAG_EVENT,
-	},
-	[SIOCGIWNWID	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-		.flags		= IW_DESCR_FLAG_DUMP,
-	},
-	[SIOCSIWFREQ	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_FREQ,
-		.flags		= IW_DESCR_FLAG_EVENT,
-	},
-	[SIOCGIWFREQ	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_FREQ,
-		.flags		= IW_DESCR_FLAG_DUMP,
-	},
-	[SIOCSIWMODE	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_UINT,
-		.flags		= IW_DESCR_FLAG_EVENT,
-	},
-	[SIOCGIWMODE	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_UINT,
-		.flags		= IW_DESCR_FLAG_DUMP,
-	},
-	[SIOCSIWSENS	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-	},
-	[SIOCGIWSENS	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-	},
-	[SIOCSIWRANGE	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_NULL,
-	},
-	[SIOCGIWRANGE	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= sizeof(struct iw_range),
-		.flags		= IW_DESCR_FLAG_DUMP,
-	},
-	[SIOCSIWPRIV	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_NULL,
-	},
-	[SIOCGIWPRIV	- SIOCIWFIRST] = { /* (handled directly by us) */
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= sizeof(struct iw_priv_args),
-		.max_tokens	= 16,
-		.flags		= IW_DESCR_FLAG_NOMAX,
-	},
-	[SIOCSIWSTATS	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_NULL,
-	},
-	[SIOCGIWSTATS	- SIOCIWFIRST] = { /* (handled directly by us) */
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= sizeof(struct iw_statistics),
-		.flags		= IW_DESCR_FLAG_DUMP,
-	},
-	[SIOCSIWSPY	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= sizeof(struct sockaddr),
-		.max_tokens	= IW_MAX_SPY,
-	},
-	[SIOCGIWSPY	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= sizeof(struct sockaddr) +
-				  sizeof(struct iw_quality),
-		.max_tokens	= IW_MAX_SPY,
-	},
-	[SIOCSIWTHRSPY	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= sizeof(struct iw_thrspy),
-		.min_tokens	= 1,
-		.max_tokens	= 1,
-	},
-	[SIOCGIWTHRSPY	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= sizeof(struct iw_thrspy),
-		.min_tokens	= 1,
-		.max_tokens	= 1,
-	},
-	[SIOCSIWAP	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_ADDR,
-	},
-	[SIOCGIWAP	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_ADDR,
-		.flags		= IW_DESCR_FLAG_DUMP,
-	},
-	[SIOCSIWMLME	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.min_tokens	= sizeof(struct iw_mlme),
-		.max_tokens	= sizeof(struct iw_mlme),
-	},
-	[SIOCGIWAPLIST	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= sizeof(struct sockaddr) +
-				  sizeof(struct iw_quality),
-		.max_tokens	= IW_MAX_AP,
-		.flags		= IW_DESCR_FLAG_NOMAX,
-	},
-	[SIOCSIWSCAN	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.min_tokens	= 0,
-		.max_tokens	= sizeof(struct iw_scan_req),
-	},
-	[SIOCGIWSCAN	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= IW_SCAN_MAX_DATA,
-		.flags		= IW_DESCR_FLAG_NOMAX,
-	},
-	[SIOCSIWESSID	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= IW_ESSID_MAX_SIZE,
-		.flags		= IW_DESCR_FLAG_EVENT,
-	},
-	[SIOCGIWESSID	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= IW_ESSID_MAX_SIZE,
-		.flags		= IW_DESCR_FLAG_DUMP,
-	},
-	[SIOCSIWNICKN	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= IW_ESSID_MAX_SIZE,
-	},
-	[SIOCGIWNICKN	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= IW_ESSID_MAX_SIZE,
-	},
-	[SIOCSIWRATE	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-	},
-	[SIOCGIWRATE	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-	},
-	[SIOCSIWRTS	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-	},
-	[SIOCGIWRTS	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-	},
-	[SIOCSIWFRAG	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-	},
-	[SIOCGIWFRAG	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-	},
-	[SIOCSIWTXPOW	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-	},
-	[SIOCGIWTXPOW	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-	},
-	[SIOCSIWRETRY	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-	},
-	[SIOCGIWRETRY	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-	},
-	[SIOCSIWENCODE	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= IW_ENCODING_TOKEN_MAX,
-		.flags		= IW_DESCR_FLAG_EVENT | IW_DESCR_FLAG_RESTRICT,
-	},
-	[SIOCGIWENCODE	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= IW_ENCODING_TOKEN_MAX,
-		.flags		= IW_DESCR_FLAG_DUMP | IW_DESCR_FLAG_RESTRICT,
-	},
-	[SIOCSIWPOWER	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-	},
-	[SIOCGIWPOWER	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-	},
-	[SIOCSIWGENIE	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= IW_GENERIC_IE_MAX,
-	},
-	[SIOCGIWGENIE	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= IW_GENERIC_IE_MAX,
-	},
-	[SIOCSIWAUTH	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-	},
-	[SIOCGIWAUTH	- SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_PARAM,
-	},
-	[SIOCSIWENCODEEXT - SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.min_tokens	= sizeof(struct iw_encode_ext),
-		.max_tokens	= sizeof(struct iw_encode_ext) +
-				  IW_ENCODING_TOKEN_MAX,
-	},
-	[SIOCGIWENCODEEXT - SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.min_tokens	= sizeof(struct iw_encode_ext),
-		.max_tokens	= sizeof(struct iw_encode_ext) +
-				  IW_ENCODING_TOKEN_MAX,
-	},
-	[SIOCSIWPMKSA - SIOCIWFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.min_tokens	= sizeof(struct iw_pmksa),
-		.max_tokens	= sizeof(struct iw_pmksa),
-	},
-};
-static const unsigned standard_ioctl_num = ARRAY_SIZE(standard_ioctl);
-
-/*
- * Meta-data about all the additional standard Wireless Extension events
- * we know about.
- */
-static const struct iw_ioctl_description standard_event[] = {
-	[IWEVTXDROP	- IWEVFIRST] = {
-		.header_type	= IW_HEADER_TYPE_ADDR,
-	},
-	[IWEVQUAL	- IWEVFIRST] = {
-		.header_type	= IW_HEADER_TYPE_QUAL,
-	},
-	[IWEVCUSTOM	- IWEVFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= IW_CUSTOM_MAX,
-	},
-	[IWEVREGISTERED	- IWEVFIRST] = {
-		.header_type	= IW_HEADER_TYPE_ADDR,
-	},
-	[IWEVEXPIRED	- IWEVFIRST] = {
-		.header_type	= IW_HEADER_TYPE_ADDR,
-	},
-	[IWEVGENIE	- IWEVFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= IW_GENERIC_IE_MAX,
-	},
-	[IWEVMICHAELMICFAILURE	- IWEVFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= sizeof(struct iw_michaelmicfailure),
-	},
-	[IWEVASSOCREQIE	- IWEVFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= IW_GENERIC_IE_MAX,
-	},
-	[IWEVASSOCRESPIE	- IWEVFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= IW_GENERIC_IE_MAX,
-	},
-	[IWEVPMKIDCAND	- IWEVFIRST] = {
-		.header_type	= IW_HEADER_TYPE_POINT,
-		.token_size	= 1,
-		.max_tokens	= sizeof(struct iw_pmkid_cand),
-	},
-};
-static const unsigned standard_event_num = ARRAY_SIZE(standard_event);
-
-/* Size (in bytes) of the various private data types */
-static const char iw_priv_type_size[] = {
-	0,				/* IW_PRIV_TYPE_NONE */
-	1,				/* IW_PRIV_TYPE_BYTE */
-	1,				/* IW_PRIV_TYPE_CHAR */
-	0,				/* Not defined */
-	sizeof(__u32),			/* IW_PRIV_TYPE_INT */
-	sizeof(struct iw_freq),		/* IW_PRIV_TYPE_FLOAT */
-	sizeof(struct sockaddr),	/* IW_PRIV_TYPE_ADDR */
-	0,				/* Not defined */
-};
-
-/* Size (in bytes) of various events */
-static const int event_type_size[] = {
-	IW_EV_LCP_LEN,			/* IW_HEADER_TYPE_NULL */
-	0,
-	IW_EV_CHAR_LEN,			/* IW_HEADER_TYPE_CHAR */
-	0,
-	IW_EV_UINT_LEN,			/* IW_HEADER_TYPE_UINT */
-	IW_EV_FREQ_LEN,			/* IW_HEADER_TYPE_FREQ */
-	IW_EV_ADDR_LEN,			/* IW_HEADER_TYPE_ADDR */
-	0,
-	IW_EV_POINT_LEN,		/* Without variable payload */
-	IW_EV_PARAM_LEN,		/* IW_HEADER_TYPE_PARAM */
-	IW_EV_QUAL_LEN,			/* IW_HEADER_TYPE_QUAL */
-};
-
-/* Size (in bytes) of various events, as packed */
-static const int event_type_pk_size[] = {
-	IW_EV_LCP_PK_LEN,		/* IW_HEADER_TYPE_NULL */
-	0,
-	IW_EV_CHAR_PK_LEN,		/* IW_HEADER_TYPE_CHAR */
-	0,
-	IW_EV_UINT_PK_LEN,		/* IW_HEADER_TYPE_UINT */
-	IW_EV_FREQ_PK_LEN,		/* IW_HEADER_TYPE_FREQ */
-	IW_EV_ADDR_PK_LEN,		/* IW_HEADER_TYPE_ADDR */
-	0,
-	IW_EV_POINT_PK_LEN,		/* Without variable payload */
-	IW_EV_PARAM_PK_LEN,		/* IW_HEADER_TYPE_PARAM */
-	IW_EV_QUAL_PK_LEN,		/* IW_HEADER_TYPE_QUAL */
-};
-
-/************************ COMMON SUBROUTINES ************************/
-/*
- * Stuff that may be used in various place or doesn't fit in one
- * of the section below.
- */
-
-/* ---------------------------------------------------------------- */
-/*
- * Return the driver handler associated with a specific Wireless Extension.
- * Called from various place, so make sure it remains efficient.
- */
-static inline iw_handler get_handler(struct net_device *dev,
-				     unsigned int cmd)
-{
-	/* Don't "optimise" the following variable, it will crash */
-	unsigned int	index;		/* *MUST* be unsigned */
-
-	/* Check if we have some wireless handlers defined */
-	if (dev->wireless_handlers == NULL)
-		return NULL;
-
-	/* Try as a standard command */
-	index = cmd - SIOCIWFIRST;
-	if (index < dev->wireless_handlers->num_standard)
-		return dev->wireless_handlers->standard[index];
-
-	/* Try as a private command */
-	index = cmd - SIOCIWFIRSTPRIV;
-	if (index < dev->wireless_handlers->num_private)
-		return dev->wireless_handlers->private[index];
-
-	/* Not found */
-	return NULL;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Get statistics out of the driver
- */
-static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
-{
-	/* New location */
-	if ((dev->wireless_handlers != NULL) &&
-	   (dev->wireless_handlers->get_wireless_stats != NULL))
-		return dev->wireless_handlers->get_wireless_stats(dev);
-
-	/* Not found */
-	return (struct iw_statistics *) NULL;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Call the commit handler in the driver
- * (if exist and if conditions are right)
- *
- * Note : our current commit strategy is currently pretty dumb,
- * but we will be able to improve on that...
- * The goal is to try to agreagate as many changes as possible
- * before doing the commit. Drivers that will define a commit handler
- * are usually those that need a reset after changing parameters, so
- * we want to minimise the number of reset.
- * A cool idea is to use a timer : at each "set" command, we re-set the
- * timer, when the timer eventually fires, we call the driver.
- * Hopefully, more on that later.
- *
- * Also, I'm waiting to see how many people will complain about the
- * netif_running(dev) test. I'm open on that one...
- * Hopefully, the driver will remember to do a commit in "open()" ;-)
- */
-static inline int call_commit_handler(struct net_device *	dev)
-{
-	if ((netif_running(dev)) &&
-	   (dev->wireless_handlers->standard[0] != NULL)) {
-		/* Call the commit handler on the driver */
-		return dev->wireless_handlers->standard[0](dev, NULL,
-							   NULL, NULL);
-	} else
-		return 0;		/* Command completed successfully */
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Calculate size of private arguments
- */
-static inline int get_priv_size(__u16	args)
-{
-	int	num = args & IW_PRIV_SIZE_MASK;
-	int	type = (args & IW_PRIV_TYPE_MASK) >> 12;
-
-	return num * iw_priv_type_size[type];
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Re-calculate the size of private arguments
- */
-static inline int adjust_priv_size(__u16		args,
-				   union iwreq_data *	wrqu)
-{
-	int	num = wrqu->data.length;
-	int	max = args & IW_PRIV_SIZE_MASK;
-	int	type = (args & IW_PRIV_TYPE_MASK) >> 12;
-
-	/* Make sure the driver doesn't goof up */
-	if (max < num)
-		num = max;
-
-	return num * iw_priv_type_size[type];
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Standard Wireless Handler : get wireless stats
- *	Allow programatic access to /proc/net/wireless even if /proc
- *	doesn't exist... Also more efficient...
- */
-static int iw_handler_get_iwstats(struct net_device *		dev,
-				  struct iw_request_info *	info,
-				  union iwreq_data *		wrqu,
-				  char *			extra)
-{
-	/* Get stats from the driver */
-	struct iw_statistics *stats;
-
-	stats = get_wireless_stats(dev);
-	if (stats != (struct iw_statistics *) NULL) {
-
-		/* Copy statistics to extra */
-		memcpy(extra, stats, sizeof(struct iw_statistics));
-		wrqu->data.length = sizeof(struct iw_statistics);
-
-		/* Check if we need to clear the updated flag */
-		if (wrqu->data.flags != 0)
-			stats->qual.updated &= ~IW_QUAL_ALL_UPDATED;
-		return 0;
-	} else
-		return -EOPNOTSUPP;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Standard Wireless Handler : get iwpriv definitions
- * Export the driver private handler definition
- * They will be picked up by tools like iwpriv...
- */
-static int iw_handler_get_private(struct net_device *		dev,
-				  struct iw_request_info *	info,
-				  union iwreq_data *		wrqu,
-				  char *			extra)
-{
-	/* Check if the driver has something to export */
-	if ((dev->wireless_handlers->num_private_args == 0) ||
-	   (dev->wireless_handlers->private_args == NULL))
-		return -EOPNOTSUPP;
-
-	/* Check if there is enough buffer up there */
-	if (wrqu->data.length < dev->wireless_handlers->num_private_args) {
-		/* User space can't know in advance how large the buffer
-		 * needs to be. Give it a hint, so that we can support
-		 * any size buffer we want somewhat efficiently... */
-		wrqu->data.length = dev->wireless_handlers->num_private_args;
-		return -E2BIG;
-	}
-
-	/* Set the number of available ioctls. */
-	wrqu->data.length = dev->wireless_handlers->num_private_args;
-
-	/* Copy structure to the user buffer. */
-	memcpy(extra, dev->wireless_handlers->private_args,
-	       sizeof(struct iw_priv_args) * wrqu->data.length);
-
-	return 0;
-}
-
-
-/******************** /proc/net/wireless SUPPORT ********************/
-/*
- * The /proc/net/wireless file is a human readable user-space interface
- * exporting various wireless specific statistics from the wireless devices.
- * This is the most popular part of the Wireless Extensions ;-)
- *
- * This interface is a pure clone of /proc/net/dev (in net/core/dev.c).
- * The content of the file is basically the content of "struct iw_statistics".
- */
-
-#ifdef CONFIG_PROC_FS
-
-/* ---------------------------------------------------------------- */
-/*
- * Print one entry (line) of /proc/net/wireless
- */
-static __inline__ void wireless_seq_printf_stats(struct seq_file *seq,
-						 struct net_device *dev)
-{
-	/* Get stats from the driver */
-	struct iw_statistics *stats = get_wireless_stats(dev);
-
-	if (stats) {
-		seq_printf(seq, "%6s: %04x  %3d%c  %3d%c  %3d%c  %6d %6d %6d "
-				"%6d %6d   %6d\n",
-			   dev->name, stats->status, stats->qual.qual,
-			   stats->qual.updated & IW_QUAL_QUAL_UPDATED
-			   ? '.' : ' ',
-			   ((__s32) stats->qual.level) -
-			   ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0),
-			   stats->qual.updated & IW_QUAL_LEVEL_UPDATED
-			   ? '.' : ' ',
-			   ((__s32) stats->qual.noise) -
-			   ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0),
-			   stats->qual.updated & IW_QUAL_NOISE_UPDATED
-			   ? '.' : ' ',
-			   stats->discard.nwid, stats->discard.code,
-			   stats->discard.fragment, stats->discard.retries,
-			   stats->discard.misc, stats->miss.beacon);
-		stats->qual.updated &= ~IW_QUAL_ALL_UPDATED;
-	}
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Print info for /proc/net/wireless (print all entries)
- */
-static int wireless_seq_show(struct seq_file *seq, void *v)
-{
-	if (v == SEQ_START_TOKEN)
-		seq_printf(seq, "Inter-| sta-|   Quality        |   Discarded "
-				"packets               | Missed | WE\n"
-				" face | tus | link level noise |  nwid  "
-				"crypt   frag  retry   misc | beacon | %d\n",
-			   WIRELESS_EXT);
-	else
-		wireless_seq_printf_stats(seq, v);
-	return 0;
-}
-
-static const struct seq_operations wireless_seq_ops = {
-	.start = dev_seq_start,
-	.next  = dev_seq_next,
-	.stop  = dev_seq_stop,
-	.show  = wireless_seq_show,
-};
-
-static int wireless_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &wireless_seq_ops);
-}
-
-static const struct file_operations wireless_seq_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = wireless_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release,
-};
-
-int __init wireless_proc_init(void)
-{
-	/* Create /proc/net/wireless entry */
-	if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops))
-		return -ENOMEM;
-
-	return 0;
-}
-#endif	/* CONFIG_PROC_FS */
-
-/************************** IOCTL SUPPORT **************************/
-/*
- * The original user space API to configure all those Wireless Extensions
- * is through IOCTLs.
- * In there, we check if we need to call the new driver API (iw_handler)
- * or just call the driver ioctl handler.
- */
-
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a standard Wireless Extension handler.
- * We do various checks and also take care of moving data between
- * user space and kernel space.
- */
-static int ioctl_standard_call(struct net_device *	dev,
-			       struct ifreq *		ifr,
-			       unsigned int		cmd,
-			       iw_handler		handler)
-{
-	struct iwreq *				iwr = (struct iwreq *) ifr;
-	const struct iw_ioctl_description *	descr;
-	struct iw_request_info			info;
-	int					ret = -EINVAL;
-
-	/* Get the description of the IOCTL */
-	if ((cmd - SIOCIWFIRST) >= standard_ioctl_num)
-		return -EOPNOTSUPP;
-	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
-
-#ifdef WE_IOCTL_DEBUG
-	printk(KERN_DEBUG "%s (WE) : Found standard handler for 0x%04X\n",
-	       ifr->ifr_name, cmd);
-	printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif	/* WE_IOCTL_DEBUG */
-
-	/* Prepare the call */
-	info.cmd = cmd;
-	info.flags = 0;
-
-	/* Check if we have a pointer to user space data or not */
-	if (descr->header_type != IW_HEADER_TYPE_POINT) {
-
-		/* No extra arguments. Trivial to handle */
-		ret = handler(dev, &info, &(iwr->u), NULL);
-
-#ifdef WE_SET_EVENT
-		/* Generate an event to notify listeners of the change */
-		if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
-		   ((ret == 0) || (ret == -EIWCOMMIT)))
-			wireless_send_event(dev, cmd, &(iwr->u), NULL);
-#endif	/* WE_SET_EVENT */
-	} else {
-		char *	extra;
-		int	extra_size;
-		int	user_length = 0;
-		int	err;
-		int	essid_compat = 0;
-
-		/* Calculate space needed by arguments. Always allocate
-		 * for max space. Easier, and won't last long... */
-		extra_size = descr->max_tokens * descr->token_size;
-
-		/* Check need for ESSID compatibility for WE < 21 */
-		switch (cmd) {
-		case SIOCSIWESSID:
-		case SIOCGIWESSID:
-		case SIOCSIWNICKN:
-		case SIOCGIWNICKN:
-			if (iwr->u.data.length == descr->max_tokens + 1)
-				essid_compat = 1;
-			else if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
-				char essid[IW_ESSID_MAX_SIZE + 1];
-
-				err = copy_from_user(essid, iwr->u.data.pointer,
-						     iwr->u.data.length *
-						     descr->token_size);
-				if (err)
-					return -EFAULT;
-
-				if (essid[iwr->u.data.length - 1] == '\0')
-					essid_compat = 1;
-			}
-			break;
-		default:
-			break;
-		}
-
-		iwr->u.data.length -= essid_compat;
-
-		/* Check what user space is giving us */
-		if (IW_IS_SET(cmd)) {
-			/* Check NULL pointer */
-			if ((iwr->u.data.pointer == NULL) &&
-			   (iwr->u.data.length != 0))
-				return -EFAULT;
-			/* Check if number of token fits within bounds */
-			if (iwr->u.data.length > descr->max_tokens)
-				return -E2BIG;
-			if (iwr->u.data.length < descr->min_tokens)
-				return -EINVAL;
-		} else {
-			/* Check NULL pointer */
-			if (iwr->u.data.pointer == NULL)
-				return -EFAULT;
-			/* Save user space buffer size for checking */
-			user_length = iwr->u.data.length;
-
-			/* Don't check if user_length > max to allow forward
-			 * compatibility. The test user_length < min is
-			 * implied by the test at the end. */
-
-			/* Support for very large requests */
-			if ((descr->flags & IW_DESCR_FLAG_NOMAX) &&
-			   (user_length > descr->max_tokens)) {
-				/* Allow userspace to GET more than max so
-				 * we can support any size GET requests.
-				 * There is still a limit : -ENOMEM. */
-				extra_size = user_length * descr->token_size;
-				/* Note : user_length is originally a __u16,
-				 * and token_size is controlled by us,
-				 * so extra_size won't get negative and
-				 * won't overflow... */
-			}
-		}
-
-#ifdef WE_IOCTL_DEBUG
-		printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
-		       dev->name, extra_size);
-#endif	/* WE_IOCTL_DEBUG */
-
-		/* Create the kernel buffer */
-		/*    kzalloc ensures NULL-termination for essid_compat */
-		extra = kzalloc(extra_size, GFP_KERNEL);
-		if (extra == NULL) {
-			return -ENOMEM;
-		}
-
-		/* If it is a SET, get all the extra data in here */
-		if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
-			err = copy_from_user(extra, iwr->u.data.pointer,
-					     iwr->u.data.length *
-					     descr->token_size);
-			if (err) {
-				kfree(extra);
-				return -EFAULT;
-			}
-#ifdef WE_IOCTL_DEBUG
-			printk(KERN_DEBUG "%s (WE) : Got %d bytes\n",
-			       dev->name,
-			       iwr->u.data.length * descr->token_size);
-#endif	/* WE_IOCTL_DEBUG */
-		}
-
-		/* Call the handler */
-		ret = handler(dev, &info, &(iwr->u), extra);
-
-		iwr->u.data.length += essid_compat;
-
-		/* If we have something to return to the user */
-		if (!ret && IW_IS_GET(cmd)) {
-			/* Check if there is enough buffer up there */
-			if (user_length < iwr->u.data.length) {
-				kfree(extra);
-				return -E2BIG;
-			}
-
-			err = copy_to_user(iwr->u.data.pointer, extra,
-					   iwr->u.data.length *
-					   descr->token_size);
-			if (err)
-				ret =  -EFAULT;
-#ifdef WE_IOCTL_DEBUG
-			printk(KERN_DEBUG "%s (WE) : Wrote %d bytes\n",
-			       dev->name,
-			       iwr->u.data.length * descr->token_size);
-#endif	/* WE_IOCTL_DEBUG */
-		}
-
-#ifdef WE_SET_EVENT
-		/* Generate an event to notify listeners of the change */
-		if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
-		   ((ret == 0) || (ret == -EIWCOMMIT))) {
-			if (descr->flags & IW_DESCR_FLAG_RESTRICT)
-				/* If the event is restricted, don't
-				 * export the payload */
-				wireless_send_event(dev, cmd, &(iwr->u), NULL);
-			else
-				wireless_send_event(dev, cmd, &(iwr->u),
-						    extra);
-		}
-#endif	/* WE_SET_EVENT */
-
-		/* Cleanup - I told you it wasn't that long ;-) */
-		kfree(extra);
-	}
-
-	/* Call commit handler if needed and defined */
-	if (ret == -EIWCOMMIT)
-		ret = call_commit_handler(dev);
-
-	/* Here, we will generate the appropriate event if needed */
-
-	return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Wrapper to call a private Wireless Extension handler.
- * We do various checks and also take care of moving data between
- * user space and kernel space.
- * It's not as nice and slimline as the standard wrapper. The cause
- * is struct iw_priv_args, which was not really designed for the
- * job we are going here.
- *
- * IMPORTANT : This function prevent to set and get data on the same
- * IOCTL and enforce the SET/GET convention. Not doing it would be
- * far too hairy...
- * If you need to set and get data at the same time, please don't use
- * a iw_handler but process it in your ioctl handler (i.e. use the
- * old driver API).
- */
-static inline int ioctl_private_call(struct net_device *	dev,
-				     struct ifreq *		ifr,
-				     unsigned int		cmd,
-				     iw_handler		handler)
-{
-	struct iwreq *			iwr = (struct iwreq *) ifr;
-	const struct iw_priv_args *	descr = NULL;
-	struct iw_request_info		info;
-	int				extra_size = 0;
-	int				i;
-	int				ret = -EINVAL;
-
-	/* Get the description of the IOCTL */
-	for (i = 0; i < dev->wireless_handlers->num_private_args; i++)
-		if (cmd == dev->wireless_handlers->private_args[i].cmd) {
-			descr = &(dev->wireless_handlers->private_args[i]);
-			break;
-		}
-
-#ifdef WE_IOCTL_DEBUG
-	printk(KERN_DEBUG "%s (WE) : Found private handler for 0x%04X\n",
-	       ifr->ifr_name, cmd);
-	if (descr) {
-		printk(KERN_DEBUG "%s (WE) : Name %s, set %X, get %X\n",
-		       dev->name, descr->name,
-		       descr->set_args, descr->get_args);
-	}
-#endif	/* WE_IOCTL_DEBUG */
-
-	/* Compute the size of the set/get arguments */
-	if (descr != NULL) {
-		if (IW_IS_SET(cmd)) {
-			int	offset = 0;	/* For sub-ioctls */
-			/* Check for sub-ioctl handler */
-			if (descr->name[0] == '\0')
-				/* Reserve one int for sub-ioctl index */
-				offset = sizeof(__u32);
-
-			/* Size of set arguments */
-			extra_size = get_priv_size(descr->set_args);
-
-			/* Does it fits in iwr ? */
-			if ((descr->set_args & IW_PRIV_SIZE_FIXED) &&
-			   ((extra_size + offset) <= IFNAMSIZ))
-				extra_size = 0;
-		} else {
-			/* Size of get arguments */
-			extra_size = get_priv_size(descr->get_args);
-
-			/* Does it fits in iwr ? */
-			if ((descr->get_args & IW_PRIV_SIZE_FIXED) &&
-			   (extra_size <= IFNAMSIZ))
-				extra_size = 0;
-		}
-	}
-
-	/* Prepare the call */
-	info.cmd = cmd;
-	info.flags = 0;
-
-	/* Check if we have a pointer to user space data or not. */
-	if (extra_size == 0) {
-		/* No extra arguments. Trivial to handle */
-		ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u));
-	} else {
-		char *	extra;
-		int	err;
-
-		/* Check what user space is giving us */
-		if (IW_IS_SET(cmd)) {
-			/* Check NULL pointer */
-			if ((iwr->u.data.pointer == NULL) &&
-			   (iwr->u.data.length != 0))
-				return -EFAULT;
-
-			/* Does it fits within bounds ? */
-			if (iwr->u.data.length > (descr->set_args &
-						 IW_PRIV_SIZE_MASK))
-				return -E2BIG;
-		} else {
-			/* Check NULL pointer */
-			if (iwr->u.data.pointer == NULL)
-				return -EFAULT;
-		}
-
-#ifdef WE_IOCTL_DEBUG
-		printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
-		       dev->name, extra_size);
-#endif	/* WE_IOCTL_DEBUG */
-
-		/* Always allocate for max space. Easier, and won't last
-		 * long... */
-		extra = kmalloc(extra_size, GFP_KERNEL);
-		if (extra == NULL) {
-			return -ENOMEM;
-		}
-
-		/* If it is a SET, get all the extra data in here */
-		if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
-			err = copy_from_user(extra, iwr->u.data.pointer,
-					     extra_size);
-			if (err) {
-				kfree(extra);
-				return -EFAULT;
-			}
-#ifdef WE_IOCTL_DEBUG
-			printk(KERN_DEBUG "%s (WE) : Got %d elem\n",
-			       dev->name, iwr->u.data.length);
-#endif	/* WE_IOCTL_DEBUG */
-		}
-
-		/* Call the handler */
-		ret = handler(dev, &info, &(iwr->u), extra);
-
-		/* If we have something to return to the user */
-		if (!ret && IW_IS_GET(cmd)) {
-
-			/* Adjust for the actual length if it's variable,
-			 * avoid leaking kernel bits outside. */
-			if (!(descr->get_args & IW_PRIV_SIZE_FIXED)) {
-				extra_size = adjust_priv_size(descr->get_args,
-							      &(iwr->u));
-			}
-
-			err = copy_to_user(iwr->u.data.pointer, extra,
-					   extra_size);
-			if (err)
-				ret =  -EFAULT;
-#ifdef WE_IOCTL_DEBUG
-			printk(KERN_DEBUG "%s (WE) : Wrote %d elem\n",
-			       dev->name, iwr->u.data.length);
-#endif	/* WE_IOCTL_DEBUG */
-		}
-
-		/* Cleanup - I told you it wasn't that long ;-) */
-		kfree(extra);
-	}
-
-
-	/* Call commit handler if needed and defined */
-	if (ret == -EIWCOMMIT)
-		ret = call_commit_handler(dev);
-
-	return ret;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Main IOCTl dispatcher. Called from the main networking code
- * (dev_ioctl() in net/core/dev.c).
- * Check the type of IOCTL and call the appropriate wrapper...
- */
-int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
-{
-	struct net_device *dev;
-	iw_handler	handler;
-
-	/* Permissions are already checked in dev_ioctl() before calling us.
-	 * The copy_to/from_user() of ifr is also dealt with in there */
-
-	/* Make sure the device exist */
-	if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
-		return -ENODEV;
-
-	/* A bunch of special cases, then the generic case...
-	 * Note that 'cmd' is already filtered in dev_ioctl() with
-	 * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */
-	switch (cmd) {
-	case SIOCGIWSTATS:
-		/* Get Wireless Stats */
-		return ioctl_standard_call(dev,
-					   ifr,
-					   cmd,
-					   &iw_handler_get_iwstats);
-
-	case SIOCGIWPRIV:
-		/* Check if we have some wireless handlers defined */
-		if (dev->wireless_handlers != NULL) {
-			/* We export to user space the definition of
-			 * the private handler ourselves */
-			return ioctl_standard_call(dev,
-						   ifr,
-						   cmd,
-						   &iw_handler_get_private);
-		}
-		// ## Fall-through for old API ##
-	default:
-		/* Generic IOCTL */
-		/* Basic check */
-		if (!netif_device_present(dev))
-			return -ENODEV;
-		/* New driver API : try to find the handler */
-		handler = get_handler(dev, cmd);
-		if (handler != NULL) {
-			/* Standard and private are not the same */
-			if (cmd < SIOCIWFIRSTPRIV)
-				return ioctl_standard_call(dev,
-							   ifr,
-							   cmd,
-							   handler);
-			else
-				return ioctl_private_call(dev,
-							  ifr,
-							  cmd,
-							  handler);
-		}
-		/* Old driver API : call driver ioctl handler */
-		if (dev->do_ioctl) {
-			return dev->do_ioctl(dev, ifr, cmd);
-		}
-		return -EOPNOTSUPP;
-	}
-	/* Not reached */
-	return -EINVAL;
-}
-
-
-/************************* EVENT PROCESSING *************************/
-/*
- * Process events generated by the wireless layer or the driver.
- * Most often, the event will be propagated through rtnetlink
- */
-
-#ifdef WE_EVENT_RTNETLINK
-/* ---------------------------------------------------------------- */
-/*
- * Locking...
- * ----------
- *
- * Thanks to Herbert Xu <herbert@gondor.apana.org.au> for fixing
- * the locking issue in here and implementing this code !
- *
- * The issue : wireless_send_event() is often called in interrupt context,
- * while the Netlink layer can never be called in interrupt context.
- * The fully formed RtNetlink events are queued, and then a tasklet is run
- * to feed those to Netlink.
- * The skb_queue is interrupt safe, and its lock is not held while calling
- * Netlink, so there is no possibility of dealock.
- * Jean II
- */
-
-static struct sk_buff_head wireless_nlevent_queue;
-
-static int __init wireless_nlevent_init(void)
-{
-	skb_queue_head_init(&wireless_nlevent_queue);
-	return 0;
-}
-
-subsys_initcall(wireless_nlevent_init);
-
-static void wireless_nlevent_process(unsigned long data)
-{
-	struct sk_buff *skb;
-
-	while ((skb = skb_dequeue(&wireless_nlevent_queue)))
-		rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
-}
-
-static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
-
-/* ---------------------------------------------------------------- */
-/*
- * Fill a rtnetlink message with our event data.
- * Note that we propage only the specified event and don't dump the
- * current wireless config. Dumping the wireless config is far too
- * expensive (for each parameter, the driver need to query the hardware).
- */
-static inline int rtnetlink_fill_iwinfo(struct sk_buff *	skb,
-					struct net_device *	dev,
-					int			type,
-					char *			event,
-					int			event_len)
-{
-	struct ifinfomsg *r;
-	struct nlmsghdr  *nlh;
-	unsigned char	 *b = skb_tail_pointer(skb);
-
-	nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r));
-	r = NLMSG_DATA(nlh);
-	r->ifi_family = AF_UNSPEC;
-	r->__ifi_pad = 0;
-	r->ifi_type = dev->type;
-	r->ifi_index = dev->ifindex;
-	r->ifi_flags = dev_get_flags(dev);
-	r->ifi_change = 0;	/* Wireless changes don't affect those flags */
-
-	/* Add the wireless events in the netlink packet */
-	RTA_PUT(skb, IFLA_WIRELESS, event_len, event);
-
-	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
-	return skb->len;
-
-nlmsg_failure:
-rtattr_failure:
-	nlmsg_trim(skb, b);
-	return -1;
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Create and broadcast and send it on the standard rtnetlink socket
- * This is a pure clone rtmsg_ifinfo() in net/core/rtnetlink.c
- * Andrzej Krzysztofowicz mandated that I used a IFLA_XXX field
- * within a RTM_NEWLINK event.
- */
-static inline void rtmsg_iwinfo(struct net_device *	dev,
-				char *			event,
-				int			event_len)
-{
-	struct sk_buff *skb;
-	int size = NLMSG_GOODSIZE;
-
-	skb = alloc_skb(size, GFP_ATOMIC);
-	if (!skb)
-		return;
-
-	if (rtnetlink_fill_iwinfo(skb, dev, RTM_NEWLINK,
-				  event, event_len) < 0) {
-		kfree_skb(skb);
-		return;
-	}
-	NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
-	skb_queue_tail(&wireless_nlevent_queue, skb);
-	tasklet_schedule(&wireless_nlevent_tasklet);
-}
-
-#endif	/* WE_EVENT_RTNETLINK */
-
-/* ---------------------------------------------------------------- */
-/*
- * Main event dispatcher. Called from other parts and drivers.
- * Send the event on the appropriate channels.
- * May be called from interrupt context.
- */
-void wireless_send_event(struct net_device *	dev,
-			 unsigned int		cmd,
-			 union iwreq_data *	wrqu,
-			 char *			extra)
-{
-	const struct iw_ioctl_description *	descr = NULL;
-	int extra_len = 0;
-	struct iw_event  *event;		/* Mallocated whole event */
-	int event_len;				/* Its size */
-	int hdr_len;				/* Size of the event header */
-	int wrqu_off = 0;			/* Offset in wrqu */
-	/* Don't "optimise" the following variable, it will crash */
-	unsigned	cmd_index;		/* *MUST* be unsigned */
-
-	/* Get the description of the Event */
-	if (cmd <= SIOCIWLAST) {
-		cmd_index = cmd - SIOCIWFIRST;
-		if (cmd_index < standard_ioctl_num)
-			descr = &(standard_ioctl[cmd_index]);
-	} else {
-		cmd_index = cmd - IWEVFIRST;
-		if (cmd_index < standard_event_num)
-			descr = &(standard_event[cmd_index]);
-	}
-	/* Don't accept unknown events */
-	if (descr == NULL) {
-		/* Note : we don't return an error to the driver, because
-		 * the driver would not know what to do about it. It can't
-		 * return an error to the user, because the event is not
-		 * initiated by a user request.
-		 * The best the driver could do is to log an error message.
-		 * We will do it ourselves instead...
-		 */
-		printk(KERN_ERR "%s (WE) : Invalid/Unknown Wireless Event (0x%04X)\n",
-		       dev->name, cmd);
-		return;
-	}
-#ifdef WE_EVENT_DEBUG
-	printk(KERN_DEBUG "%s (WE) : Got event 0x%04X\n",
-	       dev->name, cmd);
-	printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif	/* WE_EVENT_DEBUG */
-
-	/* Check extra parameters and set extra_len */
-	if (descr->header_type == IW_HEADER_TYPE_POINT) {
-		/* Check if number of token fits within bounds */
-		if (wrqu->data.length > descr->max_tokens) {
-			printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length);
-			return;
-		}
-		if (wrqu->data.length < descr->min_tokens) {
-			printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length);
-			return;
-		}
-		/* Calculate extra_len - extra is NULL for restricted events */
-		if (extra != NULL)
-			extra_len = wrqu->data.length * descr->token_size;
-		/* Always at an offset in wrqu */
-		wrqu_off = IW_EV_POINT_OFF;
-#ifdef WE_EVENT_DEBUG
-		printk(KERN_DEBUG "%s (WE) : Event 0x%04X, tokens %d, extra_len %d\n", dev->name, cmd, wrqu->data.length, extra_len);
-#endif	/* WE_EVENT_DEBUG */
-	}
-
-	/* Total length of the event */
-	hdr_len = event_type_size[descr->header_type];
-	event_len = hdr_len + extra_len;
-
-#ifdef WE_EVENT_DEBUG
-	printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, wrqu_off %d, event_len %d\n", dev->name, cmd, hdr_len, wrqu_off, event_len);
-#endif	/* WE_EVENT_DEBUG */
-
-	/* Create temporary buffer to hold the event */
-	event = kmalloc(event_len, GFP_ATOMIC);
-	if (event == NULL)
-		return;
-
-	/* Fill event */
-	event->len = event_len;
-	event->cmd = cmd;
-	memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN);
-	if (extra != NULL)
-		memcpy(((char *) event) + hdr_len, extra, extra_len);
-
-#ifdef WE_EVENT_RTNETLINK
-	/* Send via the RtNetlink event channel */
-	rtmsg_iwinfo(dev, (char *) event, event_len);
-#endif	/* WE_EVENT_RTNETLINK */
-
-	/* Cleanup */
-	kfree(event);
-
-	return;		/* Always success, I guess ;-) */
-}
-
-/********************** ENHANCED IWSPY SUPPORT **********************/
-/*
- * In the old days, the driver was handling spy support all by itself.
- * Now, the driver can delegate this task to Wireless Extensions.
- * It needs to use those standard spy iw_handler in struct iw_handler_def,
- * push data to us via wireless_spy_update() and include struct iw_spy_data
- * in its private part (and export it in net_device->wireless_data->spy_data).
- * One of the main advantage of centralising spy support here is that
- * it becomes much easier to improve and extend it without having to touch
- * the drivers. One example is the addition of the Spy-Threshold events.
- */
-
-/* ---------------------------------------------------------------- */
-/*
- * Return the pointer to the spy data in the driver.
- * Because this is called on the Rx path via wireless_spy_update(),
- * we want it to be efficient...
- */
-static inline struct iw_spy_data * get_spydata(struct net_device *dev)
-{
-	/* This is the new way */
-	if (dev->wireless_data)
-		return(dev->wireless_data->spy_data);
-	return NULL;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Standard Wireless Handler : set Spy List
- */
-int iw_handler_set_spy(struct net_device *	dev,
-		       struct iw_request_info *	info,
-		       union iwreq_data *	wrqu,
-		       char *			extra)
-{
-	struct iw_spy_data *	spydata = get_spydata(dev);
-	struct sockaddr *	address = (struct sockaddr *) extra;
-
-	/* Make sure driver is not buggy or using the old API */
-	if (!spydata)
-		return -EOPNOTSUPP;
-
-	/* Disable spy collection while we copy the addresses.
-	 * While we copy addresses, any call to wireless_spy_update()
-	 * will NOP. This is OK, as anyway the addresses are changing. */
-	spydata->spy_number = 0;
-
-	/* We want to operate without locking, because wireless_spy_update()
-	 * most likely will happen in the interrupt handler, and therefore
-	 * have its own locking constraints and needs performance.
-	 * The rtnl_lock() make sure we don't race with the other iw_handlers.
-	 * This make sure wireless_spy_update() "see" that the spy list
-	 * is temporarily disabled. */
-	smp_wmb();
-
-	/* Are there are addresses to copy? */
-	if (wrqu->data.length > 0) {
-		int i;
-
-		/* Copy addresses */
-		for (i = 0; i < wrqu->data.length; i++)
-			memcpy(spydata->spy_address[i], address[i].sa_data,
-			       ETH_ALEN);
-		/* Reset stats */
-		memset(spydata->spy_stat, 0,
-		       sizeof(struct iw_quality) * IW_MAX_SPY);
-
-#ifdef WE_SPY_DEBUG
-		printk(KERN_DEBUG "iw_handler_set_spy() :  wireless_data %p, spydata %p, num %d\n", dev->wireless_data, spydata, wrqu->data.length);
-		for (i = 0; i < wrqu->data.length; i++)
-			printk(KERN_DEBUG
-			       "%02X:%02X:%02X:%02X:%02X:%02X \n",
-			       spydata->spy_address[i][0],
-			       spydata->spy_address[i][1],
-			       spydata->spy_address[i][2],
-			       spydata->spy_address[i][3],
-			       spydata->spy_address[i][4],
-			       spydata->spy_address[i][5]);
-#endif	/* WE_SPY_DEBUG */
-	}
-
-	/* Make sure above is updated before re-enabling */
-	smp_wmb();
-
-	/* Enable addresses */
-	spydata->spy_number = wrqu->data.length;
-
-	return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Standard Wireless Handler : get Spy List
- */
-int iw_handler_get_spy(struct net_device *	dev,
-		       struct iw_request_info *	info,
-		       union iwreq_data *	wrqu,
-		       char *			extra)
-{
-	struct iw_spy_data *	spydata = get_spydata(dev);
-	struct sockaddr *	address = (struct sockaddr *) extra;
-	int			i;
-
-	/* Make sure driver is not buggy or using the old API */
-	if (!spydata)
-		return -EOPNOTSUPP;
-
-	wrqu->data.length = spydata->spy_number;
-
-	/* Copy addresses. */
-	for (i = 0; i < spydata->spy_number; i++) 	{
-		memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN);
-		address[i].sa_family = AF_UNIX;
-	}
-	/* Copy stats to the user buffer (just after). */
-	if (spydata->spy_number > 0)
-		memcpy(extra  + (sizeof(struct sockaddr) *spydata->spy_number),
-		       spydata->spy_stat,
-		       sizeof(struct iw_quality) * spydata->spy_number);
-	/* Reset updated flags. */
-	for (i = 0; i < spydata->spy_number; i++)
-		spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED;
-	return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Standard Wireless Handler : set spy threshold
- */
-int iw_handler_set_thrspy(struct net_device *	dev,
-			  struct iw_request_info *info,
-			  union iwreq_data *	wrqu,
-			  char *		extra)
-{
-	struct iw_spy_data *	spydata = get_spydata(dev);
-	struct iw_thrspy *	threshold = (struct iw_thrspy *) extra;
-
-	/* Make sure driver is not buggy or using the old API */
-	if (!spydata)
-		return -EOPNOTSUPP;
-
-	/* Just do it */
-	memcpy(&(spydata->spy_thr_low), &(threshold->low),
-	       2 * sizeof(struct iw_quality));
-
-	/* Clear flag */
-	memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under));
-
-#ifdef WE_SPY_DEBUG
-	printk(KERN_DEBUG "iw_handler_set_thrspy() :  low %d ; high %d\n", spydata->spy_thr_low.level, spydata->spy_thr_high.level);
-#endif	/* WE_SPY_DEBUG */
-
-	return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Standard Wireless Handler : get spy threshold
- */
-int iw_handler_get_thrspy(struct net_device *	dev,
-			  struct iw_request_info *info,
-			  union iwreq_data *	wrqu,
-			  char *		extra)
-{
-	struct iw_spy_data *	spydata = get_spydata(dev);
-	struct iw_thrspy *	threshold = (struct iw_thrspy *) extra;
-
-	/* Make sure driver is not buggy or using the old API */
-	if (!spydata)
-		return -EOPNOTSUPP;
-
-	/* Just do it */
-	memcpy(&(threshold->low), &(spydata->spy_thr_low),
-	       2 * sizeof(struct iw_quality));
-
-	return 0;
-}
-
-/*------------------------------------------------------------------*/
-/*
- * Prepare and send a Spy Threshold event
- */
-static void iw_send_thrspy_event(struct net_device *	dev,
-				 struct iw_spy_data *	spydata,
-				 unsigned char *	address,
-				 struct iw_quality *	wstats)
-{
-	union iwreq_data	wrqu;
-	struct iw_thrspy	threshold;
-
-	/* Init */
-	wrqu.data.length = 1;
-	wrqu.data.flags = 0;
-	/* Copy address */
-	memcpy(threshold.addr.sa_data, address, ETH_ALEN);
-	threshold.addr.sa_family = ARPHRD_ETHER;
-	/* Copy stats */
-	memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality));
-	/* Copy also thresholds */
-	memcpy(&(threshold.low), &(spydata->spy_thr_low),
-	       2 * sizeof(struct iw_quality));
-
-#ifdef WE_SPY_DEBUG
-	printk(KERN_DEBUG "iw_send_thrspy_event() : address %02X:%02X:%02X:%02X:%02X:%02X, level %d, up = %d\n",
-	       threshold.addr.sa_data[0],
-	       threshold.addr.sa_data[1],
-	       threshold.addr.sa_data[2],
-	       threshold.addr.sa_data[3],
-	       threshold.addr.sa_data[4],
-	       threshold.addr.sa_data[5], threshold.qual.level);
-#endif	/* WE_SPY_DEBUG */
-
-	/* Send event to user space */
-	wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);
-}
-
-/* ---------------------------------------------------------------- */
-/*
- * Call for the driver to update the spy data.
- * For now, the spy data is a simple array. As the size of the array is
- * small, this is good enough. If we wanted to support larger number of
- * spy addresses, we should use something more efficient...
- */
-void wireless_spy_update(struct net_device *	dev,
-			 unsigned char *	address,
-			 struct iw_quality *	wstats)
-{
-	struct iw_spy_data *	spydata = get_spydata(dev);
-	int			i;
-	int			match = -1;
-
-	/* Make sure driver is not buggy or using the old API */
-	if (!spydata)
-		return;
-
-#ifdef WE_SPY_DEBUG
-	printk(KERN_DEBUG "wireless_spy_update() :  wireless_data %p, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_data, spydata, address[0], address[1], address[2], address[3], address[4], address[5]);
-#endif	/* WE_SPY_DEBUG */
-
-	/* Update all records that match */
-	for (i = 0; i < spydata->spy_number; i++)
-		if (!compare_ether_addr(address, spydata->spy_address[i])) {
-			memcpy(&(spydata->spy_stat[i]), wstats,
-			       sizeof(struct iw_quality));
-			match = i;
-		}
-
-	/* Generate an event if we cross the spy threshold.
-	 * To avoid event storms, we have a simple hysteresis : we generate
-	 * event only when we go under the low threshold or above the
-	 * high threshold. */
-	if (match >= 0) {
-		if (spydata->spy_thr_under[match]) {
-			if (wstats->level > spydata->spy_thr_high.level) {
-				spydata->spy_thr_under[match] = 0;
-				iw_send_thrspy_event(dev, spydata,
-						     address, wstats);
-			}
-		} else {
-			if (wstats->level < spydata->spy_thr_low.level) {
-				spydata->spy_thr_under[match] = 1;
-				iw_send_thrspy_event(dev, spydata,
-						     address, wstats);
-			}
-		}
-	}
-}
-
-EXPORT_SYMBOL(iw_handler_get_spy);
-EXPORT_SYMBOL(iw_handler_get_thrspy);
-EXPORT_SYMBOL(iw_handler_set_spy);
-EXPORT_SYMBOL(iw_handler_set_thrspy);
-EXPORT_SYMBOL(wireless_send_event);
-EXPORT_SYMBOL(wireless_spy_update);
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 3f082ffae38..3a96ae60271 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -1,3 +1,4 @@
+obj-$(CONFIG_WIRELESS_EXT) += wext.o
 obj-$(CONFIG_CFG80211) += cfg80211.o
 
 cfg80211-y += core.o sysfs.o
diff --git a/net/wireless/wext.c b/net/wireless/wext.c
new file mode 100644
index 00000000000..fba295e05e7
--- /dev/null
+++ b/net/wireless/wext.c
@@ -0,0 +1,1633 @@
+/*
+ * This file implement the Wireless Extensions APIs.
+ *
+ * Authors :	Jean Tourrilhes - HPL - <jt@hpl.hp.com>
+ * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved.
+ *
+ * (As all part of the Linux kernel, this file is GPL)
+ */
+
+/************************** DOCUMENTATION **************************/
+/*
+ * API definition :
+ * --------------
+ * See <linux/wireless.h> for details of the APIs and the rest.
+ *
+ * History :
+ * -------
+ *
+ * v1 - 5.12.01 - Jean II
+ *	o Created this file.
+ *
+ * v2 - 13.12.01 - Jean II
+ *	o Move /proc/net/wireless stuff from net/core/dev.c to here
+ *	o Make Wireless Extension IOCTLs go through here
+ *	o Added iw_handler handling ;-)
+ *	o Added standard ioctl description
+ *	o Initial dumb commit strategy based on orinoco.c
+ *
+ * v3 - 19.12.01 - Jean II
+ *	o Make sure we don't go out of standard_ioctl[] in ioctl_standard_call
+ *	o Add event dispatcher function
+ *	o Add event description
+ *	o Propagate events as rtnetlink IFLA_WIRELESS option
+ *	o Generate event on selected SET requests
+ *
+ * v4 - 18.04.02 - Jean II
+ *	o Fix stupid off by one in iw_ioctl_description : IW_ESSID_MAX_SIZE + 1
+ *
+ * v5 - 21.06.02 - Jean II
+ *	o Add IW_PRIV_TYPE_ADDR in priv_type_size (+cleanup)
+ *	o Reshuffle IW_HEADER_TYPE_XXX to map IW_PRIV_TYPE_XXX changes
+ *	o Add IWEVCUSTOM for driver specific event/scanning token
+ *	o Turn on WE_STRICT_WRITE by default + kernel warning
+ *	o Fix WE_STRICT_WRITE in ioctl_export_private() (32 => iw_num)
+ *	o Fix off-by-one in test (extra_size <= IFNAMSIZ)
+ *
+ * v6 - 9.01.03 - Jean II
+ *	o Add common spy support : iw_handler_set_spy(), wireless_spy_update()
+ *	o Add enhanced spy support : iw_handler_set_thrspy() and event.
+ *	o Add WIRELESS_EXT version display in /proc/net/wireless
+ *
+ * v6 - 18.06.04 - Jean II
+ *	o Change get_spydata() method for added safety
+ *	o Remove spy #ifdef, they are always on -> cleaner code
+ *	o Allow any size GET request if user specifies length > max
+ *		and if request has IW_DESCR_FLAG_NOMAX flag or is SIOCGIWPRIV
+ *	o Start migrating get_wireless_stats to struct iw_handler_def
+ *	o Add wmb() in iw_handler_set_spy() for non-coherent archs/cpus
+ * Based on patch from Pavel Roskin <proski@gnu.org> :
+ *	o Fix kernel data leak to user space in private handler handling
+ *
+ * v7 - 18.3.05 - Jean II
+ *	o Remove (struct iw_point *)->pointer from events and streams
+ *	o Remove spy_offset from struct iw_handler_def
+ *	o Start deprecating dev->get_wireless_stats, output a warning
+ *	o If IW_QUAL_DBM is set, show dBm values in /proc/net/wireless
+ *	o Don't loose INVALID/DBM flags when clearing UPDATED flags (iwstats)
+ *
+ * v8 - 17.02.06 - Jean II
+ *	o RtNetlink requests support (SET/GET)
+ *
+ * v8b - 03.08.06 - Herbert Xu
+ *	o Fix Wireless Event locking issues.
+ *
+ * v9 - 14.3.06 - Jean II
+ *	o Change length in ESSID and NICK to strlen() instead of strlen()+1
+ *	o Make standard_ioctl_num and standard_event_num unsigned
+ *	o Remove (struct net_device *)->get_wireless_stats()
+ *
+ * v10 - 16.3.07 - Jean II
+ *	o Prevent leaking of kernel space in stream on 64 bits.
+ */
+
+/***************************** INCLUDES *****************************/
+
+#include <linux/module.h>
+#include <linux/types.h>		/* off_t */
+#include <linux/netdevice.h>		/* struct ifreq, dev_get_by_name() */
+#include <linux/proc_fs.h>
+#include <linux/rtnetlink.h>		/* rtnetlink stuff */
+#include <linux/seq_file.h>
+#include <linux/init.h>			/* for __init */
+#include <linux/if_arp.h>		/* ARPHRD_ETHER */
+#include <linux/etherdevice.h>		/* compare_ether_addr */
+#include <linux/interrupt.h>
+
+#include <linux/wireless.h>		/* Pretty obvious */
+#include <net/iw_handler.h>		/* New driver API */
+#include <net/netlink.h>
+
+#include <asm/uaccess.h>		/* copy_to_user() */
+
+/**************************** CONSTANTS ****************************/
+
+/* Debugging stuff */
+#undef WE_IOCTL_DEBUG		/* Debug IOCTL API */
+#undef WE_EVENT_DEBUG		/* Debug Event dispatcher */
+#undef WE_SPY_DEBUG		/* Debug enhanced spy support */
+
+/* Options */
+#define WE_EVENT_RTNETLINK	/* Propagate events using RtNetlink */
+#define WE_SET_EVENT		/* Generate an event on some set commands */
+
+/************************* GLOBAL VARIABLES *************************/
+/*
+ * You should not use global variables, because of re-entrancy.
+ * On our case, it's only const, so it's OK...
+ */
+/*
+ * Meta-data about all the standard Wireless Extension request we
+ * know about.
+ */
+static const struct iw_ioctl_description standard_ioctl[] = {
+	[SIOCSIWCOMMIT	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_NULL,
+	},
+	[SIOCGIWNAME	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_CHAR,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWNWID	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+		.flags		= IW_DESCR_FLAG_EVENT,
+	},
+	[SIOCGIWNWID	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWFREQ	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_FREQ,
+		.flags		= IW_DESCR_FLAG_EVENT,
+	},
+	[SIOCGIWFREQ	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_FREQ,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWMODE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_UINT,
+		.flags		= IW_DESCR_FLAG_EVENT,
+	},
+	[SIOCGIWMODE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_UINT,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWSENS	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWSENS	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWRANGE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_NULL,
+	},
+	[SIOCGIWRANGE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= sizeof(struct iw_range),
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWPRIV	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_NULL,
+	},
+	[SIOCGIWPRIV	- SIOCIWFIRST] = { /* (handled directly by us) */
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= sizeof(struct iw_priv_args),
+		.max_tokens	= 16,
+		.flags		= IW_DESCR_FLAG_NOMAX,
+	},
+	[SIOCSIWSTATS	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_NULL,
+	},
+	[SIOCGIWSTATS	- SIOCIWFIRST] = { /* (handled directly by us) */
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= sizeof(struct iw_statistics),
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWSPY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= sizeof(struct sockaddr),
+		.max_tokens	= IW_MAX_SPY,
+	},
+	[SIOCGIWSPY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= sizeof(struct sockaddr) +
+				  sizeof(struct iw_quality),
+		.max_tokens	= IW_MAX_SPY,
+	},
+	[SIOCSIWTHRSPY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= sizeof(struct iw_thrspy),
+		.min_tokens	= 1,
+		.max_tokens	= 1,
+	},
+	[SIOCGIWTHRSPY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= sizeof(struct iw_thrspy),
+		.min_tokens	= 1,
+		.max_tokens	= 1,
+	},
+	[SIOCSIWAP	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_ADDR,
+	},
+	[SIOCGIWAP	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_ADDR,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWMLME	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_mlme),
+		.max_tokens	= sizeof(struct iw_mlme),
+	},
+	[SIOCGIWAPLIST	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= sizeof(struct sockaddr) +
+				  sizeof(struct iw_quality),
+		.max_tokens	= IW_MAX_AP,
+		.flags		= IW_DESCR_FLAG_NOMAX,
+	},
+	[SIOCSIWSCAN	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= 0,
+		.max_tokens	= sizeof(struct iw_scan_req),
+	},
+	[SIOCGIWSCAN	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_SCAN_MAX_DATA,
+		.flags		= IW_DESCR_FLAG_NOMAX,
+	},
+	[SIOCSIWESSID	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ESSID_MAX_SIZE,
+		.flags		= IW_DESCR_FLAG_EVENT,
+	},
+	[SIOCGIWESSID	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ESSID_MAX_SIZE,
+		.flags		= IW_DESCR_FLAG_DUMP,
+	},
+	[SIOCSIWNICKN	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ESSID_MAX_SIZE,
+	},
+	[SIOCGIWNICKN	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ESSID_MAX_SIZE,
+	},
+	[SIOCSIWRATE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWRATE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWRTS	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWRTS	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWFRAG	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWFRAG	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWTXPOW	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWTXPOW	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWRETRY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWRETRY	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWENCODE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ENCODING_TOKEN_MAX,
+		.flags		= IW_DESCR_FLAG_EVENT | IW_DESCR_FLAG_RESTRICT,
+	},
+	[SIOCGIWENCODE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_ENCODING_TOKEN_MAX,
+		.flags		= IW_DESCR_FLAG_DUMP | IW_DESCR_FLAG_RESTRICT,
+	},
+	[SIOCSIWPOWER	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWPOWER	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWGENIE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[SIOCGIWGENIE	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[SIOCSIWAUTH	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCGIWAUTH	- SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_PARAM,
+	},
+	[SIOCSIWENCODEEXT - SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_encode_ext),
+		.max_tokens	= sizeof(struct iw_encode_ext) +
+				  IW_ENCODING_TOKEN_MAX,
+	},
+	[SIOCGIWENCODEEXT - SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_encode_ext),
+		.max_tokens	= sizeof(struct iw_encode_ext) +
+				  IW_ENCODING_TOKEN_MAX,
+	},
+	[SIOCSIWPMKSA - SIOCIWFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.min_tokens	= sizeof(struct iw_pmksa),
+		.max_tokens	= sizeof(struct iw_pmksa),
+	},
+};
+static const unsigned standard_ioctl_num = ARRAY_SIZE(standard_ioctl);
+
+/*
+ * Meta-data about all the additional standard Wireless Extension events
+ * we know about.
+ */
+static const struct iw_ioctl_description standard_event[] = {
+	[IWEVTXDROP	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_ADDR,
+	},
+	[IWEVQUAL	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_QUAL,
+	},
+	[IWEVCUSTOM	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_CUSTOM_MAX,
+	},
+	[IWEVREGISTERED	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_ADDR,
+	},
+	[IWEVEXPIRED	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_ADDR,
+	},
+	[IWEVGENIE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[IWEVMICHAELMICFAILURE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= sizeof(struct iw_michaelmicfailure),
+	},
+	[IWEVASSOCREQIE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[IWEVASSOCRESPIE	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= IW_GENERIC_IE_MAX,
+	},
+	[IWEVPMKIDCAND	- IWEVFIRST] = {
+		.header_type	= IW_HEADER_TYPE_POINT,
+		.token_size	= 1,
+		.max_tokens	= sizeof(struct iw_pmkid_cand),
+	},
+};
+static const unsigned standard_event_num = ARRAY_SIZE(standard_event);
+
+/* Size (in bytes) of the various private data types */
+static const char iw_priv_type_size[] = {
+	0,				/* IW_PRIV_TYPE_NONE */
+	1,				/* IW_PRIV_TYPE_BYTE */
+	1,				/* IW_PRIV_TYPE_CHAR */
+	0,				/* Not defined */
+	sizeof(__u32),			/* IW_PRIV_TYPE_INT */
+	sizeof(struct iw_freq),		/* IW_PRIV_TYPE_FLOAT */
+	sizeof(struct sockaddr),	/* IW_PRIV_TYPE_ADDR */
+	0,				/* Not defined */
+};
+
+/* Size (in bytes) of various events */
+static const int event_type_size[] = {
+	IW_EV_LCP_LEN,			/* IW_HEADER_TYPE_NULL */
+	0,
+	IW_EV_CHAR_LEN,			/* IW_HEADER_TYPE_CHAR */
+	0,
+	IW_EV_UINT_LEN,			/* IW_HEADER_TYPE_UINT */
+	IW_EV_FREQ_LEN,			/* IW_HEADER_TYPE_FREQ */
+	IW_EV_ADDR_LEN,			/* IW_HEADER_TYPE_ADDR */
+	0,
+	IW_EV_POINT_LEN,		/* Without variable payload */
+	IW_EV_PARAM_LEN,		/* IW_HEADER_TYPE_PARAM */
+	IW_EV_QUAL_LEN,			/* IW_HEADER_TYPE_QUAL */
+};
+
+/* Size (in bytes) of various events, as packed */
+static const int event_type_pk_size[] = {
+	IW_EV_LCP_PK_LEN,		/* IW_HEADER_TYPE_NULL */
+	0,
+	IW_EV_CHAR_PK_LEN,		/* IW_HEADER_TYPE_CHAR */
+	0,
+	IW_EV_UINT_PK_LEN,		/* IW_HEADER_TYPE_UINT */
+	IW_EV_FREQ_PK_LEN,		/* IW_HEADER_TYPE_FREQ */
+	IW_EV_ADDR_PK_LEN,		/* IW_HEADER_TYPE_ADDR */
+	0,
+	IW_EV_POINT_PK_LEN,		/* Without variable payload */
+	IW_EV_PARAM_PK_LEN,		/* IW_HEADER_TYPE_PARAM */
+	IW_EV_QUAL_PK_LEN,		/* IW_HEADER_TYPE_QUAL */
+};
+
+/************************ COMMON SUBROUTINES ************************/
+/*
+ * Stuff that may be used in various place or doesn't fit in one
+ * of the section below.
+ */
+
+/* ---------------------------------------------------------------- */
+/*
+ * Return the driver handler associated with a specific Wireless Extension.
+ * Called from various place, so make sure it remains efficient.
+ */
+static inline iw_handler get_handler(struct net_device *dev,
+				     unsigned int cmd)
+{
+	/* Don't "optimise" the following variable, it will crash */
+	unsigned int	index;		/* *MUST* be unsigned */
+
+	/* Check if we have some wireless handlers defined */
+	if (dev->wireless_handlers == NULL)
+		return NULL;
+
+	/* Try as a standard command */
+	index = cmd - SIOCIWFIRST;
+	if (index < dev->wireless_handlers->num_standard)
+		return dev->wireless_handlers->standard[index];
+
+	/* Try as a private command */
+	index = cmd - SIOCIWFIRSTPRIV;
+	if (index < dev->wireless_handlers->num_private)
+		return dev->wireless_handlers->private[index];
+
+	/* Not found */
+	return NULL;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Get statistics out of the driver
+ */
+static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
+{
+	/* New location */
+	if ((dev->wireless_handlers != NULL) &&
+	   (dev->wireless_handlers->get_wireless_stats != NULL))
+		return dev->wireless_handlers->get_wireless_stats(dev);
+
+	/* Not found */
+	return (struct iw_statistics *) NULL;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Call the commit handler in the driver
+ * (if exist and if conditions are right)
+ *
+ * Note : our current commit strategy is currently pretty dumb,
+ * but we will be able to improve on that...
+ * The goal is to try to agreagate as many changes as possible
+ * before doing the commit. Drivers that will define a commit handler
+ * are usually those that need a reset after changing parameters, so
+ * we want to minimise the number of reset.
+ * A cool idea is to use a timer : at each "set" command, we re-set the
+ * timer, when the timer eventually fires, we call the driver.
+ * Hopefully, more on that later.
+ *
+ * Also, I'm waiting to see how many people will complain about the
+ * netif_running(dev) test. I'm open on that one...
+ * Hopefully, the driver will remember to do a commit in "open()" ;-)
+ */
+static inline int call_commit_handler(struct net_device *	dev)
+{
+	if ((netif_running(dev)) &&
+	   (dev->wireless_handlers->standard[0] != NULL)) {
+		/* Call the commit handler on the driver */
+		return dev->wireless_handlers->standard[0](dev, NULL,
+							   NULL, NULL);
+	} else
+		return 0;		/* Command completed successfully */
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Calculate size of private arguments
+ */
+static inline int get_priv_size(__u16	args)
+{
+	int	num = args & IW_PRIV_SIZE_MASK;
+	int	type = (args & IW_PRIV_TYPE_MASK) >> 12;
+
+	return num * iw_priv_type_size[type];
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Re-calculate the size of private arguments
+ */
+static inline int adjust_priv_size(__u16		args,
+				   union iwreq_data *	wrqu)
+{
+	int	num = wrqu->data.length;
+	int	max = args & IW_PRIV_SIZE_MASK;
+	int	type = (args & IW_PRIV_TYPE_MASK) >> 12;
+
+	/* Make sure the driver doesn't goof up */
+	if (max < num)
+		num = max;
+
+	return num * iw_priv_type_size[type];
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Standard Wireless Handler : get wireless stats
+ *	Allow programatic access to /proc/net/wireless even if /proc
+ *	doesn't exist... Also more efficient...
+ */
+static int iw_handler_get_iwstats(struct net_device *		dev,
+				  struct iw_request_info *	info,
+				  union iwreq_data *		wrqu,
+				  char *			extra)
+{
+	/* Get stats from the driver */
+	struct iw_statistics *stats;
+
+	stats = get_wireless_stats(dev);
+	if (stats != (struct iw_statistics *) NULL) {
+
+		/* Copy statistics to extra */
+		memcpy(extra, stats, sizeof(struct iw_statistics));
+		wrqu->data.length = sizeof(struct iw_statistics);
+
+		/* Check if we need to clear the updated flag */
+		if (wrqu->data.flags != 0)
+			stats->qual.updated &= ~IW_QUAL_ALL_UPDATED;
+		return 0;
+	} else
+		return -EOPNOTSUPP;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Standard Wireless Handler : get iwpriv definitions
+ * Export the driver private handler definition
+ * They will be picked up by tools like iwpriv...
+ */
+static int iw_handler_get_private(struct net_device *		dev,
+				  struct iw_request_info *	info,
+				  union iwreq_data *		wrqu,
+				  char *			extra)
+{
+	/* Check if the driver has something to export */
+	if ((dev->wireless_handlers->num_private_args == 0) ||
+	   (dev->wireless_handlers->private_args == NULL))
+		return -EOPNOTSUPP;
+
+	/* Check if there is enough buffer up there */
+	if (wrqu->data.length < dev->wireless_handlers->num_private_args) {
+		/* User space can't know in advance how large the buffer
+		 * needs to be. Give it a hint, so that we can support
+		 * any size buffer we want somewhat efficiently... */
+		wrqu->data.length = dev->wireless_handlers->num_private_args;
+		return -E2BIG;
+	}
+
+	/* Set the number of available ioctls. */
+	wrqu->data.length = dev->wireless_handlers->num_private_args;
+
+	/* Copy structure to the user buffer. */
+	memcpy(extra, dev->wireless_handlers->private_args,
+	       sizeof(struct iw_priv_args) * wrqu->data.length);
+
+	return 0;
+}
+
+
+/******************** /proc/net/wireless SUPPORT ********************/
+/*
+ * The /proc/net/wireless file is a human readable user-space interface
+ * exporting various wireless specific statistics from the wireless devices.
+ * This is the most popular part of the Wireless Extensions ;-)
+ *
+ * This interface is a pure clone of /proc/net/dev (in net/core/dev.c).
+ * The content of the file is basically the content of "struct iw_statistics".
+ */
+
+#ifdef CONFIG_PROC_FS
+
+/* ---------------------------------------------------------------- */
+/*
+ * Print one entry (line) of /proc/net/wireless
+ */
+static __inline__ void wireless_seq_printf_stats(struct seq_file *seq,
+						 struct net_device *dev)
+{
+	/* Get stats from the driver */
+	struct iw_statistics *stats = get_wireless_stats(dev);
+
+	if (stats) {
+		seq_printf(seq, "%6s: %04x  %3d%c  %3d%c  %3d%c  %6d %6d %6d "
+				"%6d %6d   %6d\n",
+			   dev->name, stats->status, stats->qual.qual,
+			   stats->qual.updated & IW_QUAL_QUAL_UPDATED
+			   ? '.' : ' ',
+			   ((__s32) stats->qual.level) -
+			   ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0),
+			   stats->qual.updated & IW_QUAL_LEVEL_UPDATED
+			   ? '.' : ' ',
+			   ((__s32) stats->qual.noise) -
+			   ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0),
+			   stats->qual.updated & IW_QUAL_NOISE_UPDATED
+			   ? '.' : ' ',
+			   stats->discard.nwid, stats->discard.code,
+			   stats->discard.fragment, stats->discard.retries,
+			   stats->discard.misc, stats->miss.beacon);
+		stats->qual.updated &= ~IW_QUAL_ALL_UPDATED;
+	}
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Print info for /proc/net/wireless (print all entries)
+ */
+static int wireless_seq_show(struct seq_file *seq, void *v)
+{
+	if (v == SEQ_START_TOKEN)
+		seq_printf(seq, "Inter-| sta-|   Quality        |   Discarded "
+				"packets               | Missed | WE\n"
+				" face | tus | link level noise |  nwid  "
+				"crypt   frag  retry   misc | beacon | %d\n",
+			   WIRELESS_EXT);
+	else
+		wireless_seq_printf_stats(seq, v);
+	return 0;
+}
+
+static const struct seq_operations wireless_seq_ops = {
+	.start = dev_seq_start,
+	.next  = dev_seq_next,
+	.stop  = dev_seq_stop,
+	.show  = wireless_seq_show,
+};
+
+static int wireless_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &wireless_seq_ops);
+}
+
+static const struct file_operations wireless_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = wireless_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+int __init wireless_proc_init(void)
+{
+	/* Create /proc/net/wireless entry */
+	if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops))
+		return -ENOMEM;
+
+	return 0;
+}
+#endif	/* CONFIG_PROC_FS */
+
+/************************** IOCTL SUPPORT **************************/
+/*
+ * The original user space API to configure all those Wireless Extensions
+ * is through IOCTLs.
+ * In there, we check if we need to call the new driver API (iw_handler)
+ * or just call the driver ioctl handler.
+ */
+
+/* ---------------------------------------------------------------- */
+/*
+ * Wrapper to call a standard Wireless Extension handler.
+ * We do various checks and also take care of moving data between
+ * user space and kernel space.
+ */
+static int ioctl_standard_call(struct net_device *	dev,
+			       struct ifreq *		ifr,
+			       unsigned int		cmd,
+			       iw_handler		handler)
+{
+	struct iwreq *				iwr = (struct iwreq *) ifr;
+	const struct iw_ioctl_description *	descr;
+	struct iw_request_info			info;
+	int					ret = -EINVAL;
+
+	/* Get the description of the IOCTL */
+	if ((cmd - SIOCIWFIRST) >= standard_ioctl_num)
+		return -EOPNOTSUPP;
+	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
+
+#ifdef WE_IOCTL_DEBUG
+	printk(KERN_DEBUG "%s (WE) : Found standard handler for 0x%04X\n",
+	       ifr->ifr_name, cmd);
+	printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
+#endif	/* WE_IOCTL_DEBUG */
+
+	/* Prepare the call */
+	info.cmd = cmd;
+	info.flags = 0;
+
+	/* Check if we have a pointer to user space data or not */
+	if (descr->header_type != IW_HEADER_TYPE_POINT) {
+
+		/* No extra arguments. Trivial to handle */
+		ret = handler(dev, &info, &(iwr->u), NULL);
+
+#ifdef WE_SET_EVENT
+		/* Generate an event to notify listeners of the change */
+		if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
+		   ((ret == 0) || (ret == -EIWCOMMIT)))
+			wireless_send_event(dev, cmd, &(iwr->u), NULL);
+#endif	/* WE_SET_EVENT */
+	} else {
+		char *	extra;
+		int	extra_size;
+		int	user_length = 0;
+		int	err;
+		int	essid_compat = 0;
+
+		/* Calculate space needed by arguments. Always allocate
+		 * for max space. Easier, and won't last long... */
+		extra_size = descr->max_tokens * descr->token_size;
+
+		/* Check need for ESSID compatibility for WE < 21 */
+		switch (cmd) {
+		case SIOCSIWESSID:
+		case SIOCGIWESSID:
+		case SIOCSIWNICKN:
+		case SIOCGIWNICKN:
+			if (iwr->u.data.length == descr->max_tokens + 1)
+				essid_compat = 1;
+			else if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
+				char essid[IW_ESSID_MAX_SIZE + 1];
+
+				err = copy_from_user(essid, iwr->u.data.pointer,
+						     iwr->u.data.length *
+						     descr->token_size);
+				if (err)
+					return -EFAULT;
+
+				if (essid[iwr->u.data.length - 1] == '\0')
+					essid_compat = 1;
+			}
+			break;
+		default:
+			break;
+		}
+
+		iwr->u.data.length -= essid_compat;
+
+		/* Check what user space is giving us */
+		if (IW_IS_SET(cmd)) {
+			/* Check NULL pointer */
+			if ((iwr->u.data.pointer == NULL) &&
+			   (iwr->u.data.length != 0))
+				return -EFAULT;
+			/* Check if number of token fits within bounds */
+			if (iwr->u.data.length > descr->max_tokens)
+				return -E2BIG;
+			if (iwr->u.data.length < descr->min_tokens)
+				return -EINVAL;
+		} else {
+			/* Check NULL pointer */
+			if (iwr->u.data.pointer == NULL)
+				return -EFAULT;
+			/* Save user space buffer size for checking */
+			user_length = iwr->u.data.length;
+
+			/* Don't check if user_length > max to allow forward
+			 * compatibility. The test user_length < min is
+			 * implied by the test at the end. */
+
+			/* Support for very large requests */
+			if ((descr->flags & IW_DESCR_FLAG_NOMAX) &&
+			   (user_length > descr->max_tokens)) {
+				/* Allow userspace to GET more than max so
+				 * we can support any size GET requests.
+				 * There is still a limit : -ENOMEM. */
+				extra_size = user_length * descr->token_size;
+				/* Note : user_length is originally a __u16,
+				 * and token_size is controlled by us,
+				 * so extra_size won't get negative and
+				 * won't overflow... */
+			}
+		}
+
+#ifdef WE_IOCTL_DEBUG
+		printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
+		       dev->name, extra_size);
+#endif	/* WE_IOCTL_DEBUG */
+
+		/* Create the kernel buffer */
+		/*    kzalloc ensures NULL-termination for essid_compat */
+		extra = kzalloc(extra_size, GFP_KERNEL);
+		if (extra == NULL) {
+			return -ENOMEM;
+		}
+
+		/* If it is a SET, get all the extra data in here */
+		if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
+			err = copy_from_user(extra, iwr->u.data.pointer,
+					     iwr->u.data.length *
+					     descr->token_size);
+			if (err) {
+				kfree(extra);
+				return -EFAULT;
+			}
+#ifdef WE_IOCTL_DEBUG
+			printk(KERN_DEBUG "%s (WE) : Got %d bytes\n",
+			       dev->name,
+			       iwr->u.data.length * descr->token_size);
+#endif	/* WE_IOCTL_DEBUG */
+		}
+
+		/* Call the handler */
+		ret = handler(dev, &info, &(iwr->u), extra);
+
+		iwr->u.data.length += essid_compat;
+
+		/* If we have something to return to the user */
+		if (!ret && IW_IS_GET(cmd)) {
+			/* Check if there is enough buffer up there */
+			if (user_length < iwr->u.data.length) {
+				kfree(extra);
+				return -E2BIG;
+			}
+
+			err = copy_to_user(iwr->u.data.pointer, extra,
+					   iwr->u.data.length *
+					   descr->token_size);
+			if (err)
+				ret =  -EFAULT;
+#ifdef WE_IOCTL_DEBUG
+			printk(KERN_DEBUG "%s (WE) : Wrote %d bytes\n",
+			       dev->name,
+			       iwr->u.data.length * descr->token_size);
+#endif	/* WE_IOCTL_DEBUG */
+		}
+
+#ifdef WE_SET_EVENT
+		/* Generate an event to notify listeners of the change */
+		if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
+		   ((ret == 0) || (ret == -EIWCOMMIT))) {
+			if (descr->flags & IW_DESCR_FLAG_RESTRICT)
+				/* If the event is restricted, don't
+				 * export the payload */
+				wireless_send_event(dev, cmd, &(iwr->u), NULL);
+			else
+				wireless_send_event(dev, cmd, &(iwr->u),
+						    extra);
+		}
+#endif	/* WE_SET_EVENT */
+
+		/* Cleanup - I told you it wasn't that long ;-) */
+		kfree(extra);
+	}
+
+	/* Call commit handler if needed and defined */
+	if (ret == -EIWCOMMIT)
+		ret = call_commit_handler(dev);
+
+	/* Here, we will generate the appropriate event if needed */
+
+	return ret;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Wrapper to call a private Wireless Extension handler.
+ * We do various checks and also take care of moving data between
+ * user space and kernel space.
+ * It's not as nice and slimline as the standard wrapper. The cause
+ * is struct iw_priv_args, which was not really designed for the
+ * job we are going here.
+ *
+ * IMPORTANT : This function prevent to set and get data on the same
+ * IOCTL and enforce the SET/GET convention. Not doing it would be
+ * far too hairy...
+ * If you need to set and get data at the same time, please don't use
+ * a iw_handler but process it in your ioctl handler (i.e. use the
+ * old driver API).
+ */
+static inline int ioctl_private_call(struct net_device *	dev,
+				     struct ifreq *		ifr,
+				     unsigned int		cmd,
+				     iw_handler		handler)
+{
+	struct iwreq *			iwr = (struct iwreq *) ifr;
+	const struct iw_priv_args *	descr = NULL;
+	struct iw_request_info		info;
+	int				extra_size = 0;
+	int				i;
+	int				ret = -EINVAL;
+
+	/* Get the description of the IOCTL */
+	for (i = 0; i < dev->wireless_handlers->num_private_args; i++)
+		if (cmd == dev->wireless_handlers->private_args[i].cmd) {
+			descr = &(dev->wireless_handlers->private_args[i]);
+			break;
+		}
+
+#ifdef WE_IOCTL_DEBUG
+	printk(KERN_DEBUG "%s (WE) : Found private handler for 0x%04X\n",
+	       ifr->ifr_name, cmd);
+	if (descr) {
+		printk(KERN_DEBUG "%s (WE) : Name %s, set %X, get %X\n",
+		       dev->name, descr->name,
+		       descr->set_args, descr->get_args);
+	}
+#endif	/* WE_IOCTL_DEBUG */
+
+	/* Compute the size of the set/get arguments */
+	if (descr != NULL) {
+		if (IW_IS_SET(cmd)) {
+			int	offset = 0;	/* For sub-ioctls */
+			/* Check for sub-ioctl handler */
+			if (descr->name[0] == '\0')
+				/* Reserve one int for sub-ioctl index */
+				offset = sizeof(__u32);
+
+			/* Size of set arguments */
+			extra_size = get_priv_size(descr->set_args);
+
+			/* Does it fits in iwr ? */
+			if ((descr->set_args & IW_PRIV_SIZE_FIXED) &&
+			   ((extra_size + offset) <= IFNAMSIZ))
+				extra_size = 0;
+		} else {
+			/* Size of get arguments */
+			extra_size = get_priv_size(descr->get_args);
+
+			/* Does it fits in iwr ? */
+			if ((descr->get_args & IW_PRIV_SIZE_FIXED) &&
+			   (extra_size <= IFNAMSIZ))
+				extra_size = 0;
+		}
+	}
+
+	/* Prepare the call */
+	info.cmd = cmd;
+	info.flags = 0;
+
+	/* Check if we have a pointer to user space data or not. */
+	if (extra_size == 0) {
+		/* No extra arguments. Trivial to handle */
+		ret = handler(dev, &info, &(iwr->u), (char *) &(iwr->u));
+	} else {
+		char *	extra;
+		int	err;
+
+		/* Check what user space is giving us */
+		if (IW_IS_SET(cmd)) {
+			/* Check NULL pointer */
+			if ((iwr->u.data.pointer == NULL) &&
+			   (iwr->u.data.length != 0))
+				return -EFAULT;
+
+			/* Does it fits within bounds ? */
+			if (iwr->u.data.length > (descr->set_args &
+						 IW_PRIV_SIZE_MASK))
+				return -E2BIG;
+		} else {
+			/* Check NULL pointer */
+			if (iwr->u.data.pointer == NULL)
+				return -EFAULT;
+		}
+
+#ifdef WE_IOCTL_DEBUG
+		printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
+		       dev->name, extra_size);
+#endif	/* WE_IOCTL_DEBUG */
+
+		/* Always allocate for max space. Easier, and won't last
+		 * long... */
+		extra = kmalloc(extra_size, GFP_KERNEL);
+		if (extra == NULL) {
+			return -ENOMEM;
+		}
+
+		/* If it is a SET, get all the extra data in here */
+		if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
+			err = copy_from_user(extra, iwr->u.data.pointer,
+					     extra_size);
+			if (err) {
+				kfree(extra);
+				return -EFAULT;
+			}
+#ifdef WE_IOCTL_DEBUG
+			printk(KERN_DEBUG "%s (WE) : Got %d elem\n",
+			       dev->name, iwr->u.data.length);
+#endif	/* WE_IOCTL_DEBUG */
+		}
+
+		/* Call the handler */
+		ret = handler(dev, &info, &(iwr->u), extra);
+
+		/* If we have something to return to the user */
+		if (!ret && IW_IS_GET(cmd)) {
+
+			/* Adjust for the actual length if it's variable,
+			 * avoid leaking kernel bits outside. */
+			if (!(descr->get_args & IW_PRIV_SIZE_FIXED)) {
+				extra_size = adjust_priv_size(descr->get_args,
+							      &(iwr->u));
+			}
+
+			err = copy_to_user(iwr->u.data.pointer, extra,
+					   extra_size);
+			if (err)
+				ret =  -EFAULT;
+#ifdef WE_IOCTL_DEBUG
+			printk(KERN_DEBUG "%s (WE) : Wrote %d elem\n",
+			       dev->name, iwr->u.data.length);
+#endif	/* WE_IOCTL_DEBUG */
+		}
+
+		/* Cleanup - I told you it wasn't that long ;-) */
+		kfree(extra);
+	}
+
+
+	/* Call commit handler if needed and defined */
+	if (ret == -EIWCOMMIT)
+		ret = call_commit_handler(dev);
+
+	return ret;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Main IOCTl dispatcher. Called from the main networking code
+ * (dev_ioctl() in net/core/dev.c).
+ * Check the type of IOCTL and call the appropriate wrapper...
+ */
+int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
+{
+	struct net_device *dev;
+	iw_handler	handler;
+
+	/* Permissions are already checked in dev_ioctl() before calling us.
+	 * The copy_to/from_user() of ifr is also dealt with in there */
+
+	/* Make sure the device exist */
+	if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
+		return -ENODEV;
+
+	/* A bunch of special cases, then the generic case...
+	 * Note that 'cmd' is already filtered in dev_ioctl() with
+	 * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */
+	switch (cmd) {
+	case SIOCGIWSTATS:
+		/* Get Wireless Stats */
+		return ioctl_standard_call(dev,
+					   ifr,
+					   cmd,
+					   &iw_handler_get_iwstats);
+
+	case SIOCGIWPRIV:
+		/* Check if we have some wireless handlers defined */
+		if (dev->wireless_handlers != NULL) {
+			/* We export to user space the definition of
+			 * the private handler ourselves */
+			return ioctl_standard_call(dev,
+						   ifr,
+						   cmd,
+						   &iw_handler_get_private);
+		}
+		// ## Fall-through for old API ##
+	default:
+		/* Generic IOCTL */
+		/* Basic check */
+		if (!netif_device_present(dev))
+			return -ENODEV;
+		/* New driver API : try to find the handler */
+		handler = get_handler(dev, cmd);
+		if (handler != NULL) {
+			/* Standard and private are not the same */
+			if (cmd < SIOCIWFIRSTPRIV)
+				return ioctl_standard_call(dev,
+							   ifr,
+							   cmd,
+							   handler);
+			else
+				return ioctl_private_call(dev,
+							  ifr,
+							  cmd,
+							  handler);
+		}
+		/* Old driver API : call driver ioctl handler */
+		if (dev->do_ioctl) {
+			return dev->do_ioctl(dev, ifr, cmd);
+		}
+		return -EOPNOTSUPP;
+	}
+	/* Not reached */
+	return -EINVAL;
+}
+
+
+/************************* EVENT PROCESSING *************************/
+/*
+ * Process events generated by the wireless layer or the driver.
+ * Most often, the event will be propagated through rtnetlink
+ */
+
+#ifdef WE_EVENT_RTNETLINK
+/* ---------------------------------------------------------------- */
+/*
+ * Locking...
+ * ----------
+ *
+ * Thanks to Herbert Xu <herbert@gondor.apana.org.au> for fixing
+ * the locking issue in here and implementing this code !
+ *
+ * The issue : wireless_send_event() is often called in interrupt context,
+ * while the Netlink layer can never be called in interrupt context.
+ * The fully formed RtNetlink events are queued, and then a tasklet is run
+ * to feed those to Netlink.
+ * The skb_queue is interrupt safe, and its lock is not held while calling
+ * Netlink, so there is no possibility of dealock.
+ * Jean II
+ */
+
+static struct sk_buff_head wireless_nlevent_queue;
+
+static int __init wireless_nlevent_init(void)
+{
+	skb_queue_head_init(&wireless_nlevent_queue);
+	return 0;
+}
+
+subsys_initcall(wireless_nlevent_init);
+
+static void wireless_nlevent_process(unsigned long data)
+{
+	struct sk_buff *skb;
+
+	while ((skb = skb_dequeue(&wireless_nlevent_queue)))
+		rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+}
+
+static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
+
+/* ---------------------------------------------------------------- */
+/*
+ * Fill a rtnetlink message with our event data.
+ * Note that we propage only the specified event and don't dump the
+ * current wireless config. Dumping the wireless config is far too
+ * expensive (for each parameter, the driver need to query the hardware).
+ */
+static inline int rtnetlink_fill_iwinfo(struct sk_buff *	skb,
+					struct net_device *	dev,
+					int			type,
+					char *			event,
+					int			event_len)
+{
+	struct ifinfomsg *r;
+	struct nlmsghdr  *nlh;
+	unsigned char	 *b = skb_tail_pointer(skb);
+
+	nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(*r));
+	r = NLMSG_DATA(nlh);
+	r->ifi_family = AF_UNSPEC;
+	r->__ifi_pad = 0;
+	r->ifi_type = dev->type;
+	r->ifi_index = dev->ifindex;
+	r->ifi_flags = dev_get_flags(dev);
+	r->ifi_change = 0;	/* Wireless changes don't affect those flags */
+
+	/* Add the wireless events in the netlink packet */
+	RTA_PUT(skb, IFLA_WIRELESS, event_len, event);
+
+	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+	return skb->len;
+
+nlmsg_failure:
+rtattr_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Create and broadcast and send it on the standard rtnetlink socket
+ * This is a pure clone rtmsg_ifinfo() in net/core/rtnetlink.c
+ * Andrzej Krzysztofowicz mandated that I used a IFLA_XXX field
+ * within a RTM_NEWLINK event.
+ */
+static inline void rtmsg_iwinfo(struct net_device *	dev,
+				char *			event,
+				int			event_len)
+{
+	struct sk_buff *skb;
+	int size = NLMSG_GOODSIZE;
+
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	if (rtnetlink_fill_iwinfo(skb, dev, RTM_NEWLINK,
+				  event, event_len) < 0) {
+		kfree_skb(skb);
+		return;
+	}
+	NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
+	skb_queue_tail(&wireless_nlevent_queue, skb);
+	tasklet_schedule(&wireless_nlevent_tasklet);
+}
+
+#endif	/* WE_EVENT_RTNETLINK */
+
+/* ---------------------------------------------------------------- */
+/*
+ * Main event dispatcher. Called from other parts and drivers.
+ * Send the event on the appropriate channels.
+ * May be called from interrupt context.
+ */
+void wireless_send_event(struct net_device *	dev,
+			 unsigned int		cmd,
+			 union iwreq_data *	wrqu,
+			 char *			extra)
+{
+	const struct iw_ioctl_description *	descr = NULL;
+	int extra_len = 0;
+	struct iw_event  *event;		/* Mallocated whole event */
+	int event_len;				/* Its size */
+	int hdr_len;				/* Size of the event header */
+	int wrqu_off = 0;			/* Offset in wrqu */
+	/* Don't "optimise" the following variable, it will crash */
+	unsigned	cmd_index;		/* *MUST* be unsigned */
+
+	/* Get the description of the Event */
+	if (cmd <= SIOCIWLAST) {
+		cmd_index = cmd - SIOCIWFIRST;
+		if (cmd_index < standard_ioctl_num)
+			descr = &(standard_ioctl[cmd_index]);
+	} else {
+		cmd_index = cmd - IWEVFIRST;
+		if (cmd_index < standard_event_num)
+			descr = &(standard_event[cmd_index]);
+	}
+	/* Don't accept unknown events */
+	if (descr == NULL) {
+		/* Note : we don't return an error to the driver, because
+		 * the driver would not know what to do about it. It can't
+		 * return an error to the user, because the event is not
+		 * initiated by a user request.
+		 * The best the driver could do is to log an error message.
+		 * We will do it ourselves instead...
+		 */
+		printk(KERN_ERR "%s (WE) : Invalid/Unknown Wireless Event (0x%04X)\n",
+		       dev->name, cmd);
+		return;
+	}
+#ifdef WE_EVENT_DEBUG
+	printk(KERN_DEBUG "%s (WE) : Got event 0x%04X\n",
+	       dev->name, cmd);
+	printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
+#endif	/* WE_EVENT_DEBUG */
+
+	/* Check extra parameters and set extra_len */
+	if (descr->header_type == IW_HEADER_TYPE_POINT) {
+		/* Check if number of token fits within bounds */
+		if (wrqu->data.length > descr->max_tokens) {
+			printk(KERN_ERR "%s (WE) : Wireless Event too big (%d)\n", dev->name, wrqu->data.length);
+			return;
+		}
+		if (wrqu->data.length < descr->min_tokens) {
+			printk(KERN_ERR "%s (WE) : Wireless Event too small (%d)\n", dev->name, wrqu->data.length);
+			return;
+		}
+		/* Calculate extra_len - extra is NULL for restricted events */
+		if (extra != NULL)
+			extra_len = wrqu->data.length * descr->token_size;
+		/* Always at an offset in wrqu */
+		wrqu_off = IW_EV_POINT_OFF;
+#ifdef WE_EVENT_DEBUG
+		printk(KERN_DEBUG "%s (WE) : Event 0x%04X, tokens %d, extra_len %d\n", dev->name, cmd, wrqu->data.length, extra_len);
+#endif	/* WE_EVENT_DEBUG */
+	}
+
+	/* Total length of the event */
+	hdr_len = event_type_size[descr->header_type];
+	event_len = hdr_len + extra_len;
+
+#ifdef WE_EVENT_DEBUG
+	printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, wrqu_off %d, event_len %d\n", dev->name, cmd, hdr_len, wrqu_off, event_len);
+#endif	/* WE_EVENT_DEBUG */
+
+	/* Create temporary buffer to hold the event */
+	event = kmalloc(event_len, GFP_ATOMIC);
+	if (event == NULL)
+		return;
+
+	/* Fill event */
+	event->len = event_len;
+	event->cmd = cmd;
+	memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN);
+	if (extra != NULL)
+		memcpy(((char *) event) + hdr_len, extra, extra_len);
+
+#ifdef WE_EVENT_RTNETLINK
+	/* Send via the RtNetlink event channel */
+	rtmsg_iwinfo(dev, (char *) event, event_len);
+#endif	/* WE_EVENT_RTNETLINK */
+
+	/* Cleanup */
+	kfree(event);
+
+	return;		/* Always success, I guess ;-) */
+}
+
+/********************** ENHANCED IWSPY SUPPORT **********************/
+/*
+ * In the old days, the driver was handling spy support all by itself.
+ * Now, the driver can delegate this task to Wireless Extensions.
+ * It needs to use those standard spy iw_handler in struct iw_handler_def,
+ * push data to us via wireless_spy_update() and include struct iw_spy_data
+ * in its private part (and export it in net_device->wireless_data->spy_data).
+ * One of the main advantage of centralising spy support here is that
+ * it becomes much easier to improve and extend it without having to touch
+ * the drivers. One example is the addition of the Spy-Threshold events.
+ */
+
+/* ---------------------------------------------------------------- */
+/*
+ * Return the pointer to the spy data in the driver.
+ * Because this is called on the Rx path via wireless_spy_update(),
+ * we want it to be efficient...
+ */
+static inline struct iw_spy_data * get_spydata(struct net_device *dev)
+{
+	/* This is the new way */
+	if (dev->wireless_data)
+		return(dev->wireless_data->spy_data);
+	return NULL;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Standard Wireless Handler : set Spy List
+ */
+int iw_handler_set_spy(struct net_device *	dev,
+		       struct iw_request_info *	info,
+		       union iwreq_data *	wrqu,
+		       char *			extra)
+{
+	struct iw_spy_data *	spydata = get_spydata(dev);
+	struct sockaddr *	address = (struct sockaddr *) extra;
+
+	/* Make sure driver is not buggy or using the old API */
+	if (!spydata)
+		return -EOPNOTSUPP;
+
+	/* Disable spy collection while we copy the addresses.
+	 * While we copy addresses, any call to wireless_spy_update()
+	 * will NOP. This is OK, as anyway the addresses are changing. */
+	spydata->spy_number = 0;
+
+	/* We want to operate without locking, because wireless_spy_update()
+	 * most likely will happen in the interrupt handler, and therefore
+	 * have its own locking constraints and needs performance.
+	 * The rtnl_lock() make sure we don't race with the other iw_handlers.
+	 * This make sure wireless_spy_update() "see" that the spy list
+	 * is temporarily disabled. */
+	smp_wmb();
+
+	/* Are there are addresses to copy? */
+	if (wrqu->data.length > 0) {
+		int i;
+
+		/* Copy addresses */
+		for (i = 0; i < wrqu->data.length; i++)
+			memcpy(spydata->spy_address[i], address[i].sa_data,
+			       ETH_ALEN);
+		/* Reset stats */
+		memset(spydata->spy_stat, 0,
+		       sizeof(struct iw_quality) * IW_MAX_SPY);
+
+#ifdef WE_SPY_DEBUG
+		printk(KERN_DEBUG "iw_handler_set_spy() :  wireless_data %p, spydata %p, num %d\n", dev->wireless_data, spydata, wrqu->data.length);
+		for (i = 0; i < wrqu->data.length; i++)
+			printk(KERN_DEBUG
+			       "%02X:%02X:%02X:%02X:%02X:%02X \n",
+			       spydata->spy_address[i][0],
+			       spydata->spy_address[i][1],
+			       spydata->spy_address[i][2],
+			       spydata->spy_address[i][3],
+			       spydata->spy_address[i][4],
+			       spydata->spy_address[i][5]);
+#endif	/* WE_SPY_DEBUG */
+	}
+
+	/* Make sure above is updated before re-enabling */
+	smp_wmb();
+
+	/* Enable addresses */
+	spydata->spy_number = wrqu->data.length;
+
+	return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Standard Wireless Handler : get Spy List
+ */
+int iw_handler_get_spy(struct net_device *	dev,
+		       struct iw_request_info *	info,
+		       union iwreq_data *	wrqu,
+		       char *			extra)
+{
+	struct iw_spy_data *	spydata = get_spydata(dev);
+	struct sockaddr *	address = (struct sockaddr *) extra;
+	int			i;
+
+	/* Make sure driver is not buggy or using the old API */
+	if (!spydata)
+		return -EOPNOTSUPP;
+
+	wrqu->data.length = spydata->spy_number;
+
+	/* Copy addresses. */
+	for (i = 0; i < spydata->spy_number; i++) 	{
+		memcpy(address[i].sa_data, spydata->spy_address[i], ETH_ALEN);
+		address[i].sa_family = AF_UNIX;
+	}
+	/* Copy stats to the user buffer (just after). */
+	if (spydata->spy_number > 0)
+		memcpy(extra  + (sizeof(struct sockaddr) *spydata->spy_number),
+		       spydata->spy_stat,
+		       sizeof(struct iw_quality) * spydata->spy_number);
+	/* Reset updated flags. */
+	for (i = 0; i < spydata->spy_number; i++)
+		spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED;
+	return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Standard Wireless Handler : set spy threshold
+ */
+int iw_handler_set_thrspy(struct net_device *	dev,
+			  struct iw_request_info *info,
+			  union iwreq_data *	wrqu,
+			  char *		extra)
+{
+	struct iw_spy_data *	spydata = get_spydata(dev);
+	struct iw_thrspy *	threshold = (struct iw_thrspy *) extra;
+
+	/* Make sure driver is not buggy or using the old API */
+	if (!spydata)
+		return -EOPNOTSUPP;
+
+	/* Just do it */
+	memcpy(&(spydata->spy_thr_low), &(threshold->low),
+	       2 * sizeof(struct iw_quality));
+
+	/* Clear flag */
+	memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under));
+
+#ifdef WE_SPY_DEBUG
+	printk(KERN_DEBUG "iw_handler_set_thrspy() :  low %d ; high %d\n", spydata->spy_thr_low.level, spydata->spy_thr_high.level);
+#endif	/* WE_SPY_DEBUG */
+
+	return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Standard Wireless Handler : get spy threshold
+ */
+int iw_handler_get_thrspy(struct net_device *	dev,
+			  struct iw_request_info *info,
+			  union iwreq_data *	wrqu,
+			  char *		extra)
+{
+	struct iw_spy_data *	spydata = get_spydata(dev);
+	struct iw_thrspy *	threshold = (struct iw_thrspy *) extra;
+
+	/* Make sure driver is not buggy or using the old API */
+	if (!spydata)
+		return -EOPNOTSUPP;
+
+	/* Just do it */
+	memcpy(&(threshold->low), &(spydata->spy_thr_low),
+	       2 * sizeof(struct iw_quality));
+
+	return 0;
+}
+
+/*------------------------------------------------------------------*/
+/*
+ * Prepare and send a Spy Threshold event
+ */
+static void iw_send_thrspy_event(struct net_device *	dev,
+				 struct iw_spy_data *	spydata,
+				 unsigned char *	address,
+				 struct iw_quality *	wstats)
+{
+	union iwreq_data	wrqu;
+	struct iw_thrspy	threshold;
+
+	/* Init */
+	wrqu.data.length = 1;
+	wrqu.data.flags = 0;
+	/* Copy address */
+	memcpy(threshold.addr.sa_data, address, ETH_ALEN);
+	threshold.addr.sa_family = ARPHRD_ETHER;
+	/* Copy stats */
+	memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality));
+	/* Copy also thresholds */
+	memcpy(&(threshold.low), &(spydata->spy_thr_low),
+	       2 * sizeof(struct iw_quality));
+
+#ifdef WE_SPY_DEBUG
+	printk(KERN_DEBUG "iw_send_thrspy_event() : address %02X:%02X:%02X:%02X:%02X:%02X, level %d, up = %d\n",
+	       threshold.addr.sa_data[0],
+	       threshold.addr.sa_data[1],
+	       threshold.addr.sa_data[2],
+	       threshold.addr.sa_data[3],
+	       threshold.addr.sa_data[4],
+	       threshold.addr.sa_data[5], threshold.qual.level);
+#endif	/* WE_SPY_DEBUG */
+
+	/* Send event to user space */
+	wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);
+}
+
+/* ---------------------------------------------------------------- */
+/*
+ * Call for the driver to update the spy data.
+ * For now, the spy data is a simple array. As the size of the array is
+ * small, this is good enough. If we wanted to support larger number of
+ * spy addresses, we should use something more efficient...
+ */
+void wireless_spy_update(struct net_device *	dev,
+			 unsigned char *	address,
+			 struct iw_quality *	wstats)
+{
+	struct iw_spy_data *	spydata = get_spydata(dev);
+	int			i;
+	int			match = -1;
+
+	/* Make sure driver is not buggy or using the old API */
+	if (!spydata)
+		return;
+
+#ifdef WE_SPY_DEBUG
+	printk(KERN_DEBUG "wireless_spy_update() :  wireless_data %p, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_data, spydata, address[0], address[1], address[2], address[3], address[4], address[5]);
+#endif	/* WE_SPY_DEBUG */
+
+	/* Update all records that match */
+	for (i = 0; i < spydata->spy_number; i++)
+		if (!compare_ether_addr(address, spydata->spy_address[i])) {
+			memcpy(&(spydata->spy_stat[i]), wstats,
+			       sizeof(struct iw_quality));
+			match = i;
+		}
+
+	/* Generate an event if we cross the spy threshold.
+	 * To avoid event storms, we have a simple hysteresis : we generate
+	 * event only when we go under the low threshold or above the
+	 * high threshold. */
+	if (match >= 0) {
+		if (spydata->spy_thr_under[match]) {
+			if (wstats->level > spydata->spy_thr_high.level) {
+				spydata->spy_thr_under[match] = 0;
+				iw_send_thrspy_event(dev, spydata,
+						     address, wstats);
+			}
+		} else {
+			if (wstats->level < spydata->spy_thr_low.level) {
+				spydata->spy_thr_under[match] = 1;
+				iw_send_thrspy_event(dev, spydata,
+						     address, wstats);
+			}
+		}
+	}
+}
+
+EXPORT_SYMBOL(iw_handler_get_spy);
+EXPORT_SYMBOL(iw_handler_get_thrspy);
+EXPORT_SYMBOL(iw_handler_set_spy);
+EXPORT_SYMBOL(iw_handler_set_thrspy);
+EXPORT_SYMBOL(wireless_send_event);
+EXPORT_SYMBOL(wireless_spy_update);
-- 
cgit v1.2.3


From 295f4a1fa3ecdf816b18393ef7bcd37c032df2fa Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 26 Apr 2007 20:43:56 -0700
Subject: [WEXT]: Clean up how wext is called.

This patch cleans up the call paths from the core code into wext.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/iw_handler.h | 11 +----------
 include/net/wext.h       | 24 ++++++++++++++++++++++++
 net/core/dev.c           | 34 ++++------------------------------
 net/wireless/wext.c      | 28 ++++++++++++++++++++++++----
 4 files changed, 53 insertions(+), 44 deletions(-)
 create mode 100644 include/net/wext.h

diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index 909ca87d13b..f23d07ca7c5 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -431,16 +431,7 @@ struct iw_public_data {
  * Those may be called only within the kernel.
  */
 
-/* First : function strictly used inside the kernel */
-
-/* Handle /proc/net/wireless, called in net/code/dev.c */
-extern int dev_get_wireless_info(char * buffer, char **start, off_t offset,
-				 int length);
-
-/* Handle IOCTLs, called in net/core/dev.c */
-extern int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd);
-
-/* Second : functions that may be called by driver modules */
+/* functions that may be called by driver modules */
 
 /* Send a single event to user space */
 extern void wireless_send_event(struct net_device *	dev,
diff --git a/include/net/wext.h b/include/net/wext.h
new file mode 100644
index 00000000000..55741836a67
--- /dev/null
+++ b/include/net/wext.h
@@ -0,0 +1,24 @@
+#ifndef __NET_WEXT_H
+#define __NET_WEXT_H
+
+/*
+ * wireless extensions interface to the core code
+ */
+
+#ifdef CONFIG_WIRELESS_EXT
+extern int wext_proc_init(void);
+extern int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
+			     void __user *arg);
+#else
+static inline int wext_proc_init()
+{
+	return 0;
+}
+static inline int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
+				    void __user *arg)
+{
+	return -EINVAL;
+}
+#endif
+
+#endif /* __NET_WEXT_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index 700e4b5081b..d5e42d13bd6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -109,7 +109,7 @@
 #include <linux/netpoll.h>
 #include <linux/rcupdate.h>
 #include <linux/delay.h>
-#include <linux/wireless.h>
+#include <net/wext.h>
 #include <net/iw_handler.h>
 #include <asm/current.h>
 #include <linux/audit.h>
@@ -2348,12 +2348,6 @@ static const struct file_operations ptype_seq_fops = {
 };
 
 
-#ifdef CONFIG_WIRELESS_EXT
-extern int wireless_proc_init(void);
-#else
-#define wireless_proc_init() 0
-#endif
-
 static int __init dev_proc_init(void)
 {
 	int rc = -ENOMEM;
@@ -2365,7 +2359,7 @@ static int __init dev_proc_init(void)
 	if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
 		goto out_dev2;
 
-	if (wireless_proc_init())
+	if (wext_proc_init())
 		goto out_softnet;
 	rc = 0;
 out:
@@ -2923,29 +2917,9 @@ int dev_ioctl(unsigned int cmd, void __user *arg)
 					ret = -EFAULT;
 				return ret;
 			}
-#ifdef CONFIG_WIRELESS_EXT
 			/* Take care of Wireless Extensions */
-			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
-				/* If command is `set a parameter', or
-				 * `get the encoding parameters', check if
-				 * the user has the right to do it */
-				if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE
-				    || cmd == SIOCGIWENCODEEXT) {
-					if (!capable(CAP_NET_ADMIN))
-						return -EPERM;
-				}
-				dev_load(ifr.ifr_name);
-				rtnl_lock();
-				/* Follow me in net/wireless/wext.c */
-				ret = wireless_process_ioctl(&ifr, cmd);
-				rtnl_unlock();
-				if (IW_IS_GET(cmd) &&
-				    copy_to_user(arg, &ifr,
-						 sizeof(struct ifreq)))
-					ret = -EFAULT;
-				return ret;
-			}
-#endif	/* CONFIG_WIRELESS_EXT */
+			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
+				return wext_handle_ioctl(&ifr, cmd, arg);
 			return -EINVAL;
 	}
 }
diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index fba295e05e7..a6cf1034e07 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -97,6 +97,7 @@
 #include <linux/wireless.h>		/* Pretty obvious */
 #include <net/iw_handler.h>		/* New driver API */
 #include <net/netlink.h>
+#include <net/wext.h>
 
 #include <asm/uaccess.h>		/* copy_to_user() */
 
@@ -696,7 +697,7 @@ static const struct file_operations wireless_seq_fops = {
 	.release = seq_release,
 };
 
-int __init wireless_proc_init(void)
+int __init wext_proc_init(void)
 {
 	/* Create /proc/net/wireless entry */
 	if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops))
@@ -1075,11 +1076,10 @@ static inline int ioctl_private_call(struct net_device *	dev,
 
 /* ---------------------------------------------------------------- */
 /*
- * Main IOCTl dispatcher. Called from the main networking code
- * (dev_ioctl() in net/core/dev.c).
+ * Main IOCTl dispatcher.
  * Check the type of IOCTL and call the appropriate wrapper...
  */
-int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
+static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
 {
 	struct net_device *dev;
 	iw_handler	handler;
@@ -1143,6 +1143,26 @@ int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
 	return -EINVAL;
 }
 
+/* entry point from dev ioctl */
+int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
+		      void __user *arg)
+{
+	int ret;
+
+	/* If command is `set a parameter', or
+	 * `get the encoding parameters', check if
+	 * the user has the right to do it */
+	if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE || cmd == SIOCGIWENCODEEXT)
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+	dev_load(ifr->ifr_name);
+	rtnl_lock();
+	ret = wireless_process_ioctl(ifr, cmd);
+	rtnl_unlock();
+	if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct ifreq)))
+		return -EFAULT;
+	return ret;
+}
 
 /************************* EVENT PROCESSING *************************/
 /*
-- 
cgit v1.2.3


From 235c107ba08becb3ae6c3d3449c8b1053a5a9d75 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 26 Apr 2007 20:44:35 -0700
Subject: [WEXT]: Remove dead debug code.

This patch kills a whole bunch of code that can only ever be used by
defining some things in wext.c. Also, the things that are printed are
mostly useless since the API is fairly well-tested.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext.c | 92 -----------------------------------------------------
 1 file changed, 92 deletions(-)

diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index a6cf1034e07..1db2be7ba2f 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -103,11 +103,6 @@
 
 /**************************** CONSTANTS ****************************/
 
-/* Debugging stuff */
-#undef WE_IOCTL_DEBUG		/* Debug IOCTL API */
-#undef WE_EVENT_DEBUG		/* Debug Event dispatcher */
-#undef WE_SPY_DEBUG		/* Debug enhanced spy support */
-
 /* Options */
 #define WE_EVENT_RTNETLINK	/* Propagate events using RtNetlink */
 #define WE_SET_EVENT		/* Generate an event on some set commands */
@@ -736,12 +731,6 @@ static int ioctl_standard_call(struct net_device *	dev,
 		return -EOPNOTSUPP;
 	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
 
-#ifdef WE_IOCTL_DEBUG
-	printk(KERN_DEBUG "%s (WE) : Found standard handler for 0x%04X\n",
-	       ifr->ifr_name, cmd);
-	printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif	/* WE_IOCTL_DEBUG */
-
 	/* Prepare the call */
 	info.cmd = cmd;
 	info.flags = 0;
@@ -832,11 +821,6 @@ static int ioctl_standard_call(struct net_device *	dev,
 			}
 		}
 
-#ifdef WE_IOCTL_DEBUG
-		printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
-		       dev->name, extra_size);
-#endif	/* WE_IOCTL_DEBUG */
-
 		/* Create the kernel buffer */
 		/*    kzalloc ensures NULL-termination for essid_compat */
 		extra = kzalloc(extra_size, GFP_KERNEL);
@@ -853,11 +837,6 @@ static int ioctl_standard_call(struct net_device *	dev,
 				kfree(extra);
 				return -EFAULT;
 			}
-#ifdef WE_IOCTL_DEBUG
-			printk(KERN_DEBUG "%s (WE) : Got %d bytes\n",
-			       dev->name,
-			       iwr->u.data.length * descr->token_size);
-#endif	/* WE_IOCTL_DEBUG */
 		}
 
 		/* Call the handler */
@@ -878,11 +857,6 @@ static int ioctl_standard_call(struct net_device *	dev,
 					   descr->token_size);
 			if (err)
 				ret =  -EFAULT;
-#ifdef WE_IOCTL_DEBUG
-			printk(KERN_DEBUG "%s (WE) : Wrote %d bytes\n",
-			       dev->name,
-			       iwr->u.data.length * descr->token_size);
-#endif	/* WE_IOCTL_DEBUG */
 		}
 
 #ifdef WE_SET_EVENT
@@ -947,16 +921,6 @@ static inline int ioctl_private_call(struct net_device *	dev,
 			break;
 		}
 
-#ifdef WE_IOCTL_DEBUG
-	printk(KERN_DEBUG "%s (WE) : Found private handler for 0x%04X\n",
-	       ifr->ifr_name, cmd);
-	if (descr) {
-		printk(KERN_DEBUG "%s (WE) : Name %s, set %X, get %X\n",
-		       dev->name, descr->name,
-		       descr->set_args, descr->get_args);
-	}
-#endif	/* WE_IOCTL_DEBUG */
-
 	/* Compute the size of the set/get arguments */
 	if (descr != NULL) {
 		if (IW_IS_SET(cmd)) {
@@ -1013,11 +977,6 @@ static inline int ioctl_private_call(struct net_device *	dev,
 				return -EFAULT;
 		}
 
-#ifdef WE_IOCTL_DEBUG
-		printk(KERN_DEBUG "%s (WE) : Malloc %d bytes\n",
-		       dev->name, extra_size);
-#endif	/* WE_IOCTL_DEBUG */
-
 		/* Always allocate for max space. Easier, and won't last
 		 * long... */
 		extra = kmalloc(extra_size, GFP_KERNEL);
@@ -1033,10 +992,6 @@ static inline int ioctl_private_call(struct net_device *	dev,
 				kfree(extra);
 				return -EFAULT;
 			}
-#ifdef WE_IOCTL_DEBUG
-			printk(KERN_DEBUG "%s (WE) : Got %d elem\n",
-			       dev->name, iwr->u.data.length);
-#endif	/* WE_IOCTL_DEBUG */
 		}
 
 		/* Call the handler */
@@ -1056,10 +1011,6 @@ static inline int ioctl_private_call(struct net_device *	dev,
 					   extra_size);
 			if (err)
 				ret =  -EFAULT;
-#ifdef WE_IOCTL_DEBUG
-			printk(KERN_DEBUG "%s (WE) : Wrote %d elem\n",
-			       dev->name, iwr->u.data.length);
-#endif	/* WE_IOCTL_DEBUG */
 		}
 
 		/* Cleanup - I told you it wasn't that long ;-) */
@@ -1319,11 +1270,6 @@ void wireless_send_event(struct net_device *	dev,
 		       dev->name, cmd);
 		return;
 	}
-#ifdef WE_EVENT_DEBUG
-	printk(KERN_DEBUG "%s (WE) : Got event 0x%04X\n",
-	       dev->name, cmd);
-	printk(KERN_DEBUG "%s (WE) : Header type : %d, Token type : %d, size : %d, token : %d\n", dev->name, descr->header_type, descr->token_type, descr->token_size, descr->max_tokens);
-#endif	/* WE_EVENT_DEBUG */
 
 	/* Check extra parameters and set extra_len */
 	if (descr->header_type == IW_HEADER_TYPE_POINT) {
@@ -1341,19 +1287,12 @@ void wireless_send_event(struct net_device *	dev,
 			extra_len = wrqu->data.length * descr->token_size;
 		/* Always at an offset in wrqu */
 		wrqu_off = IW_EV_POINT_OFF;
-#ifdef WE_EVENT_DEBUG
-		printk(KERN_DEBUG "%s (WE) : Event 0x%04X, tokens %d, extra_len %d\n", dev->name, cmd, wrqu->data.length, extra_len);
-#endif	/* WE_EVENT_DEBUG */
 	}
 
 	/* Total length of the event */
 	hdr_len = event_type_size[descr->header_type];
 	event_len = hdr_len + extra_len;
 
-#ifdef WE_EVENT_DEBUG
-	printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, wrqu_off %d, event_len %d\n", dev->name, cmd, hdr_len, wrqu_off, event_len);
-#endif	/* WE_EVENT_DEBUG */
-
 	/* Create temporary buffer to hold the event */
 	event = kmalloc(event_len, GFP_ATOMIC);
 	if (event == NULL)
@@ -1443,19 +1382,6 @@ int iw_handler_set_spy(struct net_device *	dev,
 		/* Reset stats */
 		memset(spydata->spy_stat, 0,
 		       sizeof(struct iw_quality) * IW_MAX_SPY);
-
-#ifdef WE_SPY_DEBUG
-		printk(KERN_DEBUG "iw_handler_set_spy() :  wireless_data %p, spydata %p, num %d\n", dev->wireless_data, spydata, wrqu->data.length);
-		for (i = 0; i < wrqu->data.length; i++)
-			printk(KERN_DEBUG
-			       "%02X:%02X:%02X:%02X:%02X:%02X \n",
-			       spydata->spy_address[i][0],
-			       spydata->spy_address[i][1],
-			       spydata->spy_address[i][2],
-			       spydata->spy_address[i][3],
-			       spydata->spy_address[i][4],
-			       spydata->spy_address[i][5]);
-#endif	/* WE_SPY_DEBUG */
 	}
 
 	/* Make sure above is updated before re-enabling */
@@ -1525,10 +1451,6 @@ int iw_handler_set_thrspy(struct net_device *	dev,
 	/* Clear flag */
 	memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under));
 
-#ifdef WE_SPY_DEBUG
-	printk(KERN_DEBUG "iw_handler_set_thrspy() :  low %d ; high %d\n", spydata->spy_thr_low.level, spydata->spy_thr_high.level);
-#endif	/* WE_SPY_DEBUG */
-
 	return 0;
 }
 
@@ -1579,16 +1501,6 @@ static void iw_send_thrspy_event(struct net_device *	dev,
 	memcpy(&(threshold.low), &(spydata->spy_thr_low),
 	       2 * sizeof(struct iw_quality));
 
-#ifdef WE_SPY_DEBUG
-	printk(KERN_DEBUG "iw_send_thrspy_event() : address %02X:%02X:%02X:%02X:%02X:%02X, level %d, up = %d\n",
-	       threshold.addr.sa_data[0],
-	       threshold.addr.sa_data[1],
-	       threshold.addr.sa_data[2],
-	       threshold.addr.sa_data[3],
-	       threshold.addr.sa_data[4],
-	       threshold.addr.sa_data[5], threshold.qual.level);
-#endif	/* WE_SPY_DEBUG */
-
 	/* Send event to user space */
 	wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);
 }
@@ -1612,10 +1524,6 @@ void wireless_spy_update(struct net_device *	dev,
 	if (!spydata)
 		return;
 
-#ifdef WE_SPY_DEBUG
-	printk(KERN_DEBUG "wireless_spy_update() :  wireless_data %p, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_data, spydata, address[0], address[1], address[2], address[3], address[4], address[5]);
-#endif	/* WE_SPY_DEBUG */
-
 	/* Update all records that match */
 	for (i = 0; i < spydata->spy_number; i++)
 		if (!compare_ether_addr(address, spydata->spy_address[i])) {
-- 
cgit v1.2.3


From 4b1e255384570138c2a823904796d46f628e8350 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 26 Apr 2007 20:45:14 -0700
Subject: [WEXT]: Remove options.

This patch kills the two options in wext that are required to be
enabled anyway because they influence the userspace API.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext.c | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index 1db2be7ba2f..dabc6453518 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -101,12 +101,6 @@
 
 #include <asm/uaccess.h>		/* copy_to_user() */
 
-/**************************** CONSTANTS ****************************/
-
-/* Options */
-#define WE_EVENT_RTNETLINK	/* Propagate events using RtNetlink */
-#define WE_SET_EVENT		/* Generate an event on some set commands */
-
 /************************* GLOBAL VARIABLES *************************/
 /*
  * You should not use global variables, because of re-entrancy.
@@ -741,12 +735,10 @@ static int ioctl_standard_call(struct net_device *	dev,
 		/* No extra arguments. Trivial to handle */
 		ret = handler(dev, &info, &(iwr->u), NULL);
 
-#ifdef WE_SET_EVENT
 		/* Generate an event to notify listeners of the change */
 		if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
 		   ((ret == 0) || (ret == -EIWCOMMIT)))
 			wireless_send_event(dev, cmd, &(iwr->u), NULL);
-#endif	/* WE_SET_EVENT */
 	} else {
 		char *	extra;
 		int	extra_size;
@@ -859,7 +851,6 @@ static int ioctl_standard_call(struct net_device *	dev,
 				ret =  -EFAULT;
 		}
 
-#ifdef WE_SET_EVENT
 		/* Generate an event to notify listeners of the change */
 		if ((descr->flags & IW_DESCR_FLAG_EVENT) &&
 		   ((ret == 0) || (ret == -EIWCOMMIT))) {
@@ -871,7 +862,6 @@ static int ioctl_standard_call(struct net_device *	dev,
 				wireless_send_event(dev, cmd, &(iwr->u),
 						    extra);
 		}
-#endif	/* WE_SET_EVENT */
 
 		/* Cleanup - I told you it wasn't that long ;-) */
 		kfree(extra);
@@ -1121,7 +1111,6 @@ int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
  * Most often, the event will be propagated through rtnetlink
  */
 
-#ifdef WE_EVENT_RTNETLINK
 /* ---------------------------------------------------------------- */
 /*
  * Locking...
@@ -1225,8 +1214,6 @@ static inline void rtmsg_iwinfo(struct net_device *	dev,
 	tasklet_schedule(&wireless_nlevent_tasklet);
 }
 
-#endif	/* WE_EVENT_RTNETLINK */
-
 /* ---------------------------------------------------------------- */
 /*
  * Main event dispatcher. Called from other parts and drivers.
@@ -1305,10 +1292,8 @@ void wireless_send_event(struct net_device *	dev,
 	if (extra != NULL)
 		memcpy(((char *) event) + hdr_len, extra, extra_len);
 
-#ifdef WE_EVENT_RTNETLINK
 	/* Send via the RtNetlink event channel */
 	rtmsg_iwinfo(dev, (char *) event, event_len);
-#endif	/* WE_EVENT_RTNETLINK */
 
 	/* Cleanup */
 	kfree(event);
-- 
cgit v1.2.3


From dd8ceabcd10d47f6f28ecfaf2eac7beffca11b3c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 26 Apr 2007 20:45:47 -0700
Subject: [WEXT]: Cleanup early ioctl call path.

This patch makes the code in wireless_process_ioctl somewhat more
readable.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext.c | 73 +++++++++++++++++++----------------------------------
 1 file changed, 26 insertions(+), 47 deletions(-)

diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index dabc6453518..adcb08344d5 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -1035,53 +1035,31 @@ static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
 	/* A bunch of special cases, then the generic case...
 	 * Note that 'cmd' is already filtered in dev_ioctl() with
 	 * (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) */
-	switch (cmd) {
-	case SIOCGIWSTATS:
-		/* Get Wireless Stats */
-		return ioctl_standard_call(dev,
-					   ifr,
-					   cmd,
+	if (cmd == SIOCGIWSTATS)
+		return ioctl_standard_call(dev, ifr, cmd,
 					   &iw_handler_get_iwstats);
 
-	case SIOCGIWPRIV:
-		/* Check if we have some wireless handlers defined */
-		if (dev->wireless_handlers != NULL) {
-			/* We export to user space the definition of
-			 * the private handler ourselves */
-			return ioctl_standard_call(dev,
-						   ifr,
-						   cmd,
-						   &iw_handler_get_private);
-		}
-		// ## Fall-through for old API ##
-	default:
-		/* Generic IOCTL */
-		/* Basic check */
-		if (!netif_device_present(dev))
-			return -ENODEV;
-		/* New driver API : try to find the handler */
-		handler = get_handler(dev, cmd);
-		if (handler != NULL) {
-			/* Standard and private are not the same */
-			if (cmd < SIOCIWFIRSTPRIV)
-				return ioctl_standard_call(dev,
-							   ifr,
-							   cmd,
-							   handler);
-			else
-				return ioctl_private_call(dev,
-							  ifr,
-							  cmd,
-							  handler);
-		}
-		/* Old driver API : call driver ioctl handler */
-		if (dev->do_ioctl) {
-			return dev->do_ioctl(dev, ifr, cmd);
-		}
-		return -EOPNOTSUPP;
+	if (cmd == SIOCGIWPRIV && dev->wireless_handlers)
+		return ioctl_standard_call(dev, ifr, cmd,
+					   &iw_handler_get_private);
+
+	/* Basic check */
+	if (!netif_device_present(dev))
+		return -ENODEV;
+
+	/* New driver API : try to find the handler */
+	handler = get_handler(dev, cmd);
+	if (handler) {
+		/* Standard and private are not the same */
+		if (cmd < SIOCIWFIRSTPRIV)
+			return ioctl_standard_call(dev, ifr, cmd, handler);
+		else
+			return ioctl_private_call(dev, ifr, cmd, handler);
 	}
-	/* Not reached */
-	return -EINVAL;
+	/* Old driver API : call driver ioctl handler */
+	if (dev->do_ioctl)
+		return dev->do_ioctl(dev, ifr, cmd);
+	return -EOPNOTSUPP;
 }
 
 /* entry point from dev ioctl */
@@ -1093,9 +1071,10 @@ int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
 	/* If command is `set a parameter', or
 	 * `get the encoding parameters', check if
 	 * the user has the right to do it */
-	if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE || cmd == SIOCGIWENCODEEXT)
-		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
+	if ((IW_IS_SET(cmd) || cmd == SIOCGIWENCODE || cmd == SIOCGIWENCODEEXT)
+	    && !capable(CAP_NET_ADMIN))
+		return -EPERM;
+
 	dev_load(ifr->ifr_name);
 	rtnl_lock();
 	ret = wireless_process_ioctl(ifr, cmd);
-- 
cgit v1.2.3


From 7a9df167db0f200d5f8e393376dba8ceeae0fd53 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 26 Apr 2007 20:46:23 -0700
Subject: [WEXT]: Move EXPORT_SYMBOL statements where they belong.

EXPORT_SYMBOL statements are supposed to go together with the symbol
they're exporting. This patch moves them accordingly.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index adcb08344d5..53478465b4b 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -1279,6 +1279,7 @@ void wireless_send_event(struct net_device *	dev,
 
 	return;		/* Always success, I guess ;-) */
 }
+EXPORT_SYMBOL(wireless_send_event);
 
 /********************** ENHANCED IWSPY SUPPORT **********************/
 /*
@@ -1356,6 +1357,7 @@ int iw_handler_set_spy(struct net_device *	dev,
 
 	return 0;
 }
+EXPORT_SYMBOL(iw_handler_set_spy);
 
 /*------------------------------------------------------------------*/
 /*
@@ -1391,6 +1393,7 @@ int iw_handler_get_spy(struct net_device *	dev,
 		spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED;
 	return 0;
 }
+EXPORT_SYMBOL(iw_handler_get_spy);
 
 /*------------------------------------------------------------------*/
 /*
@@ -1417,6 +1420,7 @@ int iw_handler_set_thrspy(struct net_device *	dev,
 
 	return 0;
 }
+EXPORT_SYMBOL(iw_handler_set_thrspy);
 
 /*------------------------------------------------------------------*/
 /*
@@ -1440,6 +1444,7 @@ int iw_handler_get_thrspy(struct net_device *	dev,
 
 	return 0;
 }
+EXPORT_SYMBOL(iw_handler_get_thrspy);
 
 /*------------------------------------------------------------------*/
 /*
@@ -1516,10 +1521,4 @@ void wireless_spy_update(struct net_device *	dev,
 		}
 	}
 }
-
-EXPORT_SYMBOL(iw_handler_get_spy);
-EXPORT_SYMBOL(iw_handler_get_thrspy);
-EXPORT_SYMBOL(iw_handler_set_spy);
-EXPORT_SYMBOL(iw_handler_set_thrspy);
-EXPORT_SYMBOL(wireless_send_event);
 EXPORT_SYMBOL(wireless_spy_update);
-- 
cgit v1.2.3


From bdf51894c1d7ce3fba8c8fdf485e85173ac60c6c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 26 Apr 2007 20:46:55 -0700
Subject: [WEXT]: Reduce inline abuse.

This patch removes a bunch of inline abuse from wext. Most functions
that were marked inline are only used once so the compiler will inline
them anyway, others are used multiple times but there's no requirement
for them to be inline since they aren't in any fast paths.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext.c | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index 53478465b4b..f733303f4bd 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -440,10 +440,8 @@ static const int event_type_pk_size[] = {
 /* ---------------------------------------------------------------- */
 /*
  * Return the driver handler associated with a specific Wireless Extension.
- * Called from various place, so make sure it remains efficient.
  */
-static inline iw_handler get_handler(struct net_device *dev,
-				     unsigned int cmd)
+static iw_handler get_handler(struct net_device *dev, unsigned int cmd)
 {
 	/* Don't "optimise" the following variable, it will crash */
 	unsigned int	index;		/* *MUST* be unsigned */
@@ -470,7 +468,7 @@ static inline iw_handler get_handler(struct net_device *dev,
 /*
  * Get statistics out of the driver
  */
-static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
+static struct iw_statistics *get_wireless_stats(struct net_device *dev)
 {
 	/* New location */
 	if ((dev->wireless_handlers != NULL) &&
@@ -500,7 +498,7 @@ static inline struct iw_statistics *get_wireless_stats(struct net_device *dev)
  * netif_running(dev) test. I'm open on that one...
  * Hopefully, the driver will remember to do a commit in "open()" ;-)
  */
-static inline int call_commit_handler(struct net_device *	dev)
+static int call_commit_handler(struct net_device *dev)
 {
 	if ((netif_running(dev)) &&
 	   (dev->wireless_handlers->standard[0] != NULL)) {
@@ -622,8 +620,8 @@ static int iw_handler_get_private(struct net_device *		dev,
 /*
  * Print one entry (line) of /proc/net/wireless
  */
-static __inline__ void wireless_seq_printf_stats(struct seq_file *seq,
-						 struct net_device *dev)
+static void wireless_seq_printf_stats(struct seq_file *seq,
+				      struct net_device *dev)
 {
 	/* Get stats from the driver */
 	struct iw_statistics *stats = get_wireless_stats(dev);
@@ -892,10 +890,8 @@ static int ioctl_standard_call(struct net_device *	dev,
  * a iw_handler but process it in your ioctl handler (i.e. use the
  * old driver API).
  */
-static inline int ioctl_private_call(struct net_device *	dev,
-				     struct ifreq *		ifr,
-				     unsigned int		cmd,
-				     iw_handler		handler)
+static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr,
+			      unsigned int cmd, iw_handler handler)
 {
 	struct iwreq *			iwr = (struct iwreq *) ifr;
 	const struct iw_priv_args *	descr = NULL;
@@ -1134,11 +1130,8 @@ static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
  * current wireless config. Dumping the wireless config is far too
  * expensive (for each parameter, the driver need to query the hardware).
  */
-static inline int rtnetlink_fill_iwinfo(struct sk_buff *	skb,
-					struct net_device *	dev,
-					int			type,
-					char *			event,
-					int			event_len)
+static int rtnetlink_fill_iwinfo(struct sk_buff *skb, struct net_device *dev,
+				 int type, char *event, int event_len)
 {
 	struct ifinfomsg *r;
 	struct nlmsghdr  *nlh;
@@ -1172,9 +1165,7 @@ rtattr_failure:
  * Andrzej Krzysztofowicz mandated that I used a IFLA_XXX field
  * within a RTM_NEWLINK event.
  */
-static inline void rtmsg_iwinfo(struct net_device *	dev,
-				char *			event,
-				int			event_len)
+static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len)
 {
 	struct sk_buff *skb;
 	int size = NLMSG_GOODSIZE;
-- 
cgit v1.2.3


From 4d44e0dfe961e02489d40d32334454ebe0e784e8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 26 Apr 2007 20:47:25 -0700
Subject: [WEXT]: Misc code cleanups.

Just a few things that didn't fit in with the other patches.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/wext.c | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/net/wireless/wext.c b/net/wireless/wext.c
index f733303f4bd..d6aaf65192e 100644
--- a/net/wireless/wext.c
+++ b/net/wireless/wext.c
@@ -476,7 +476,7 @@ static struct iw_statistics *get_wireless_stats(struct net_device *dev)
 		return dev->wireless_handlers->get_wireless_stats(dev);
 
 	/* Not found */
-	return (struct iw_statistics *) NULL;
+	return NULL;
 }
 
 /* ---------------------------------------------------------------- */
@@ -501,11 +501,11 @@ static struct iw_statistics *get_wireless_stats(struct net_device *dev)
 static int call_commit_handler(struct net_device *dev)
 {
 	if ((netif_running(dev)) &&
-	   (dev->wireless_handlers->standard[0] != NULL)) {
+	   (dev->wireless_handlers->standard[0] != NULL))
 		/* Call the commit handler on the driver */
 		return dev->wireless_handlers->standard[0](dev, NULL,
 							   NULL, NULL);
-	} else
+	else
 		return 0;		/* Command completed successfully */
 }
 
@@ -554,8 +554,7 @@ static int iw_handler_get_iwstats(struct net_device *		dev,
 	struct iw_statistics *stats;
 
 	stats = get_wireless_stats(dev);
-	if (stats != (struct iw_statistics *) NULL) {
-
+	if (stats) {
 		/* Copy statistics to extra */
 		memcpy(extra, stats, sizeof(struct iw_statistics));
 		wrqu->data.length = sizeof(struct iw_statistics);
@@ -814,9 +813,8 @@ static int ioctl_standard_call(struct net_device *	dev,
 		/* Create the kernel buffer */
 		/*    kzalloc ensures NULL-termination for essid_compat */
 		extra = kzalloc(extra_size, GFP_KERNEL);
-		if (extra == NULL) {
+		if (extra == NULL)
 			return -ENOMEM;
-		}
 
 		/* If it is a SET, get all the extra data in here */
 		if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
@@ -957,18 +955,14 @@ static int ioctl_private_call(struct net_device *dev, struct ifreq *ifr,
 			if (iwr->u.data.length > (descr->set_args &
 						 IW_PRIV_SIZE_MASK))
 				return -E2BIG;
-		} else {
-			/* Check NULL pointer */
-			if (iwr->u.data.pointer == NULL)
-				return -EFAULT;
-		}
+		} else if (iwr->u.data.pointer == NULL)
+			return -EFAULT;
 
 		/* Always allocate for max space. Easier, and won't last
 		 * long... */
 		extra = kmalloc(extra_size, GFP_KERNEL);
-		if (extra == NULL) {
+		if (extra == NULL)
 			return -ENOMEM;
-		}
 
 		/* If it is a SET, get all the extra data in here */
 		if (IW_IS_SET(cmd) && (iwr->u.data.length != 0)) {
@@ -1259,7 +1253,7 @@ void wireless_send_event(struct net_device *	dev,
 	event->len = event_len;
 	event->cmd = cmd;
 	memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN);
-	if (extra != NULL)
+	if (extra)
 		memcpy(((char *) event) + hdr_len, extra, extra_len);
 
 	/* Send via the RtNetlink event channel */
@@ -1290,11 +1284,11 @@ EXPORT_SYMBOL(wireless_send_event);
  * Because this is called on the Rx path via wireless_spy_update(),
  * we want it to be efficient...
  */
-static inline struct iw_spy_data * get_spydata(struct net_device *dev)
+static inline struct iw_spy_data *get_spydata(struct net_device *dev)
 {
 	/* This is the new way */
 	if (dev->wireless_data)
-		return(dev->wireless_data->spy_data);
+		return dev->wireless_data->spy_data;
 	return NULL;
 }
 
-- 
cgit v1.2.3


From b86e0280bb5585a610783ff5392d9d439dee7ddd Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 26 Apr 2007 20:48:23 -0700
Subject: [WEXT] net_device: Don't include wext bits if not required.

This patch makes the wext bits in struct net_device depend on
CONFIG_WIRELESS_EXT.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 584c199ec2d..e027a3750a7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -352,12 +352,13 @@ struct net_device
 	struct net_device_stats* (*get_stats)(struct net_device *dev);
 	struct net_device_stats	stats;
 
+#ifdef CONFIG_WIRELESS_EXT
 	/* List of functions to handle Wireless Extensions (instead of ioctl).
 	 * See <net/iw_handler.h> for details. Jean II */
 	const struct iw_handler_def *	wireless_handlers;
 	/* Instance data managed by the core of Wireless Extensions. */
 	struct iw_public_data *	wireless_data;
-
+#endif
 	const struct ethtool_ops *ethtool_ops;
 
 	/*
-- 
cgit v1.2.3


From a4d73ee168eeaed3baea86542ad42e1fd7e192d3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 26 Apr 2007 20:50:35 -0700
Subject: [WIRELESS] cfg80211: Fix locking in wiphy_new.

This patch fixes the locking in wiphy new. Ingo Oeser
<netdev@axxeo.de> noticed that locking in the error case was wrong and
also suggested this fix.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/core.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 532e1e09e02..24a21add0ba 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -53,21 +53,25 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
 
 	mutex_lock(&cfg80211_drv_mutex);
 
-	if (unlikely(wiphy_counter<0)) {
+	drv->idx = wiphy_counter;
+
+	/* now increase counter for the next device unless
+	 * it has wrapped previously */
+	if (wiphy_counter >= 0)
+		wiphy_counter++;
+
+	mutex_unlock(&cfg80211_drv_mutex);
+
+	if (unlikely(drv->idx < 0)) {
 		/* ugh, wrapped! */
 		kfree(drv);
 		return NULL;
 	}
-	drv->idx = wiphy_counter;
 
 	/* give it a proper name */
 	snprintf(drv->wiphy.dev.bus_id, BUS_ID_SIZE,
 		 PHY_NAME "%d", drv->idx);
 
-	/* now increase counter for the next time */
-	wiphy_counter++;
-	mutex_unlock(&cfg80211_drv_mutex);
-
 	mutex_init(&drv->mtx);
 	mutex_init(&drv->devlist_mtx);
 	INIT_LIST_HEAD(&drv->netdev_list);
-- 
cgit v1.2.3


From f16bfc1c0958ff340a02779ab139b03fb5ba6e82 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 26 Apr 2007 20:51:12 -0700
Subject: [WIRELESS] cfg80211: Clarify locking comment.

This patch clarifies the comment about locking in wiphy_unregister.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/core.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 24a21add0ba..7eabd55417a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -91,7 +91,6 @@ int wiphy_register(struct wiphy *wiphy)
 
 	mutex_lock(&cfg80211_drv_mutex);
 
-
 	res = device_add(&drv->wiphy.dev);
 	if (res)
 		goto out_unlock;
@@ -114,14 +113,26 @@ void wiphy_unregister(struct wiphy *wiphy)
 {
 	struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
 
+	/* protect the device list */
 	mutex_lock(&cfg80211_drv_mutex);
 
-	/* hold registered driver mutex during list removal as well
-	 * to make sure no commands are in progress at the moment */
+	BUG_ON(!list_empty(&drv->netdev_list));
+
+	/*
+	 * Try to grab drv->mtx. If a command is still in progress,
+	 * hopefully the driver will refuse it since it's tearing
+	 * down the device already. We wait for this command to complete
+	 * before unlinking the item from the list.
+	 * Note: as codified by the BUG_ON above we cannot get here if
+	 * a virtual interface is still associated. Hence, we can only
+	 * get to lock contention here if userspace issues a command
+	 * that identified the hardware by wiphy index.
+	 */
 	mutex_lock(&drv->mtx);
-	list_del(&drv->list);
+	/* unlock again before freeing */
 	mutex_unlock(&drv->mtx);
 
+	list_del(&drv->list);
 	device_del(&drv->wiphy.dev);
 	debugfs_remove(drv->wiphy.debugfsdir);
 
-- 
cgit v1.2.3


From 49e9f70f8e7a4df00a5185e7f5c91e3c583847db Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Fri, 27 Apr 2007 01:04:23 -0700
Subject: [IPV4]: Add multipath cached to feature-removal-schedule.txt

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/feature-removal-schedule.txt | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 976c8a1bd7c..6da663607f7 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -292,3 +292,22 @@ Why:	the i8xx_tco watchdog driver has been replaced by the iTCO_wdt
 Who:	Wim Van Sebroeck <wim@iguana.be>
 
 ---------------------------
+
+What:	Multipath cached routing support in ipv4
+When:	in 2.6.23
+Why:	Code was merged, then submitter immediately disappeared leaving
+	us with no maintainer and lots of bugs.  The code should not have
+	been merged in the first place, and many aspects of it's
+	implementation are blocking more critical core networking
+	development.  It's marked EXPERIMENTAL and no distribution
+	enables it because it cause obscure crashes due to unfixable bugs
+	(interfaces don't return errors so memory allocation can't be
+	handled, calling contexts of these interfaces make handling
+	errors impossible too because they get called after we've
+	totally commited to creating a route object, for example).
+	This problem has existed for years and no forward progress
+	has ever been made, and nobody steps up to try and salvage
+	this code, so we're going to finally just get rid of it.
+Who:	David S. Miller <davem@davemloft.net>
+
+---------------------------
-- 
cgit v1.2.3


From ebbd90a730711280142017e482f27ec3fbb4f227 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 27 Apr 2007 02:13:39 -0700
Subject: [IPV6]: Fix thinko in ipv6_rthdr_rcv() changes.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/exthdrs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 4aa7fb024b3..6d8e4ac7bda 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -400,6 +400,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 
 	switch (hdr->type) {
 #ifdef CONFIG_IPV6_MIP6
+	case IPV6_SRCRT_TYPE_2:
 		break;
 #endif
 	case IPV6_SRCRT_TYPE_0:
-- 
cgit v1.2.3


From 912a41a4ab935ce8c4308428ec13fc7f8b1f18f4 Mon Sep 17 00:00:00 2001
From: Sergey Vlasov <vsu@altlinux.ru>
Date: Fri, 27 Apr 2007 02:17:19 -0700
Subject: [IPV4] nl_fib_lookup: Initialise res.r before fib_res_put(&res)

When CONFIG_IP_MULTIPLE_TABLES is enabled, the code in nl_fib_lookup()
needs to initialize the res.r field before fib_res_put(&res) - unlike
fib_lookup(), a direct call to ->tb_lookup does not set this field.

Signed-off-by: Sergey Vlasov <vsu@altlinux.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 953dd458c23..837f2957fa8 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -777,6 +777,10 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
 							    .tos = frn->fl_tos,
 							    .scope = frn->fl_scope } } };
 
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	res.r = NULL;
+#endif
+
 	frn->err = -ENOENT;
 	if (tb) {
 		local_bh_disable();
-- 
cgit v1.2.3