153 files changed, 6243 insertions, 2390 deletions
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index f094a0879c1..9ef07eda2c4 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -105,10 +105,9 @@ static struct class_device_attribute *atm_attrs[] = {
 	NULL
 };
 
-static int atm_uevent(struct class_device *cdev, char **envp, int num_envp, char *buf, int size)
+static int atm_uevent(struct class_device *cdev, struct kobj_uevent_env *env)
 {
 	struct atm_dev *adev;
-	int i = 0, len = 0;
 
 	if (!cdev)
 		return -ENODEV;
@@ -117,11 +116,9 @@ static int atm_uevent(struct class_device *cdev, char **envp, int num_envp, char
 	if (!adev)
 		return -ENODEV;
 
-	if (add_uevent_var(envp, num_envp, &i, buf, size, &len,
-			   "NAME=%s%d", adev->type, adev->number))
+	if (add_uevent_var(env, "NAME=%s%d", adev->type, adev->number))
 		return -ENOMEM;
 
-	envp[i] = NULL;
 	return 0;
 }
 
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 848b8fa8bed..93867bb6cc9 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -23,7 +23,7 @@
 
 #include "br_private.h"
 
-int (*br_should_route_hook) (struct sk_buff **pskb) = NULL;
+int (*br_should_route_hook)(struct sk_buff *skb);
 
 static struct llc_sap *br_stp_sap;
 
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 3a8a015c92e..3cedd4eeeed 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -126,6 +126,10 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto drop;
 
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb)
+		return NULL;
+
 	if (unlikely(is_link_local(dest))) {
 		/* Pause frames shouldn't be passed up by driver anyway */
 		if (skb->protocol == htons(ETH_P_PAUSE))
@@ -145,7 +149,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 	case BR_STATE_FORWARDING:
 
 		if (br_should_route_hook) {
-			if (br_should_route_hook(&skb))
+			if (br_should_route_hook(skb))
 				return skb;
 			dest = eth_hdr(skb)->h_dest;
 		}
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 8245f051ccb..da22f900e89 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -503,18 +503,14 @@ inhdr_error:
  * receiving device) to make netfilter happy, the REDIRECT
  * target in particular.  Save the original destination IP
  * address to be able to detect DNAT afterwards. */
-static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
 	struct iphdr *iph;
-	struct sk_buff *skb = *pskb;
 	__u32 len = nf_bridge_encap_header_len(skb);
 
-	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
-		return NF_STOLEN;
-
 	if (unlikely(!pskb_may_pull(skb, len)))
 		goto out;
 
@@ -584,13 +580,11 @@ out:
  * took place when the packet entered the bridge), but we
  * register an IPv4 PRE_ROUTING 'sabotage' hook that will
  * prevent this from happening. */
-static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb,
 				   const struct net_device *in,
 				   const struct net_device *out,
 				   int (*okfn)(struct sk_buff *))
 {
-	struct sk_buff *skb = *pskb;
-
 	if (skb->dst == (struct dst_entry *)&__fake_rtable) {
 		dst_release(skb->dst);
 		skb->dst = NULL;
@@ -625,12 +619,11 @@ static int br_nf_forward_finish(struct sk_buff *skb)
  * but we are still able to filter on the 'real' indev/outdev
  * because of the physdev module. For ARP, indev and outdev are the
  * bridge ports. */
-static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 				     const struct net_device *in,
 				     const struct net_device *out,
 				     int (*okfn)(struct sk_buff *))
 {
-	struct sk_buff *skb = *pskb;
 	struct nf_bridge_info *nf_bridge;
 	struct net_device *parent;
 	int pf;
@@ -648,7 +641,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
 	else
 		pf = PF_INET6;
 
-	nf_bridge_pull_encap_header(*pskb);
+	nf_bridge_pull_encap_header(skb);
 
 	nf_bridge = skb->nf_bridge;
 	if (skb->pkt_type == PACKET_OTHERHOST) {
@@ -666,12 +659,11 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
 	return NF_STOLEN;
 }
 
-static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
-	struct sk_buff *skb = *pskb;
 	struct net_device **d = (struct net_device **)(skb->cb);
 
 #ifdef CONFIG_SYSCTL
@@ -682,12 +674,12 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
 	if (skb->protocol != htons(ETH_P_ARP)) {
 		if (!IS_VLAN_ARP(skb))
 			return NF_ACCEPT;
-		nf_bridge_pull_encap_header(*pskb);
+		nf_bridge_pull_encap_header(skb);
 	}
 
 	if (arp_hdr(skb)->ar_pln != 4) {
 		if (IS_VLAN_ARP(skb))
-			nf_bridge_push_encap_header(*pskb);
+			nf_bridge_push_encap_header(skb);
 		return NF_ACCEPT;
 	}
 	*d = (struct net_device *)in;
@@ -709,13 +701,12 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
  * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
  * will be executed.
  */
-static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb,
 				    const struct net_device *in,
 				    const struct net_device *out,
 				    int (*okfn)(struct sk_buff *))
 {
 	struct net_device *realindev;
-	struct sk_buff *skb = *pskb;
 	struct nf_bridge_info *nf_bridge;
 
 	if (!skb->nf_bridge)
@@ -752,13 +743,12 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 }
 
 /* PF_BRIDGE/POST_ROUTING ********************************************/
-static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 				       const struct net_device *in,
 				       const struct net_device *out,
 				       int (*okfn)(struct sk_buff *))
 {
-	struct sk_buff *skb = *pskb;
-	struct nf_bridge_info *nf_bridge = (*pskb)->nf_bridge;
+	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
 	struct net_device *realoutdev = bridge_parent(skb->dev);
 	int pf;
 
@@ -828,13 +818,13 @@ print_error:
 /* IP/SABOTAGE *****************************************************/
 /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
  * for the second time. */
-static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb,
+static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
 				   const struct net_device *in,
 				   const struct net_device *out,
 				   int (*okfn)(struct sk_buff *))
 {
-	if ((*pskb)->nf_bridge &&
-	    !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
+	if (skb->nf_bridge &&
+	    !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
 		return NF_STOP;
 	}
 
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index c65f54e0e27..3312e8f2abe 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -435,7 +435,7 @@ int br_sysfs_addbr(struct net_device *dev)
 	err = kobject_register(&br->ifobj);
 	if (err) {
 		pr_info("%s: can't add kobject (directory) %s/%s\n",
-			__FUNCTION__, dev->name, br->ifobj.name);
+			__FUNCTION__, dev->name, kobject_name(&br->ifobj));
 		goto out3;
 	}
 	return 0;
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index ffe468a632e..48a80e42328 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -15,7 +15,7 @@
 #include <net/arp.h>
 #include <linux/module.h>
 
-static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
@@ -23,7 +23,6 @@ static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr,
 	__be32 _sip, *siptr, _dip, *diptr;
 	struct arphdr _ah, *ap;
 	unsigned char _sha[ETH_ALEN], *shp;
-	struct sk_buff *skb = *pskb;
 
 	ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah);
 	if (ap == NULL)
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index 4582659dff0..74262e9a566 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -8,29 +8,32 @@
  *
  */
 
+#include <linux/netfilter.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_nat.h>
 #include <linux/module.h>
 #include <net/sock.h>
 
-static int ebt_target_dnat(struct sk_buff **pskb, unsigned int hooknr,
+/*
+ * ebt_target_dnat - ebtables dnat target: rewrite the destination MAC.
+ *
+ * Copies info->mac (@data is a struct ebt_nat_info) over the Ethernet
+ * destination address of @skb and returns info->target.
+ *
+ * skb_make_writable() returns nonzero when the packet may be modified,
+ * so the frame is dropped only when it returns 0.  The drop verdict is
+ * EBT_DROP because ebtables targets return EBT_* verdicts.
+ */
+static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
 	struct ebt_nat_info *info = (struct ebt_nat_info *)data;
 
-	if (skb_shared(*pskb) || skb_cloned(*pskb)) {
-		struct sk_buff *nskb;
+	if (!skb_make_writable(skb, 0))
+		return EBT_DROP;
 
-		nskb = skb_copy(*pskb, GFP_ATOMIC);
-		if (!nskb)
-			return NF_DROP;
-		if ((*pskb)->sk)
-			skb_set_owner_w(nskb, (*pskb)->sk);
-		kfree_skb(*pskb);
-		*pskb = nskb;
-	}
-	memcpy(eth_hdr(*pskb)->h_dest, info->mac, ETH_ALEN);
+	memcpy(eth_hdr(skb)->h_dest, info->mac, ETH_ALEN);
 	return info->target;
 }
 
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 62d23c7b25e..6cba54309c0 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -17,7 +17,7 @@
 #include <linux/netfilter_bridge/ebt_mark_t.h>
 #include <linux/module.h>
 
-static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
@@ -25,13 +25,13 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr,
 	int action = info->target & -16;
 
 	if (action == MARK_SET_VALUE)
-		(*pskb)->mark = info->mark;
+		skb->mark = info->mark;
 	else if (action == MARK_OR_VALUE)
-		(*pskb)->mark |= info->mark;
+		skb->mark |= info->mark;
 	else if (action == MARK_AND_VALUE)
-		(*pskb)->mark &= info->mark;
+		skb->mark &= info->mark;
 	else
-		(*pskb)->mark ^= info->mark;
+		skb->mark ^= info->mark;
 
 	return info->target | ~EBT_VERDICT_BITS;
 }
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 9f378eab72d..422cb834cff 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -8,35 +8,40 @@
  *
  */
 
+#include <linux/netfilter.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_redirect.h>
 #include <linux/module.h>
 #include <net/sock.h>
 #include "../br_private.h"
 
-static int ebt_target_redirect(struct sk_buff **pskb, unsigned int hooknr,
+/*
+ * ebt_target_redirect - ebtables redirect target.
+ *
+ * Rewrites the Ethernet destination address of @skb to the address of
+ * @in itself on the NF_BR_BROUTING hook, or otherwise of the bridge
+ * device that @in is a port of, marks the frame PACKET_HOST and returns
+ * info->target.
+ *
+ * skb_make_writable() returns nonzero when the packet may be modified,
+ * so the frame is dropped only when it returns 0.  The drop verdict is
+ * EBT_DROP because ebtables targets return EBT_* verdicts.
+ */
+static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
 	struct ebt_redirect_info *info = (struct ebt_redirect_info *)data;
 
-	if (skb_shared(*pskb) || skb_cloned(*pskb)) {
-		struct sk_buff *nskb;
+	if (!skb_make_writable(skb, 0))
+		return EBT_DROP;
 
-		nskb = skb_copy(*pskb, GFP_ATOMIC);
-		if (!nskb)
-			return NF_DROP;
-		if ((*pskb)->sk)
-			skb_set_owner_w(nskb, (*pskb)->sk);
-		kfree_skb(*pskb);
-		*pskb = nskb;
-	}
 	if (hooknr != NF_BR_BROUTING)
-		memcpy(eth_hdr(*pskb)->h_dest,
+		memcpy(eth_hdr(skb)->h_dest,
 		       in->br_port->br->dev->dev_addr, ETH_ALEN);
 	else
-		memcpy(eth_hdr(*pskb)->h_dest, in->dev_addr, ETH_ALEN);
-	(*pskb)->pkt_type = PACKET_HOST;
+		memcpy(eth_hdr(skb)->h_dest, in->dev_addr, ETH_ALEN);
+	skb->pkt_type = PACKET_HOST;
 	return info->target;
 }
 
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index a50722182bf..425ac920904 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -8,6 +8,7 @@
  *
  */
 
+#include <linux/netfilter.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_nat.h>
 #include <linux/module.h>
@@ -15,34 +16,38 @@
 #include <linux/if_arp.h>
 #include <net/arp.h>
 
-static int ebt_target_snat(struct sk_buff **pskb, unsigned int hooknr,
+/*
+ * ebt_target_snat - ebtables snat target: rewrite the source MAC.
+ *
+ * Copies info->mac (@data is a struct ebt_nat_info) over the Ethernet
+ * source address of @skb.  Unless NAT_ARP_BIT is set in info->target,
+ * an ARP frame whose hardware address length is ETH_ALEN also has
+ * info->mac stored right after its ARP header.
+ *
+ * skb_make_writable() returns nonzero when the packet may be modified,
+ * so the frame is dropped only when it returns 0.  The drop verdict is
+ * EBT_DROP, matching the other failure paths in this function.
+ */
+static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr,
    const struct net_device *in, const struct net_device *out,
    const void *data, unsigned int datalen)
 {
 	struct ebt_nat_info *info = (struct ebt_nat_info *) data;
 
-	if (skb_shared(*pskb) || skb_cloned(*pskb)) {
-		struct sk_buff *nskb;
+	if (!skb_make_writable(skb, 0))
+		return EBT_DROP;
 
-		nskb = skb_copy(*pskb, GFP_ATOMIC);
-		if (!nskb)
-			return NF_DROP;
-		if ((*pskb)->sk)
-			skb_set_owner_w(nskb, (*pskb)->sk);
-		kfree_skb(*pskb);
-		*pskb = nskb;
-	}
-	memcpy(eth_hdr(*pskb)->h_source, info->mac, ETH_ALEN);
+	memcpy(eth_hdr(skb)->h_source, info->mac, ETH_ALEN);
 	if (!(info->target & NAT_ARP_BIT) &&
-	    eth_hdr(*pskb)->h_proto == htons(ETH_P_ARP)) {
+	    eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) {
 		struct arphdr _ah, *ap;
 
-		ap = skb_header_pointer(*pskb, 0, sizeof(_ah), &_ah);
+		ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah);
 		if (ap == NULL)
 			return EBT_DROP;
 		if (ap->ar_hln != ETH_ALEN)
 			goto out;
-		if (skb_store_bits(*pskb, sizeof(_ah), info->mac,ETH_ALEN))
+		if (skb_store_bits(skb, sizeof(_ah), info->mac,ETH_ALEN))
 			return EBT_DROP;
 	}
 out:
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index d37ce047893..e44519ebf1d 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -51,11 +51,11 @@ static struct ebt_table broute_table =
 	.me		= THIS_MODULE,
 };
 
-static int ebt_broute(struct sk_buff **pskb)
+static int ebt_broute(struct sk_buff *skb)
 {
 	int ret;
 
-	ret = ebt_do_table(NF_BR_BROUTING, pskb, (*pskb)->dev, NULL,
+	ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL,
 	   &broute_table);
 	if (ret == NF_DROP)
 		return 1; /* route it */
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 81d84145c41..210493f99bc 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -61,10 +61,10 @@ static struct ebt_table frame_filter =
 };
 
 static unsigned int
-ebt_hook (unsigned int hook, struct sk_buff **pskb, const struct net_device *in,
+ebt_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
    const struct net_device *out, int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, pskb, in, out, &frame_filter);
+	return ebt_do_table(hook, skb, in, out, &frame_filter);
 }
 
 static struct nf_hook_ops ebt_ops_filter[] = {
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 9c50488b62e..3e58c2e5ee2 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -61,17 +61,17 @@ static struct ebt_table frame_nat =
 };
 
 static unsigned int
-ebt_nat_dst(unsigned int hook, struct sk_buff **pskb, const struct net_device *in
+ebt_nat_dst(unsigned int hook, struct sk_buff *skb, const struct net_device *in
    , const struct net_device *out, int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, pskb, in, out, &frame_nat);
+	return ebt_do_table(hook, skb, in, out, &frame_nat);
 }
 
 static unsigned int
-ebt_nat_src(unsigned int hook, struct sk_buff **pskb, const struct net_device *in
+ebt_nat_src(unsigned int hook, struct sk_buff *skb, const struct net_device *in
    , const struct net_device *out, int (*okfn)(struct sk_buff *))
 {
-	return ebt_do_table(hook, pskb, in, out, &frame_nat);
+	return ebt_do_table(hook, skb, in, out, &frame_nat);
 }
 
 static struct nf_hook_ops ebt_ops_nat[] = {
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 6018d0e5193..d5a09eaef91 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -142,7 +142,7 @@ static inline int ebt_basic_match(struct ebt_entry *e, struct ethhdr *h,
 }
 
 /* Do some firewalling */
-unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb,
+unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
    const struct net_device *in, const struct net_device *out,
    struct ebt_table *table)
 {
@@ -172,19 +172,19 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb,
 	base = private->entries;
 	i = 0;
 	while (i < nentries) {
-		if (ebt_basic_match(point, eth_hdr(*pskb), in, out))
+		if (ebt_basic_match(point, eth_hdr(skb), in, out))
 			goto letscontinue;
 
-		if (EBT_MATCH_ITERATE(point, ebt_do_match, *pskb, in, out) != 0)
+		if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, in, out) != 0)
 			goto letscontinue;
 
 		/* increase counter */
 		(*(counter_base + i)).pcnt++;
-		(*(counter_base + i)).bcnt+=(**pskb).len;
+		(*(counter_base + i)).bcnt += skb->len;
 
 		/* these should only watch: not modify, nor tell us
 		   what to do with the packet */
-		EBT_WATCHER_ITERATE(point, ebt_do_watcher, *pskb, hook, in,
+		EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, hook, in,
 		   out);
 
 		t = (struct ebt_entry_target *)
@@ -193,7 +193,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb,
 		if (!t->u.target->target)
 			verdict = ((struct ebt_standard_target *)t)->verdict;
 		else
-			verdict = t->u.target->target(pskb, hook,
+			verdict = t->u.target->target(skb, hook,
 			   in, out, t->data, t->target_size);
 		if (verdict == EBT_ACCEPT) {
 			read_unlock_bh(&table->lock);
diff --git a/net/core/dev.c b/net/core/dev.c
index 1e169a541ce..38b03da5c1c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -557,6 +557,7 @@ __setup("netdev=", netdev_boot_setup);
 
 /**
  *	__dev_get_by_name	- find a device by its name
+ *	@net: the applicable net namespace
  *	@name: name to find
  *
  *	Find an interface by name. Must be called under RTNL semaphore
@@ -581,6 +582,7 @@ struct net_device *__dev_get_by_name(struct net *net, const char *name)
 
 /**
  *	dev_get_by_name		- find a device by its name
+ *	@net: the applicable net namespace
  *	@name: name to find
  *
  *	Find an interface by name. This can be called from any
@@ -604,6 +606,7 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
 
 /**
  *	__dev_get_by_index - find a device by its ifindex
+ *	@net: the applicable net namespace
  *	@ifindex: index of device
  *
  *	Search for an interface by index. Returns %NULL if the device
@@ -629,6 +632,7 @@ struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 
 /**
  *	dev_get_by_index - find a device by its ifindex
+ *	@net: the applicable net namespace
  *	@ifindex: index of device
  *
  *	Search for an interface by index. Returns NULL if the device
@@ -651,6 +655,7 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 
 /**
  *	dev_getbyhwaddr - find a device by its hardware address
+ *	@net: the applicable net namespace
  *	@type: media type of device
  *	@ha: hardware address
  *
@@ -709,6 +714,7 @@ EXPORT_SYMBOL(dev_getfirstbyhwtype);
 
 /**
  *	dev_get_by_flags - find any device with given flags
+ *	@net: the applicable net namespace
  *	@if_flags: IFF_* values
  *	@mask: bitmask of bits in if_flags to check
  *
@@ -948,6 +954,7 @@ void netdev_state_change(struct net_device *dev)
 
 /**
  *	dev_load 	- load a network module
+ *	@net: the applicable net namespace
  *	@name: name of interface
  *
  *	If a network interface is not present and the process has suitable
@@ -1185,7 +1192,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
 /**
  *	call_netdevice_notifiers - call all network notifier blocks
  *      @val: value passed unmodified to notifier function
- *      @v:   pointer passed unmodified to notifier function
+ *      @dev: net_device pointer passed unmodified to notifier function
  *
  *	Call all network notifier blocks.  Parameters and return value
  *	are as for raw_notifier_call_chain().
@@ -1355,22 +1362,21 @@ int skb_checksum_help(struct sk_buff *skb)
 		goto out_set_summed;
 	}
 
-	if (skb_cloned(skb)) {
+	offset = skb->csum_start - skb_headroom(skb);
+	BUG_ON(offset >= skb_headlen(skb));
+	csum = skb_checksum(skb, offset, skb->len - offset, 0);
+
+	offset += skb->csum_offset;
+	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
+
+	if (skb_cloned(skb) &&
+	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
 		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
 		if (ret)
 			goto out;
 	}
 
-	offset = skb->csum_start - skb_headroom(skb);
-	BUG_ON(offset > (int)skb->len);
-	csum = skb_checksum(skb, offset, skb->len-offset, 0);
-
-	offset = skb_headlen(skb) - offset;
-	BUG_ON(offset <= 0);
-	BUG_ON(skb->csum_offset + 2 > offset);
-
-	*(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
-		csum_fold(csum);
+	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
 out_set_summed:
 	skb->ip_summed = CHECKSUM_NONE;
 out:
@@ -1942,27 +1948,51 @@ static int ing_filter(struct sk_buff *skb)
 	struct Qdisc *q;
 	struct net_device *dev = skb->dev;
 	int result = TC_ACT_OK;
+	u32 ttl = G_TC_RTTL(skb->tc_verd);
 
-	if (dev->qdisc_ingress) {
-		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
-		if (MAX_RED_LOOP < ttl++) {
-			printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
-				skb->iif, skb->dev->ifindex);
-			return TC_ACT_SHOT;
-		}
+	if (MAX_RED_LOOP < ttl++) {
+		printk(KERN_WARNING
+		       "Redir loop detected Dropping packet (%d->%d)\n",
+		       skb->iif, dev->ifindex);
+		return TC_ACT_SHOT;
+	}
+
+	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
+	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
 
-		skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
+	spin_lock(&dev->ingress_lock);
+	if ((q = dev->qdisc_ingress) != NULL)
+		result = q->enqueue(skb, q);
+	spin_unlock(&dev->ingress_lock);
 
-		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
+	return result;
+}
+
+static inline struct sk_buff *handle_ing(struct sk_buff *skb,
+					 struct packet_type **pt_prev,
+					 int *ret, struct net_device *orig_dev)
+{
+	if (!skb->dev->qdisc_ingress)
+		goto out;	/* no ingress qdisc on this device: skip the filter */
 
-		spin_lock(&dev->ingress_lock);
-		if ((q = dev->qdisc_ingress) != NULL)
-			result = q->enqueue(skb, q);
-		spin_unlock(&dev->ingress_lock);
+	if (*pt_prev) {
+		*ret = deliver_skb(skb, *pt_prev, orig_dev);
+		*pt_prev = NULL;	/* the pending handler has been run; clear it */
+	} else {
+		/* Huh? Why does turning on AF_PACKET affect this? */
+		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
+	}
 
+	switch (ing_filter(skb)) {
+	case TC_ACT_SHOT:
+	case TC_ACT_STOLEN:
+		kfree_skb(skb);
+		return NULL;	/* skb was freed above; NULL tells the caller to stop */
 	}
 
-	return result;
+out:
+	skb->tc_verd = 0;	/* clear tc_verd before handing the skb back */
+	return skb;
 }
 #endif
 
@@ -2014,21 +2044,9 @@ int netif_receive_skb(struct sk_buff *skb)
 	}
 
 #ifdef CONFIG_NET_CLS_ACT
-	if (pt_prev) {
-		ret = deliver_skb(skb, pt_prev, orig_dev);
-		pt_prev = NULL; /* noone else should process this after*/
-	} else {
-		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
-	}
-
-	ret = ing_filter(skb);
-
-	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
-		kfree_skb(skb);
+	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
+	if (!skb)
 		goto out;
-	}
-
-	skb->tc_verd = 0;
 ncls:
 #endif
 
@@ -2097,7 +2115,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 
 /**
  * __napi_schedule - schedule for receive
- * @napi: entry to schedule
+ * @n: entry to schedule
  *
  * The entry's receive function will be scheduled to run
  */
@@ -3259,6 +3277,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 
 /**
  *	dev_ioctl	-	network device ioctl
+ *	@net: the applicable net namespace
  *	@cmd: command to issue
  *	@arg: pointer to a struct ifreq in user space
  *
@@ -3436,6 +3455,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 
 /**
  *	dev_new_index	-	allocate an ifindex
+ *	@net: the applicable net namespace
  *
  *	Returns a suitable unique value for a new device interface
  *	number.  The caller must hold the rtnl semaphore or the
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index c52df858d0b..cd3af59b38a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -481,6 +481,8 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
 	if (!creat)
 		goto out;
 
+	ASSERT_RTNL();
+
 	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
 	if (!n)
 		goto out;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 909a03d6c0e..6628e457ddc 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -396,28 +396,22 @@ static struct attribute_group wireless_group = {
 #endif /* CONFIG_SYSFS */
 
 #ifdef CONFIG_HOTPLUG
-static int netdev_uevent(struct device *d, char **envp,
-			 int num_envp, char *buf, int size)
+static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
 {
 	struct net_device *dev = to_net_dev(d);
-	int retval, len = 0, i = 0;
+	int retval;
 
 	/* pass interface to uevent. */
-	retval = add_uevent_var(envp, num_envp, &i,
-				buf, size, &len,
-				"INTERFACE=%s", dev->name);
+	retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
 	if (retval)
 		goto exit;
 
 	/* pass ifindex to uevent.
 	 * ifindex is useful as it won't change (interface name may change)
 	 * and is what RtNetlink uses natively. */
-	retval = add_uevent_var(envp, num_envp, &i,
-				buf, size, &len,
-				"IFINDEX=%d", dev->ifindex);
+	retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);
 
 exit:
-	envp[i] = NULL;
 	return retval;
 }
 #endif
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 944189d9632..70d9b5da96a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -362,6 +362,97 @@ void kfree_skb(struct sk_buff *skb)
 	__kfree_skb(skb);
 }
 
+static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+{
+	new->tstamp		= old->tstamp;
+	new->dev		= old->dev;
+	new->transport_header	= old->transport_header;
+	new->network_header	= old->network_header;
+	new->mac_header		= old->mac_header;
+	new->dst		= dst_clone(old->dst);	/* NOTE(review): presumably takes a dst reference - confirm */
+#ifdef CONFIG_INET
+	new->sp			= secpath_get(old->sp);	/* NOTE(review): presumably takes a secpath reference - confirm */
+#endif
+	memcpy(new->cb, old->cb, sizeof(old->cb));	/* whole control buffer, copied as-is */
+	new->csum_start		= old->csum_start;
+	new->csum_offset	= old->csum_offset;
+	new->local_df		= old->local_df;
+	new->pkt_type		= old->pkt_type;
+	new->ip_summed		= old->ip_summed;
+	skb_copy_queue_mapping(new, old);
+	new->priority		= old->priority;
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+	new->ipvs_property	= old->ipvs_property;
+#endif
+	new->protocol		= old->protocol;
+	new->mark		= old->mark;
+	__nf_copy(new, old);
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+	new->nf_trace		= old->nf_trace;
+#endif
+#ifdef CONFIG_NET_SCHED
+	new->tc_index		= old->tc_index;
+#ifdef CONFIG_NET_CLS_ACT
+	new->tc_verd		= old->tc_verd;	/* copied verbatim; __skb_clone() adjusts it afterwards */
+#endif
+#endif
+	skb_copy_secmark(new, old);
+}
+
+static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
+{
+#define C(x) n->x = skb->x	/* copy field x from skb into n */
+
+	n->next = n->prev = NULL;
+	n->sk = NULL;	/* n starts without an owning socket */
+	__copy_skb_header(n, skb);
+
+	C(len);
+	C(data_len);
+	C(mac_len);
+	n->cloned = 1;
+	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
+	n->nohdr = 0;
+	n->destructor = NULL;
+#ifdef CONFIG_NET_CLS_ACT
+	/* FIXME What is this and why don't we do it in copy_skb_header? */
+	n->tc_verd = SET_TC_VERD(n->tc_verd,0);
+	n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
+	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
+	C(iif);
+#endif
+	C(truesize);
+	atomic_set(&n->users, 1);	/* n starts with a single user reference */
+	C(head);
+	C(data);
+	C(tail);
+	C(end);
+
+	atomic_inc(&(skb_shinfo(skb)->dataref));	/* n now uses the same head/data/tail/end as skb */
+	skb->cloned = 1;	/* the source is flagged as cloned too */
+
+	return n;
+#undef C
+}
+
+/**
+ *	skb_morph	-	morph one skb into another
+ *	@dst: the skb to receive the contents
+ *	@src: the skb to supply the contents
+ *
+ *	Calls skb_release_data() on @dst and then fills it in from @src
+ *	with the same __skb_clone() helper that skb_clone() uses; the only
+ *	difference is that the target skb is supplied by the caller.
+ *	Returns @dst.
+ */
+struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
+{
+	skb_release_data(dst);
+	return __skb_clone(dst, src);
+}
+EXPORT_SYMBOL_GPL(skb_morph);
+
 /**
  *	skb_clone	-	duplicate an sk_buff
  *	@skb: buffer to clone
@@ -393,66 +484,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 		n->fclone = SKB_FCLONE_UNAVAILABLE;
 	}
 
-#define C(x) n->x = skb->x
-
-	n->next = n->prev = NULL;
-	n->sk = NULL;
-	C(tstamp);
-	C(dev);
-	C(transport_header);
-	C(network_header);
-	C(mac_header);
-	C(dst);
-	dst_clone(skb->dst);
-	C(sp);
-#ifdef CONFIG_INET
-	secpath_get(skb->sp);
-#endif
-	memcpy(n->cb, skb->cb, sizeof(skb->cb));
-	C(len);
-	C(data_len);
-	C(mac_len);
-	C(csum);
-	C(local_df);
-	n->cloned = 1;
-	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
-	n->nohdr = 0;
-	C(pkt_type);
-	C(ip_summed);
-	skb_copy_queue_mapping(n, skb);
-	C(priority);
-#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
-	C(ipvs_property);
-#endif
-	C(protocol);
-	n->destructor = NULL;
-	C(mark);
-	__nf_copy(n, skb);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
-    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
-	C(nf_trace);
-#endif
-#ifdef CONFIG_NET_SCHED
-	C(tc_index);
-#ifdef CONFIG_NET_CLS_ACT
-	n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
-	n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
-	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
-	C(iif);
-#endif
-#endif
-	skb_copy_secmark(n, skb);
-	C(truesize);
-	atomic_set(&n->users, 1);
-	C(head);
-	C(data);
-	C(tail);
-	C(end);
-
-	atomic_inc(&(skb_shinfo(skb)->dataref));
-	skb->cloned = 1;
-
-	return n;
+	return __skb_clone(n, skb);
 }
 
 static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
@@ -463,50 +495,15 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	 */
 	unsigned long offset = new->data - old->data;
 #endif
-	new->sk		= NULL;
-	new->dev	= old->dev;
-	skb_copy_queue_mapping(new, old);
-	new->priority	= old->priority;
-	new->protocol	= old->protocol;
-	new->dst	= dst_clone(old->dst);
-#ifdef CONFIG_INET
-	new->sp		= secpath_get(old->sp);
-#endif
-	new->csum_start = old->csum_start;
-	new->csum_offset = old->csum_offset;
-	new->ip_summed = old->ip_summed;
-	new->transport_header = old->transport_header;
-	new->network_header   = old->network_header;
-	new->mac_header	      = old->mac_header;
+
+	__copy_skb_header(new, old);
+
 #ifndef NET_SKBUFF_DATA_USES_OFFSET
 	/* {transport,network,mac}_header are relative to skb->head */
 	new->transport_header += offset;
 	new->network_header   += offset;
 	new->mac_header	      += offset;
 #endif
-	memcpy(new->cb, old->cb, sizeof(old->cb));
-	new->local_df	= old->local_df;
-	new->fclone	= SKB_FCLONE_UNAVAILABLE;
-	new->pkt_type	= old->pkt_type;
-	new->tstamp	= old->tstamp;
-	new->destructor = NULL;
-	new->mark	= old->mark;
-	__nf_copy(new, old);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
-    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
-	new->nf_trace	= old->nf_trace;
-#endif
-#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
-	new->ipvs_property = old->ipvs_property;
-#endif
-#ifdef CONFIG_NET_SCHED
-#ifdef CONFIG_NET_CLS_ACT
-	new->tc_verd = old->tc_verd;
-#endif
-	new->tc_index	= old->tc_index;
-#endif
-	skb_copy_secmark(new, old);
-	atomic_set(&new->users, 1);
 	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
 	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
 	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
@@ -685,7 +682,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	skb->transport_header += off;
 	skb->network_header   += off;
 	skb->mac_header	      += off;
-	skb->csum_start       += off;
+	skb->csum_start       += nhead;
 	skb->cloned   = 0;
 	skb->hdr_len  = 0;
 	skb->nohdr    = 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 4ed9b507c1e..d45ecdccc6a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -869,6 +869,7 @@ static inline void sock_lock_init(struct sock *sk)
 
 /**
  *	sk_alloc - All socket objects are allocated here
+ *	@net: the applicable net namespace
  *	@family: protocol family
  *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
  *	@prot: struct proto associated with this new sock instance
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 006a3834fbc..cac53548c2d 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -767,10 +767,9 @@ discard:
 	return 0;
 }
 
-static int dccp_v6_rcv(struct sk_buff **pskb)
+static int dccp_v6_rcv(struct sk_buff *skb)
 {
 	const struct dccp_hdr *dh;
-	struct sk_buff *skb = *pskb;
 	struct sock *sk;
 	int min_cov;
 
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index f7fba7721e6..43fcd29046d 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -88,12 +88,12 @@ static void dnrmg_send_peer(struct sk_buff *skb)
 
 
 static unsigned int dnrmg_hook(unsigned int hook,
-			struct sk_buff **pskb,
+			struct sk_buff *skb,
 			const struct net_device *in,
 			const struct net_device *out,
 			int (*okfn)(struct sk_buff *))
 {
-	dnrmg_send_peer(*pskb);
+	dnrmg_send_peer(skb);
 	return NF_ACCEPT;
 }
 
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index a02c36d0a13..93fe3966805 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,7 +10,8 @@ obj-y     := route.o inetpeer.o protocol.o \
 	     tcp_minisocks.o tcp_cong.o \
 	     datagram.o raw.o udp.o udplite.o \
 	     arp.o icmp.o devinet.o af_inet.o  igmp.o \
-	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
+	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \
+	     inet_fragment.o
 
 obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
 obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
new file mode 100644
index 00000000000..484cf512858
--- /dev/null
+++ b/net/ipv4/inet_fragment.c
@@ -0,0 +1,174 @@
+/*
+ * inet fragments management
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * 		Authors:	Pavel Emelyanov <xemul@openvz.org>
+ *				Started as consolidation of ipv4/ip_fragment.c,
+ *				ipv6/reassembly.c and ipv6 nf conntrack reassembly
+ */
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+
+#include <net/inet_frag.h>
+
+static void inet_frag_secret_rebuild(unsigned long dummy)
+{
+	struct inet_frags *f = (struct inet_frags *)dummy;
+	unsigned long now = jiffies;
+	int i;
+
+	write_lock(&f->lock);
+	get_random_bytes(&f->rnd, sizeof(u32));
+	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+		struct inet_frag_queue *q;
+		struct hlist_node *p, *n;
+
+		hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) {
+			unsigned int hval = f->hashfn(q);
+
+			if (hval != i) {
+				hlist_del(&q->list);
+
+				/* Relink to new hash chain. */
+				hlist_add_head(&q->list, &f->hash[hval]);
+			}
+		}
+	}
+	write_unlock(&f->lock);
+
+	mod_timer(&f->secret_timer, now + f->ctl->secret_interval);
+}
+
+void inet_frags_init(struct inet_frags *f)
+{
+	int i;
+
+	for (i = 0; i < INETFRAGS_HASHSZ; i++)
+		INIT_HLIST_HEAD(&f->hash[i]);
+
+	INIT_LIST_HEAD(&f->lru_list);
+	rwlock_init(&f->lock);
+
+	f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
+				   (jiffies ^ (jiffies >> 6)));
+
+	f->nqueues = 0;
+	atomic_set(&f->mem, 0);
+
+	init_timer(&f->secret_timer);
+	f->secret_timer.function = inet_frag_secret_rebuild;
+	f->secret_timer.data = (unsigned long)f;
+	f->secret_timer.expires = jiffies + f->ctl->secret_interval;
+	add_timer(&f->secret_timer);
+}
+EXPORT_SYMBOL(inet_frags_init);
+
+void inet_frags_fini(struct inet_frags *f)
+{
+	del_timer(&f->secret_timer);
+}
+EXPORT_SYMBOL(inet_frags_fini);
+
+static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+	write_lock(&f->lock);
+	hlist_del(&fq->list);
+	list_del(&fq->lru_list);
+	f->nqueues--;
+	write_unlock(&f->lock);
+}
+
+void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+	if (del_timer(&fq->timer))
+		atomic_dec(&fq->refcnt);
+
+	if (!(fq->last_in & COMPLETE)) {
+		fq_unlink(fq, f);
+		atomic_dec(&fq->refcnt);
+		fq->last_in |= COMPLETE;
+	}
+}
+
+EXPORT_SYMBOL(inet_frag_kill);
+
+static inline void frag_kfree_skb(struct inet_frags *f, struct sk_buff *skb,
+						int *work)
+{
+	if (work)
+		*work -= skb->truesize;
+
+	atomic_sub(skb->truesize, &f->mem);
+	if (f->skb_free)
+		f->skb_free(skb);
+	kfree_skb(skb);
+}
+
+void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
+					int *work)
+{
+	struct sk_buff *fp;
+
+	BUG_TRAP(q->last_in & COMPLETE);
+	BUG_TRAP(del_timer(&q->timer) == 0);
+
+	/* Release all fragment data. */
+	fp = q->fragments;
+	while (fp) {
+		struct sk_buff *xp = fp->next;
+
+		frag_kfree_skb(f, fp, work);
+		fp = xp;
+	}
+
+	if (work)
+		*work -= f->qsize;
+	atomic_sub(f->qsize, &f->mem);
+
+	f->destructor(q);
+
+}
+EXPORT_SYMBOL(inet_frag_destroy);
+
+int inet_frag_evictor(struct inet_frags *f)
+{
+	struct inet_frag_queue *q;
+	int work, evicted = 0;
+
+	work = atomic_read(&f->mem) - f->ctl->low_thresh;
+	while (work > 0) {
+		read_lock(&f->lock);
+		if (list_empty(&f->lru_list)) {
+			read_unlock(&f->lock);
+			break;
+		}
+
+		q = list_first_entry(&f->lru_list,
+				struct inet_frag_queue, lru_list);
+		atomic_inc(&q->refcnt);
+		read_unlock(&f->lock);
+
+		spin_lock(&q->lock);
+		if (!(q->last_in & COMPLETE))
+			inet_frag_kill(q, f);
+		spin_unlock(&q->lock);
+
+		if (atomic_dec_and_test(&q->refcnt))
+			inet_frag_destroy(q, f, &work);
+		evicted++;
+	}
+
+	return evicted;
+}
+EXPORT_SYMBOL(inet_frag_evictor);
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index 4545b64e281..ac3b1d3dba2 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -77,7 +77,7 @@ static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph,
 
 	/* check tcp options (only timestamp allowed) */
 	if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
-		u32 *topt = (u32 *)(tcph + 1);
+		__be32 *topt = (__be32 *)(tcph + 1);
 
 		if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
 				   | (TCPOPT_TIMESTAMP << 8)
@@ -103,14 +103,14 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
 {
 	struct iphdr *iph = lro_desc->iph;
 	struct tcphdr *tcph = lro_desc->tcph;
-	u32 *p;
+	__be32 *p;
 	__wsum tcp_hdr_csum;
 
 	tcph->ack_seq = lro_desc->tcp_ack;
 	tcph->window = lro_desc->tcp_window;
 
 	if (lro_desc->tcp_saw_tstamp) {
-		p = (u32 *)(tcph + 1);
+		p = (__be32 *)(tcph + 1);
 		*(p+2) = lro_desc->tcp_rcv_tsecr;
 	}
 
@@ -150,7 +150,7 @@ static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
 			  u16 vlan_tag, struct vlan_group *vgrp)
 {
 	int nr_frags;
-	u32 *ptr;
+	__be32 *ptr;
 	u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
 
 	nr_frags = skb_shinfo(skb)->nr_frags;
@@ -159,14 +159,14 @@ static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
 	lro_desc->iph = iph;
 	lro_desc->tcph = tcph;
 	lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
-	lro_desc->tcp_ack = ntohl(tcph->ack_seq);
+	lro_desc->tcp_ack = tcph->ack_seq;
 	lro_desc->tcp_window = tcph->window;
 
 	lro_desc->pkt_aggr_cnt = 1;
 	lro_desc->ip_tot_len = ntohs(iph->tot_len);
 
 	if (tcph->doff == 8) {
-		ptr = (u32 *)(tcph+1);
+		ptr = (__be32 *)(tcph+1);
 		lro_desc->tcp_saw_tstamp = 1;
 		lro_desc->tcp_rcv_tsval = *(ptr+1);
 		lro_desc->tcp_rcv_tsecr = *(ptr+2);
@@ -190,7 +190,7 @@ static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
 			   struct tcphdr *tcph, int tcp_data_len)
 {
 	struct sk_buff *parent = lro_desc->parent;
-	u32 *topt;
+	__be32 *topt;
 
 	lro_desc->pkt_aggr_cnt++;
 	lro_desc->ip_tot_len += tcp_data_len;
@@ -200,7 +200,7 @@ static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
 
 	/* don't update tcp_rcv_tsval, would not work with PAWS */
 	if (lro_desc->tcp_saw_tstamp) {
-		topt = (u32 *) (tcph + 1);
+		topt = (__be32 *) (tcph + 1);
 		lro_desc->tcp_rcv_tsecr = *(topt + 2);
 	}
 
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index afbf938836f..877da3ed52e 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -40,7 +40,7 @@
 #include <net/route.h>
 #include <net/xfrm.h>
 
-static inline int ip_forward_finish(struct sk_buff *skb)
+static int ip_forward_finish(struct sk_buff *skb)
 {
 	struct ip_options * opt	= &(IPCB(skb)->opt);
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fabb86db763..443b3f89192 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -39,6 +39,7 @@
 #include <net/icmp.h>
 #include <net/checksum.h>
 #include <net/inetpeer.h>
+#include <net/inet_frag.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/inet.h>
@@ -49,21 +50,8 @@
  * as well. Or notify me, at least. --ANK
  */
 
-/* Fragment cache limits. We will commit 256K at one time. Should we
- * cross that limit we will prune down to 192K. This should cope with
- * even the most extreme cases without allowing an attacker to measurably
- * harm machine performance.
- */
-int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
-int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
-
 int sysctl_ipfrag_max_dist __read_mostly = 64;
 
-/* Important NOTE! Fragment queue must be destroyed before MSL expires.
- * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
- */
-int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
-
 struct ipfrag_skb_cb
 {
 	struct inet_skb_parm	h;
@@ -74,153 +62,102 @@ struct ipfrag_skb_cb
 
 /* Describe an entry in the "incomplete datagrams" queue. */
 struct ipq {
-	struct hlist_node list;
-	struct list_head lru_list;	/* lru list member 			*/
+	struct inet_frag_queue q;
+
 	u32		user;
 	__be32		saddr;
 	__be32		daddr;
 	__be16		id;
 	u8		protocol;
-	u8		last_in;
-#define COMPLETE		4
-#define FIRST_IN		2
-#define LAST_IN			1
-
-	struct sk_buff	*fragments;	/* linked list of received fragments	*/
-	int		len;		/* total length of original datagram	*/
-	int		meat;
-	spinlock_t	lock;
-	atomic_t	refcnt;
-	struct timer_list timer;	/* when will this queue expire?		*/
-	ktime_t		stamp;
 	int             iif;
 	unsigned int    rid;
 	struct inet_peer *peer;
 };
 
-/* Hash table. */
+struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
+	/*
+	 * Fragment cache limits. We will commit 256K at one time. Should we
+	 * cross that limit we will prune down to 192K. This should cope with
+	 * even the most extreme cases without allowing an attacker to
+	 * measurably harm machine performance.
+	 */
+	.high_thresh	 = 256 * 1024,
+	.low_thresh	 = 192 * 1024,
 
-#define IPQ_HASHSZ	64
+	/*
+	 * Important NOTE! Fragment queue must be destroyed before MSL expires.
+	 * RFC791 is wrong in proposing to prolong the timer by TTL on each
+	 * fragment arrival.
+	 */
+	.timeout	 = IP_FRAG_TIME,
+	.secret_interval = 10 * 60 * HZ,
+};
 
-/* Per-bucket lock is easy to add now. */
-static struct hlist_head ipq_hash[IPQ_HASHSZ];
-static DEFINE_RWLOCK(ipfrag_lock);
-static u32 ipfrag_hash_rnd;
-static LIST_HEAD(ipq_lru_list);
-int ip_frag_nqueues = 0;
+static struct inet_frags ip4_frags;
 
-static __inline__ void __ipq_unlink(struct ipq *qp)
+int ip_frag_nqueues(void)
 {
-	hlist_del(&qp->list);
-	list_del(&qp->lru_list);
-	ip_frag_nqueues--;
+	return ip4_frags.nqueues;
 }
 
-static __inline__ void ipq_unlink(struct ipq *ipq)
+int ip_frag_mem(void)
 {
-	write_lock(&ipfrag_lock);
-	__ipq_unlink(ipq);
-	write_unlock(&ipfrag_lock);
+	return atomic_read(&ip4_frags.mem);
 }
 
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+			 struct net_device *dev);
+
 static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
 {
 	return jhash_3words((__force u32)id << 16 | prot,
 			    (__force u32)saddr, (__force u32)daddr,
-			    ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
+			    ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
 }
 
-static struct timer_list ipfrag_secret_timer;
-int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
-
-static void ipfrag_secret_rebuild(unsigned long dummy)
+static unsigned int ip4_hashfn(struct inet_frag_queue *q)
 {
-	unsigned long now = jiffies;
-	int i;
-
-	write_lock(&ipfrag_lock);
-	get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
-	for (i = 0; i < IPQ_HASHSZ; i++) {
-		struct ipq *q;
-		struct hlist_node *p, *n;
-
-		hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) {
-			unsigned int hval = ipqhashfn(q->id, q->saddr,
-						      q->daddr, q->protocol);
-
-			if (hval != i) {
-				hlist_del(&q->list);
+	struct ipq *ipq;
 
-				/* Relink to new hash chain. */
-				hlist_add_head(&q->list, &ipq_hash[hval]);
-			}
-		}
-	}
-	write_unlock(&ipfrag_lock);
-
-	mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval);
+	ipq = container_of(q, struct ipq, q);
+	return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
 }
 
-atomic_t ip_frag_mem = ATOMIC_INIT(0);	/* Memory used for fragments */
-
 /* Memory Tracking Functions. */
 static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
 {
 	if (work)
 		*work -= skb->truesize;
-	atomic_sub(skb->truesize, &ip_frag_mem);
+	atomic_sub(skb->truesize, &ip4_frags.mem);
 	kfree_skb(skb);
 }
 
-static __inline__ void frag_free_queue(struct ipq *qp, int *work)
+static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
 {
-	if (work)
-		*work -= sizeof(struct ipq);
-	atomic_sub(sizeof(struct ipq), &ip_frag_mem);
+	struct ipq *qp;
+
+	qp = container_of(q, struct ipq, q);
+	if (qp->peer)
+		inet_putpeer(qp->peer);
 	kfree(qp);
 }
 
 static __inline__ struct ipq *frag_alloc_queue(void)
 {
-	struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
+	struct ipq *qp = kzalloc(sizeof(struct ipq), GFP_ATOMIC);
 
 	if (!qp)
 		return NULL;
-	atomic_add(sizeof(struct ipq), &ip_frag_mem);
+	atomic_add(sizeof(struct ipq), &ip4_frags.mem);
 	return qp;
 }
 
 
 /* Destruction primitives. */
 
-/* Complete destruction of ipq. */
-static void ip_frag_destroy(struct ipq *qp, int *work)
-{
-	struct sk_buff *fp;
-
-	BUG_TRAP(qp->last_in&COMPLETE);
-	BUG_TRAP(del_timer(&qp->timer) == 0);
-
-	if (qp->peer)
-		inet_putpeer(qp->peer);
-
-	/* Release all fragment data. */
-	fp = qp->fragments;
-	while (fp) {
-		struct sk_buff *xp = fp->next;
-
-		frag_kfree_skb(fp, work);
-		fp = xp;
-	}
-
-	/* Finally, release the queue descriptor itself. */
-	frag_free_queue(qp, work);
-}
-
-static __inline__ void ipq_put(struct ipq *ipq, int *work)
+static __inline__ void ipq_put(struct ipq *ipq)
 {
-	if (atomic_dec_and_test(&ipq->refcnt))
-		ip_frag_destroy(ipq, work);
+	inet_frag_put(&ipq->q, &ip4_frags);
 }
 
 /* Kill ipq entry. It is not destroyed immediately,
@@ -228,14 +165,7 @@ static __inline__ void ipq_put(struct ipq *ipq, int *work)
  */
 static void ipq_kill(struct ipq *ipq)
 {
-	if (del_timer(&ipq->timer))
-		atomic_dec(&ipq->refcnt);
-
-	if (!(ipq->last_in & COMPLETE)) {
-		ipq_unlink(ipq);
-		atomic_dec(&ipq->refcnt);
-		ipq->last_in |= COMPLETE;
-	}
+	inet_frag_kill(&ipq->q, &ip4_frags);
 }
 
 /* Memory limiting on fragments.  Evictor trashes the oldest
@@ -243,33 +173,11 @@ static void ipq_kill(struct ipq *ipq)
  */
 static void ip_evictor(void)
 {
-	struct ipq *qp;
-	struct list_head *tmp;
-	int work;
-
-	work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh;
-	if (work <= 0)
-		return;
-
-	while (work > 0) {
-		read_lock(&ipfrag_lock);
-		if (list_empty(&ipq_lru_list)) {
-			read_unlock(&ipfrag_lock);
-			return;
-		}
-		tmp = ipq_lru_list.next;
-		qp = list_entry(tmp, struct ipq, lru_list);
-		atomic_inc(&qp->refcnt);
-		read_unlock(&ipfrag_lock);
+	int evicted;
 
-		spin_lock(&qp->lock);
-		if (!(qp->last_in&COMPLETE))
-			ipq_kill(qp);
-		spin_unlock(&qp->lock);
-
-		ipq_put(qp, &work);
-		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
-	}
+	evicted = inet_frag_evictor(&ip4_frags);
+	if (evicted)
+		IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted);
 }
 
 /*
@@ -279,9 +187,9 @@ static void ip_expire(unsigned long arg)
 {
 	struct ipq *qp = (struct ipq *) arg;
 
-	spin_lock(&qp->lock);
+	spin_lock(&qp->q.lock);
 
-	if (qp->last_in & COMPLETE)
+	if (qp->q.last_in & COMPLETE)
 		goto out;
 
 	ipq_kill(qp);
@@ -289,8 +197,8 @@ static void ip_expire(unsigned long arg)
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
 
-	if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) {
-		struct sk_buff *head = qp->fragments;
+	if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) {
+		struct sk_buff *head = qp->q.fragments;
 		/* Send an ICMP "Fragment Reassembly Timeout" message. */
 		if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) {
 			icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
@@ -298,8 +206,8 @@ static void ip_expire(unsigned long arg)
 		}
 	}
 out:
-	spin_unlock(&qp->lock);
-	ipq_put(qp, NULL);
+	spin_unlock(&qp->q.lock);
+	ipq_put(qp);
 }
 
 /* Creation primitives. */
@@ -312,7 +220,7 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
 #endif
 	unsigned int hash;
 
-	write_lock(&ipfrag_lock);
+	write_lock(&ip4_frags.lock);
 	hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
 			 qp_in->protocol);
 #ifdef CONFIG_SMP
@@ -320,31 +228,31 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
 	 * such entry could be created on other cpu, while we
 	 * promoted read lock to write lock.
 	 */
-	hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
+	hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
 		if (qp->id == qp_in->id		&&
 		    qp->saddr == qp_in->saddr	&&
 		    qp->daddr == qp_in->daddr	&&
 		    qp->protocol == qp_in->protocol &&
 		    qp->user == qp_in->user) {
-			atomic_inc(&qp->refcnt);
-			write_unlock(&ipfrag_lock);
-			qp_in->last_in |= COMPLETE;
-			ipq_put(qp_in, NULL);
+			atomic_inc(&qp->q.refcnt);
+			write_unlock(&ip4_frags.lock);
+			qp_in->q.last_in |= COMPLETE;
+			ipq_put(qp_in);
 			return qp;
 		}
 	}
 #endif
 	qp = qp_in;
 
-	if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time))
-		atomic_inc(&qp->refcnt);
+	if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout))
+		atomic_inc(&qp->q.refcnt);
 
-	atomic_inc(&qp->refcnt);
-	hlist_add_head(&qp->list, &ipq_hash[hash]);
-	INIT_LIST_HEAD(&qp->lru_list);
-	list_add_tail(&qp->lru_list, &ipq_lru_list);
-	ip_frag_nqueues++;
-	write_unlock(&ipfrag_lock);
+	atomic_inc(&qp->q.refcnt);
+	hlist_add_head(&qp->q.list, &ip4_frags.hash[hash]);
+	INIT_LIST_HEAD(&qp->q.lru_list);
+	list_add_tail(&qp->q.lru_list, &ip4_frags.lru_list);
+	ip4_frags.nqueues++;
+	write_unlock(&ip4_frags.lock);
 	return qp;
 }
 
@@ -357,23 +265,18 @@ static struct ipq *ip_frag_create(struct iphdr *iph, u32 user)
 		goto out_nomem;
 
 	qp->protocol = iph->protocol;
-	qp->last_in = 0;
 	qp->id = iph->id;
 	qp->saddr = iph->saddr;
 	qp->daddr = iph->daddr;
 	qp->user = user;
-	qp->len = 0;
-	qp->meat = 0;
-	qp->fragments = NULL;
-	qp->iif = 0;
 	qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
 
 	/* Initialize a timer for this entry. */
-	init_timer(&qp->timer);
-	qp->timer.data = (unsigned long) qp;	/* pointer to queue	*/
-	qp->timer.function = ip_expire;		/* expire function	*/
-	spin_lock_init(&qp->lock);
-	atomic_set(&qp->refcnt, 1);
+	init_timer(&qp->q.timer);
+	qp->q.timer.data = (unsigned long) qp;	/* pointer to queue	*/
+	qp->q.timer.function = ip_expire;		/* expire function	*/
+	spin_lock_init(&qp->q.lock);
+	atomic_set(&qp->q.refcnt, 1);
 
 	return ip_frag_intern(qp);
 
@@ -395,20 +298,20 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 	struct ipq *qp;
 	struct hlist_node *n;
 
-	read_lock(&ipfrag_lock);
+	read_lock(&ip4_frags.lock);
 	hash = ipqhashfn(id, saddr, daddr, protocol);
-	hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
+	hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
 		if (qp->id == id		&&
 		    qp->saddr == saddr	&&
 		    qp->daddr == daddr	&&
 		    qp->protocol == protocol &&
 		    qp->user == user) {
-			atomic_inc(&qp->refcnt);
-			read_unlock(&ipfrag_lock);
+			atomic_inc(&qp->q.refcnt);
+			read_unlock(&ip4_frags.lock);
 			return qp;
 		}
 	}
-	read_unlock(&ipfrag_lock);
+	read_unlock(&ip4_frags.lock);
 
 	return ip_frag_create(iph, user);
 }
@@ -429,7 +332,7 @@ static inline int ip_frag_too_far(struct ipq *qp)
 	end = atomic_inc_return(&peer->rid);
 	qp->rid = end;
 
-	rc = qp->fragments && (end - start) > max;
+	rc = qp->q.fragments && (end - start) > max;
 
 	if (rc) {
 		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
@@ -442,39 +345,42 @@ static int ip_frag_reinit(struct ipq *qp)
 {
 	struct sk_buff *fp;
 
-	if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {
-		atomic_inc(&qp->refcnt);
+	if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) {
+		atomic_inc(&qp->q.refcnt);
 		return -ETIMEDOUT;
 	}
 
-	fp = qp->fragments;
+	fp = qp->q.fragments;
 	do {
 		struct sk_buff *xp = fp->next;
 		frag_kfree_skb(fp, NULL);
 		fp = xp;
 	} while (fp);
 
-	qp->last_in = 0;
-	qp->len = 0;
-	qp->meat = 0;
-	qp->fragments = NULL;
+	qp->q.last_in = 0;
+	qp->q.len = 0;
+	qp->q.meat = 0;
+	qp->q.fragments = NULL;
 	qp->iif = 0;
 
 	return 0;
 }
 
 /* Add new segment to existing queue. */
-static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
 	struct sk_buff *prev, *next;
+	struct net_device *dev;
 	int flags, offset;
 	int ihl, end;
+	int err = -ENOENT;
 
-	if (qp->last_in & COMPLETE)
+	if (qp->q.last_in & COMPLETE)
 		goto err;
 
 	if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
-	    unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) {
+	    unlikely(ip_frag_too_far(qp)) &&
+	    unlikely(err = ip_frag_reinit(qp))) {
 		ipq_kill(qp);
 		goto err;
 	}
@@ -487,36 +393,40 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 
 	/* Determine the position of this fragment. */
 	end = offset + skb->len - ihl;
+	err = -EINVAL;
 
 	/* Is this the final fragment? */
 	if ((flags & IP_MF) == 0) {
 		/* If we already have some bits beyond end
 		 * or have different end, the segment is corrrupted.
 		 */
-		if (end < qp->len ||
-		    ((qp->last_in & LAST_IN) && end != qp->len))
+		if (end < qp->q.len ||
+		    ((qp->q.last_in & LAST_IN) && end != qp->q.len))
 			goto err;
-		qp->last_in |= LAST_IN;
-		qp->len = end;
+		qp->q.last_in |= LAST_IN;
+		qp->q.len = end;
 	} else {
 		if (end&7) {
 			end &= ~7;
 			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
 				skb->ip_summed = CHECKSUM_NONE;
 		}
-		if (end > qp->len) {
+		if (end > qp->q.len) {
 			/* Some bits beyond end -> corruption. */
-			if (qp->last_in & LAST_IN)
+			if (qp->q.last_in & LAST_IN)
 				goto err;
-			qp->len = end;
+			qp->q.len = end;
 		}
 	}
 	if (end == offset)
 		goto err;
 
+	err = -ENOMEM;
 	if (pskb_pull(skb, ihl) == NULL)
 		goto err;
-	if (pskb_trim_rcsum(skb, end-offset))
+
+	err = pskb_trim_rcsum(skb, end - offset);
+	if (err)
 		goto err;
 
 	/* Find out which fragments are in front and at the back of us
@@ -524,7 +434,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	 * this fragment, right?
 	 */
 	prev = NULL;
-	for (next = qp->fragments; next != NULL; next = next->next) {
+	for (next = qp->q.fragments; next != NULL; next = next->next) {
 		if (FRAG_CB(next)->offset >= offset)
 			break;	/* bingo! */
 		prev = next;
@@ -539,8 +449,10 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 
 		if (i > 0) {
 			offset += i;
+			err = -EINVAL;
 			if (end <= offset)
 				goto err;
+			err = -ENOMEM;
 			if (!pskb_pull(skb, i))
 				goto err;
 			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
@@ -548,6 +460,8 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		}
 	}
 
+	err = -ENOMEM;
+
 	while (next && FRAG_CB(next)->offset < end) {
 		int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
 
@@ -558,7 +472,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 			if (!pskb_pull(next, i))
 				goto err;
 			FRAG_CB(next)->offset += i;
-			qp->meat -= i;
+			qp->q.meat -= i;
 			if (next->ip_summed != CHECKSUM_UNNECESSARY)
 				next->ip_summed = CHECKSUM_NONE;
 			break;
@@ -573,9 +487,9 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 			if (prev)
 				prev->next = next;
 			else
-				qp->fragments = next;
+				qp->q.fragments = next;
 
-			qp->meat -= free_it->len;
+			qp->q.meat -= free_it->len;
 			frag_kfree_skb(free_it, NULL);
 		}
 	}
@@ -587,50 +501,77 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	if (prev)
 		prev->next = skb;
 	else
-		qp->fragments = skb;
-
-	if (skb->dev)
-		qp->iif = skb->dev->ifindex;
-	skb->dev = NULL;
-	qp->stamp = skb->tstamp;
-	qp->meat += skb->len;
-	atomic_add(skb->truesize, &ip_frag_mem);
+		qp->q.fragments = skb;
+
+	dev = skb->dev;
+	if (dev) {
+		qp->iif = dev->ifindex;
+		skb->dev = NULL;
+	}
+	qp->q.stamp = skb->tstamp;
+	qp->q.meat += skb->len;
+	atomic_add(skb->truesize, &ip4_frags.mem);
 	if (offset == 0)
-		qp->last_in |= FIRST_IN;
+		qp->q.last_in |= FIRST_IN;
 
-	write_lock(&ipfrag_lock);
-	list_move_tail(&qp->lru_list, &ipq_lru_list);
-	write_unlock(&ipfrag_lock);
+	if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len)
+		return ip_frag_reasm(qp, prev, dev);
 
-	return;
+	write_lock(&ip4_frags.lock);
+	list_move_tail(&qp->q.lru_list, &ip4_frags.lru_list);
+	write_unlock(&ip4_frags.lock);
+	return -EINPROGRESS;
 
 err:
 	kfree_skb(skb);
+	return err;
 }
 
 
 /* Build a new IP datagram from all its fragments. */
 
-static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+			 struct net_device *dev)
 {
 	struct iphdr *iph;
-	struct sk_buff *fp, *head = qp->fragments;
+	struct sk_buff *fp, *head = qp->q.fragments;
 	int len;
 	int ihlen;
+	int err = -ENOMEM;	/* early skb_clone() failure returns err */
 
 	ipq_kill(qp);
 
+	/* Make the one we just received the head. */
+	if (prev) {
+		head = prev->next;
+		fp = skb_clone(head, GFP_ATOMIC);
+
+		if (!fp)
+			goto out_nomem;
+
+		fp->next = head->next;
+		prev->next = fp;
+
+		skb_morph(head, qp->q.fragments);
+		head->next = qp->q.fragments->next;
+
+		kfree_skb(qp->q.fragments);
+		qp->q.fragments = head;
+	}
+
 	BUG_TRAP(head != NULL);
 	BUG_TRAP(FRAG_CB(head)->offset == 0);
 
 	/* Allocate a new buffer for the datagram. */
 	ihlen = ip_hdrlen(head);
-	len = ihlen + qp->len;
+	len = ihlen + qp->q.len;
 
+	err = -E2BIG;
 	if (len > 65535)
 		goto out_oversize;
 
 	/* Head of list must not be cloned. */
+	err = -ENOMEM;
 	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
 		goto out_nomem;
 
@@ -654,12 +595,12 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
 		head->len -= clone->len;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
-		atomic_add(clone->truesize, &ip_frag_mem);
+		atomic_add(clone->truesize, &ip4_frags.mem);
 	}
 
 	skb_shinfo(head)->frag_list = head->next;
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &ip_frag_mem);
+	atomic_sub(head->truesize, &ip4_frags.mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -669,19 +610,19 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &ip_frag_mem);
+		atomic_sub(fp->truesize, &ip4_frags.mem);
 	}
 
 	head->next = NULL;
 	head->dev = dev;
-	head->tstamp = qp->stamp;
+	head->tstamp = qp->q.stamp;
 
 	iph = ip_hdr(head);
 	iph->frag_off = 0;
 	iph->tot_len = htons(len);
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
-	qp->fragments = NULL;
-	return head;
+	qp->q.fragments = NULL;
+	return 0;
 
 out_nomem:
 	LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
@@ -694,54 +635,46 @@ out_oversize:
 			NIPQUAD(qp->saddr));
 out_fail:
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
-	return NULL;
+	return err;
 }
 
 /* Process an incoming IP datagram fragment. */
-struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
+int ip_defrag(struct sk_buff *skb, u32 user)
 {
 	struct ipq *qp;
-	struct net_device *dev;
 
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
 
 	/* Start by cleaning up the memory. */
-	if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh)
+	if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh)
 		ip_evictor();
 
-	dev = skb->dev;
-
 	/* Lookup (or create) queue header */
 	if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
-		struct sk_buff *ret = NULL;
-
-		spin_lock(&qp->lock);
+		int ret;
 
-		ip_frag_queue(qp, skb);
+		spin_lock(&qp->q.lock);
 
-		if (qp->last_in == (FIRST_IN|LAST_IN) &&
-		    qp->meat == qp->len)
-			ret = ip_frag_reasm(qp, dev);
+		ret = ip_frag_queue(qp, skb);
 
-		spin_unlock(&qp->lock);
-		ipq_put(qp, NULL);
+		spin_unlock(&qp->q.lock);
+		ipq_put(qp);
 		return ret;
 	}
 
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);
-	return NULL;
+	return -ENOMEM;
 }
 
 void __init ipfrag_init(void)
 {
-	ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
-				 (jiffies ^ (jiffies >> 6)));
-
-	init_timer(&ipfrag_secret_timer);
-	ipfrag_secret_timer.function = ipfrag_secret_rebuild;
-	ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
-	add_timer(&ipfrag_secret_timer);
+	ip4_frags.ctl = &ip4_frags_ctl;
+	ip4_frags.hashfn = ip4_hashfn;
+	ip4_frags.destructor = ip4_frag_free;
+	ip4_frags.skb_free = NULL;
+	ip4_frags.qsize = sizeof(struct ipq);
+	inet_frags_init(&ip4_frags);
 }
 
 EXPORT_SYMBOL(ip_defrag);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 41d8964591e..168c871fcd7 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -172,8 +172,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
 		    (!sk->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
 			if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-				skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN);
-				if (skb == NULL) {
+				if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) {
 					read_unlock(&ip_ra_lock);
 					return 1;
 				}
@@ -196,7 +195,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
 	return 0;
 }
 
-static inline int ip_local_deliver_finish(struct sk_buff *skb)
+static int ip_local_deliver_finish(struct sk_buff *skb)
 {
 	__skb_pull(skb, ip_hdrlen(skb));
 
@@ -265,8 +264,7 @@ int ip_local_deliver(struct sk_buff *skb)
 	 */
 
 	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-		skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER);
-		if (!skb)
+		if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER))
 			return 0;
 	}
 
@@ -326,7 +324,7 @@ drop:
 	return -1;
 }
 
-static inline int ip_rcv_finish(struct sk_buff *skb)
+static int ip_rcv_finish(struct sk_buff *skb)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct rtable *rt;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 699f06781fd..f508835ba71 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -202,7 +202,7 @@ static inline int ip_skb_dst_mtu(struct sk_buff *skb)
 	       skb->dst->dev->mtu : dst_mtu(skb->dst);
 }
 
-static inline int ip_finish_output(struct sk_buff *skb)
+static int ip_finish_output(struct sk_buff *skb)
 {
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
 	/* Policy lookup after SNAT yielded a new policy */
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 341474eefa5..664cb8e97c1 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -25,6 +25,7 @@
 #include <linux/skbuff.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <linux/netfilter.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
 #include <net/tcp.h>
@@ -328,18 +329,18 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
 	spin_unlock(&cp->lock);
 }
 
-static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
+static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
 				  struct ip_vs_app *app)
 {
 	int diff;
-	const unsigned int tcp_offset = ip_hdrlen(*pskb);
+	const unsigned int tcp_offset = ip_hdrlen(skb);
 	struct tcphdr *th;
 	__u32 seq;
 
-	if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
+	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
 		return 0;
 
-	th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
+	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
 
 	/*
 	 *	Remember seq number in case this pkt gets resized
@@ -360,7 +361,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
 	if (app->pkt_out == NULL)
 		return 1;
 
-	if (!app->pkt_out(app, cp, pskb, &diff))
+	if (!app->pkt_out(app, cp, skb, &diff))
 		return 0;
 
 	/*
@@ -378,7 +379,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
  *	called by ipvs packet handler, assumes previously checked cp!=NULL
  *	returns false if it can't handle packet (oom)
  */
-int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb)
+int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
 	struct ip_vs_app *app;
 
@@ -391,7 +392,7 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb)
 
 	/* TCP is complicated */
 	if (cp->protocol == IPPROTO_TCP)
-		return app_tcp_pkt_out(cp, pskb, app);
+		return app_tcp_pkt_out(cp, skb, app);
 
 	/*
 	 *	Call private output hook function
@@ -399,22 +400,22 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb)
 	if (app->pkt_out == NULL)
 		return 1;
 
-	return app->pkt_out(app, cp, pskb, NULL);
+	return app->pkt_out(app, cp, skb, NULL);
 }
 
 
-static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
+static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
 				 struct ip_vs_app *app)
 {
 	int diff;
-	const unsigned int tcp_offset = ip_hdrlen(*pskb);
+	const unsigned int tcp_offset = ip_hdrlen(skb);
 	struct tcphdr *th;
 	__u32 seq;
 
-	if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
+	if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
 		return 0;
 
-	th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
+	th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
 
 	/*
 	 *	Remember seq number in case this pkt gets resized
@@ -435,7 +436,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
 	if (app->pkt_in == NULL)
 		return 1;
 
-	if (!app->pkt_in(app, cp, pskb, &diff))
+	if (!app->pkt_in(app, cp, skb, &diff))
 		return 0;
 
 	/*
@@ -453,7 +454,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
  *	called by ipvs packet handler, assumes previously checked cp!=NULL.
  *	returns false if can't handle packet (oom).
  */
-int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb)
+int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
 	struct ip_vs_app *app;
 
@@ -466,7 +467,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb)
 
 	/* TCP is complicated */
 	if (cp->protocol == IPPROTO_TCP)
-		return app_tcp_pkt_in(cp, pskb, app);
+		return app_tcp_pkt_in(cp, skb, app);
 
 	/*
 	 *	Call private input hook function
@@ -474,7 +475,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb)
 	if (app->pkt_in == NULL)
 		return 1;
 
-	return app->pkt_in(app, cp, pskb, NULL);
+	return app->pkt_in(app, cp, skb, NULL);
 }
 
 
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index fbca2a2ff29..c6ed7654e83 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -58,7 +58,6 @@ EXPORT_SYMBOL(ip_vs_conn_put);
 #ifdef CONFIG_IP_VS_DEBUG
 EXPORT_SYMBOL(ip_vs_get_debug_level);
 #endif
-EXPORT_SYMBOL(ip_vs_make_skb_writable);
 
 
 /* ID used in ICMP lookups */
@@ -163,42 +162,6 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction,
 }
 
 
-int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len)
-{
-	struct sk_buff *skb = *pskb;
-
-	/* skb is already used, better copy skb and its payload */
-	if (unlikely(skb_shared(skb) || skb->sk))
-		goto copy_skb;
-
-	/* skb data is already used, copy it */
-	if (unlikely(skb_cloned(skb)))
-		goto copy_data;
-
-	return pskb_may_pull(skb, writable_len);
-
-  copy_data:
-	if (unlikely(writable_len > skb->len))
-		return 0;
-	return !pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-
-  copy_skb:
-	if (unlikely(writable_len > skb->len))
-		return 0;
-	skb = skb_copy(skb, GFP_ATOMIC);
-	if (!skb)
-		return 0;
-	BUG_ON(skb_is_nonlinear(skb));
-
-	/* Rest of kernel will get very unhappy if we pass it a
-	   suddenly-orphaned skbuff */
-	if ((*pskb)->sk)
-		skb_set_owner_w(skb, (*pskb)->sk);
-	kfree_skb(*pskb);
-	*pskb = skb;
-	return 1;
-}
-
 /*
  *  IPVS persistent scheduling function
  *  It creates a connection entry according to its template if exists,
@@ -525,12 +488,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
  *      for VS/NAT.
  */
 static unsigned int ip_vs_post_routing(unsigned int hooknum,
-				       struct sk_buff **pskb,
+				       struct sk_buff *skb,
 				       const struct net_device *in,
 				       const struct net_device *out,
 				       int (*okfn)(struct sk_buff *))
 {
-	if (!((*pskb)->ipvs_property))
+	if (!skb->ipvs_property)
 		return NF_ACCEPT;
 	/* The packet was sent from IPVS, exit this chain */
 	return NF_STOP;
@@ -541,13 +504,14 @@ __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
 	return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
 }
 
-static inline struct sk_buff *
-ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
+static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
-	skb = ip_defrag(skb, user);
-	if (skb)
+	int err = ip_defrag(skb, user);
+
+	if (!err)
 		ip_send_check(ip_hdr(skb));
-	return skb;
+
+	return err;
 }
 
 /*
@@ -605,9 +569,8 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
  *	Currently handles error types - unreachable, quench, ttl exceeded.
  *	(Only used in VS/NAT)
  */
-static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
+static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
 {
-	struct sk_buff *skb = *pskb;
 	struct iphdr *iph;
 	struct icmphdr	_icmph, *ic;
 	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
@@ -619,10 +582,8 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
 
 	/* reassemble IP fragments */
 	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-		skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
-		if (!skb)
+		if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
 			return NF_STOLEN;
-		*pskb = skb;
 	}
 
 	iph = ip_hdr(skb);
@@ -690,9 +651,8 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
 
 	if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
 		offset += 2 * sizeof(__u16);
-	if (!ip_vs_make_skb_writable(pskb, offset))
+	if (!skb_make_writable(skb, offset))
 		goto out;
-	skb = *pskb;
 
 	ip_vs_nat_icmp(skb, pp, cp, 1);
 
@@ -724,11 +684,10 @@ static inline int is_tcp_reset(const struct sk_buff *skb)
  *      rewrite addresses of the packet and send it on its way...
  */
 static unsigned int
-ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
 	  const struct net_device *in, const struct net_device *out,
 	  int (*okfn)(struct sk_buff *))
 {
-	struct sk_buff  *skb = *pskb;
 	struct iphdr	*iph;
 	struct ip_vs_protocol *pp;
 	struct ip_vs_conn *cp;
@@ -741,11 +700,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 
 	iph = ip_hdr(skb);
 	if (unlikely(iph->protocol == IPPROTO_ICMP)) {
-		int related, verdict = ip_vs_out_icmp(pskb, &related);
+		int related, verdict = ip_vs_out_icmp(skb, &related);
 
 		if (related)
 			return verdict;
-		skb = *pskb;
 		iph = ip_hdr(skb);
 	}
 
@@ -756,11 +714,9 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 	/* reassemble IP fragments */
 	if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
 		     !pp->dont_defrag)) {
-		skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
-		if (!skb)
+		if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
 			return NF_STOLEN;
 		iph = ip_hdr(skb);
-		*pskb = skb;
 	}
 
 	ihl = iph->ihl << 2;
@@ -802,13 +758,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 
 	IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
 
-	if (!ip_vs_make_skb_writable(pskb, ihl))
+	if (!skb_make_writable(skb, ihl))
 		goto drop;
 
 	/* mangle the packet */
-	if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
+	if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
 		goto drop;
-	skb = *pskb;
 	ip_hdr(skb)->saddr = cp->vaddr;
 	ip_send_check(ip_hdr(skb));
 
@@ -818,9 +773,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 	 * if it came from this machine itself.  So re-compute
 	 * the routing information.
 	 */
-	if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
+	if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
 		goto drop;
-	skb = *pskb;
 
 	IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
 
@@ -835,7 +789,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
 
   drop:
 	ip_vs_conn_put(cp);
-	kfree_skb(*pskb);
+	kfree_skb(skb);
 	return NF_STOLEN;
 }
 
@@ -847,9 +801,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
  *	Currently handles error types - unreachable, quench, ttl exceeded.
  */
 static int
-ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
+ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 {
-	struct sk_buff *skb = *pskb;
 	struct iphdr *iph;
 	struct icmphdr	_icmph, *ic;
 	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
@@ -861,12 +814,9 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
 
 	/* reassemble IP fragments */
 	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-		skb = ip_vs_gather_frags(skb,
-					 hooknum == NF_IP_LOCAL_IN ?
-					 IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
-		if (!skb)
+		if (ip_vs_gather_frags(skb, hooknum == NF_IP_LOCAL_IN ?
+					    IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
 			return NF_STOLEN;
-		*pskb = skb;
 	}
 
 	iph = ip_hdr(skb);
@@ -945,11 +895,10 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
  *	and send it on its way...
  */
 static unsigned int
-ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
 	 const struct net_device *in, const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	struct sk_buff	*skb = *pskb;
 	struct iphdr	*iph;
 	struct ip_vs_protocol *pp;
 	struct ip_vs_conn *cp;
@@ -971,11 +920,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
 
 	iph = ip_hdr(skb);
 	if (unlikely(iph->protocol == IPPROTO_ICMP)) {
-		int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
+		int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
 
 		if (related)
 			return verdict;
-		skb = *pskb;
 		iph = ip_hdr(skb);
 	}
 
@@ -1056,16 +1004,16 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
  *      and send them to ip_vs_in_icmp.
  */
 static unsigned int
-ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
 		   const struct net_device *in, const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
 	int r;
 
-	if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP)
+	if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
 		return NF_ACCEPT;
 
-	return ip_vs_in_icmp(pskb, &r, hooknum);
+	return ip_vs_in_icmp(skb, &r, hooknum);
 }
 
 
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index 344ddbbdc75..59aa166b767 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -30,6 +30,7 @@
 #include <linux/skbuff.h>
 #include <linux/in.h>
 #include <linux/ip.h>
+#include <linux/netfilter.h>
 #include <net/protocol.h>
 #include <net/tcp.h>
 #include <asm/unaligned.h>
@@ -135,7 +136,7 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
  * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
  */
 static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
-			 struct sk_buff **pskb, int *diff)
+			 struct sk_buff *skb, int *diff)
 {
 	struct iphdr *iph;
 	struct tcphdr *th;
@@ -155,14 +156,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		return 1;
 
 	/* Linear packets are much easier to deal with. */
-	if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
+	if (!skb_make_writable(skb, skb->len))
 		return 0;
 
 	if (cp->app_data == &ip_vs_ftp_pasv) {
-		iph = ip_hdr(*pskb);
+		iph = ip_hdr(skb);
 		th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
 		data = (char *)th + (th->doff << 2);
-		data_limit = skb_tail_pointer(*pskb);
+		data_limit = skb_tail_pointer(skb);
 
 		if (ip_vs_ftp_get_addrport(data, data_limit,
 					   SERVER_STRING,
@@ -213,7 +214,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 			memcpy(start, buf, buf_len);
 			ret = 1;
 		} else {
-			ret = !ip_vs_skb_replace(*pskb, GFP_ATOMIC, start,
+			ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start,
 					  end-start, buf, buf_len);
 		}
 
@@ -238,7 +239,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
  * the client.
  */
 static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
-			struct sk_buff **pskb, int *diff)
+			struct sk_buff *skb, int *diff)
 {
 	struct iphdr *iph;
 	struct tcphdr *th;
@@ -256,20 +257,20 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		return 1;
 
 	/* Linear packets are much easier to deal with. */
-	if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
+	if (!skb_make_writable(skb, skb->len))
 		return 0;
 
 	/*
 	 * Detecting whether it is passive
 	 */
-	iph = ip_hdr(*pskb);
+	iph = ip_hdr(skb);
 	th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
 
 	/* Since there may be OPTIONS in the TCP packet and the HLEN is
 	   the length of the header in 32-bit multiples, it is accurate
 	   to calculate data address by th+HLEN*4 */
 	data = data_start = (char *)th + (th->doff << 2);
-	data_limit = skb_tail_pointer(*pskb);
+	data_limit = skb_tail_pointer(skb);
 
 	while (data <= data_limit - 6) {
 		if (strnicmp(data, "PASV\r\n", 6) == 0) {
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index e65577a7700..12dc0d640b6 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -20,6 +20,7 @@
 #include <linux/tcp.h>                  /* for tcphdr */
 #include <net/ip.h>
 #include <net/tcp.h>                    /* for csum_tcpudp_magic */
+#include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 
 #include <net/ip_vs.h>
@@ -122,27 +123,27 @@ tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip,
 
 
 static int
-tcp_snat_handler(struct sk_buff **pskb,
+tcp_snat_handler(struct sk_buff *skb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct tcphdr *tcph;
-	const unsigned int tcphoff = ip_hdrlen(*pskb);
+	const unsigned int tcphoff = ip_hdrlen(skb);
 
 	/* csum_check requires unshared skb */
-	if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
+	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(*pskb, pp))
+		if (pp->csum_check && !pp->csum_check(skb, pp))
 			return 0;
 
 		/* Call application helper if needed */
-		if (!ip_vs_app_pkt_out(cp, pskb))
+		if (!ip_vs_app_pkt_out(cp, skb))
 			return 0;
 	}
 
-	tcph = (void *)ip_hdr(*pskb) + tcphoff;
+	tcph = (void *)ip_hdr(skb) + tcphoff;
 	tcph->source = cp->vport;
 
 	/* Adjust TCP checksums */
@@ -150,17 +151,15 @@ tcp_snat_handler(struct sk_buff **pskb,
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
 				     cp->dport, cp->vport);
-		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
-			(*pskb)->ip_summed = CHECKSUM_NONE;
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
 		tcph->check = 0;
-		(*pskb)->csum = skb_checksum(*pskb, tcphoff,
-					     (*pskb)->len - tcphoff, 0);
+		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
 		tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
-						(*pskb)->len - tcphoff,
-						cp->protocol,
-						(*pskb)->csum);
+						skb->len - tcphoff,
+						cp->protocol, skb->csum);
 		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
 			  pp->name, tcph->check,
 			  (char*)&(tcph->check) - (char*)tcph);
@@ -170,30 +169,30 @@ tcp_snat_handler(struct sk_buff **pskb,
 
 
 static int
-tcp_dnat_handler(struct sk_buff **pskb,
+tcp_dnat_handler(struct sk_buff *skb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct tcphdr *tcph;
-	const unsigned int tcphoff = ip_hdrlen(*pskb);
+	const unsigned int tcphoff = ip_hdrlen(skb);
 
 	/* csum_check requires unshared skb */
-	if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
+	if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(*pskb, pp))
+		if (pp->csum_check && !pp->csum_check(skb, pp))
 			return 0;
 
 		/*
 		 *	Attempt ip_vs_app call.
 		 *	It will fix ip_vs_conn and iph ack_seq stuff
 		 */
-		if (!ip_vs_app_pkt_in(cp, pskb))
+		if (!ip_vs_app_pkt_in(cp, skb))
 			return 0;
 	}
 
-	tcph = (void *)ip_hdr(*pskb) + tcphoff;
+	tcph = (void *)ip_hdr(skb) + tcphoff;
 	tcph->dest = cp->dport;
 
 	/*
@@ -203,18 +202,16 @@ tcp_dnat_handler(struct sk_buff **pskb,
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
 				     cp->vport, cp->dport);
-		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
-			(*pskb)->ip_summed = CHECKSUM_NONE;
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
 		tcph->check = 0;
-		(*pskb)->csum = skb_checksum(*pskb, tcphoff,
-					     (*pskb)->len - tcphoff, 0);
+		skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
 		tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
-						(*pskb)->len - tcphoff,
-						cp->protocol,
-						(*pskb)->csum);
-		(*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
+						skb->len - tcphoff,
+						cp->protocol, skb->csum);
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 	return 1;
 }
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 8ee5fe6a101..1fa7b330b9a 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -18,6 +18,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/kernel.h>
+#include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/udp.h>
 
@@ -129,29 +130,29 @@ udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip,
 }
 
 static int
-udp_snat_handler(struct sk_buff **pskb,
+udp_snat_handler(struct sk_buff *skb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct udphdr *udph;
-	const unsigned int udphoff = ip_hdrlen(*pskb);
+	const unsigned int udphoff = ip_hdrlen(skb);
 
 	/* csum_check requires unshared skb */
-	if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
+	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(*pskb, pp))
+		if (pp->csum_check && !pp->csum_check(skb, pp))
 			return 0;
 
 		/*
 		 *	Call application helper if needed
 		 */
-		if (!ip_vs_app_pkt_out(cp, pskb))
+		if (!ip_vs_app_pkt_out(cp, skb))
 			return 0;
 	}
 
-	udph = (void *)ip_hdr(*pskb) + udphoff;
+	udph = (void *)ip_hdr(skb) + udphoff;
 	udph->source = cp->vport;
 
 	/*
@@ -161,17 +162,15 @@ udp_snat_handler(struct sk_buff **pskb,
 		/* Only port and addr are changed, do fast csum update */
 		udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
 				     cp->dport, cp->vport);
-		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
-			(*pskb)->ip_summed = CHECKSUM_NONE;
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
 		udph->check = 0;
-		(*pskb)->csum = skb_checksum(*pskb, udphoff,
-					     (*pskb)->len - udphoff, 0);
+		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
 		udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
-						(*pskb)->len - udphoff,
-						cp->protocol,
-						(*pskb)->csum);
+						skb->len - udphoff,
+						cp->protocol, skb->csum);
 		if (udph->check == 0)
 			udph->check = CSUM_MANGLED_0;
 		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
@@ -183,30 +182,30 @@ udp_snat_handler(struct sk_buff **pskb,
 
 
 static int
-udp_dnat_handler(struct sk_buff **pskb,
+udp_dnat_handler(struct sk_buff *skb,
 		 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 {
 	struct udphdr *udph;
-	unsigned int udphoff = ip_hdrlen(*pskb);
+	unsigned int udphoff = ip_hdrlen(skb);
 
 	/* csum_check requires unshared skb */
-	if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
+	if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(*pskb, pp))
+		if (pp->csum_check && !pp->csum_check(skb, pp))
 			return 0;
 
 		/*
 		 *	Attempt ip_vs_app call.
 		 *	It will fix ip_vs_conn
 		 */
-		if (!ip_vs_app_pkt_in(cp, pskb))
+		if (!ip_vs_app_pkt_in(cp, skb))
 			return 0;
 	}
 
-	udph = (void *)ip_hdr(*pskb) + udphoff;
+	udph = (void *)ip_hdr(skb) + udphoff;
 	udph->dest = cp->dport;
 
 	/*
@@ -216,20 +215,18 @@ udp_dnat_handler(struct sk_buff **pskb,
 		/* Only port and addr are changed, do fast csum update */
 		udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
 				     cp->vport, cp->dport);
-		if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
-			(*pskb)->ip_summed = CHECKSUM_NONE;
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->ip_summed = CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
 		udph->check = 0;
-		(*pskb)->csum = skb_checksum(*pskb, udphoff,
-					     (*pskb)->len - udphoff, 0);
+		skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
 		udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
-						(*pskb)->len - udphoff,
-						cp->protocol,
-						(*pskb)->csum);
+						skb->len - udphoff,
+						cp->protocol, skb->csum);
 		if (udph->check == 0)
 			udph->check = CSUM_MANGLED_0;
-		(*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 	return 1;
 }
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 666e080a74a..d0a92dec105 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -253,7 +253,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	}
 
 	/* copy-on-write the packet before mangling it */
-	if (!ip_vs_make_skb_writable(&skb, sizeof(struct iphdr)))
+	if (!skb_make_writable(skb, sizeof(struct iphdr)))
 		goto tx_error_put;
 
 	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
@@ -264,7 +264,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	skb->dst = &rt->u.dst;
 
 	/* mangle the packet */
-	if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp))
+	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
 		goto tx_error;
 	ip_hdr(skb)->daddr = cp->daddr;
 	ip_send_check(ip_hdr(skb));
@@ -529,7 +529,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	}
 
 	/* copy-on-write the packet before mangling it */
-	if (!ip_vs_make_skb_writable(&skb, offset))
+	if (!skb_make_writable(skb, offset))
 		goto tx_error_put;
 
 	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index b44192924f9..5539debf497 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -3,14 +3,15 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/ip.h>
+#include <linux/skbuff.h>
 #include <net/route.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 
 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
-int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
+int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 {
-	const struct iphdr *iph = ip_hdr(*pskb);
+	const struct iphdr *iph = ip_hdr(skb);
 	struct rtable *rt;
 	struct flowi fl = {};
 	struct dst_entry *odst;
@@ -29,14 +30,14 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
 		if (type == RTN_LOCAL)
 			fl.nl_u.ip4_u.saddr = iph->saddr;
 		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
-		fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
-		fl.mark = (*pskb)->mark;
+		fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
+		fl.mark = skb->mark;
 		if (ip_route_output_key(&rt, &fl) != 0)
 			return -1;
 
 		/* Drop old route. */
-		dst_release((*pskb)->dst);
-		(*pskb)->dst = &rt->u.dst;
+		dst_release(skb->dst);
+		skb->dst = &rt->u.dst;
 	} else {
 		/* non-local src, find valid iif to satisfy
 		 * rp-filter when calling ip_route_input. */
@@ -44,8 +45,8 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
 		if (ip_route_output_key(&rt, &fl) != 0)
 			return -1;
 
-		odst = (*pskb)->dst;
-		if (ip_route_input(*pskb, iph->daddr, iph->saddr,
+		odst = skb->dst;
+		if (ip_route_input(skb, iph->daddr, iph->saddr,
 				   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
 			dst_release(&rt->u.dst);
 			return -1;
@@ -54,70 +55,54 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
 		dst_release(odst);
 	}
 
-	if ((*pskb)->dst->error)
+	if (skb->dst->error)
 		return -1;
 
 #ifdef CONFIG_XFRM
-	if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) &&
-	    xfrm_decode_session(*pskb, &fl, AF_INET) == 0)
-		if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0))
+	if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+	    xfrm_decode_session(skb, &fl, AF_INET) == 0)
+		if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0))
 			return -1;
 #endif
 
 	/* Change in oif may mean change in hh_len. */
-	hh_len = (*pskb)->dst->dev->hard_header_len;
-	if (skb_headroom(*pskb) < hh_len) {
-		struct sk_buff *nskb;
-
-		nskb = skb_realloc_headroom(*pskb, hh_len);
-		if (!nskb)
-			return -1;
-		if ((*pskb)->sk)
-			skb_set_owner_w(nskb, (*pskb)->sk);
-		kfree_skb(*pskb);
-		*pskb = nskb;
-	}
+	hh_len = skb->dst->dev->hard_header_len;
+	if (skb_headroom(skb) < hh_len &&
+	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+		return -1;
 
 	return 0;
 }
 EXPORT_SYMBOL(ip_route_me_harder);
 
 #ifdef CONFIG_XFRM
-int ip_xfrm_me_harder(struct sk_buff **pskb)
+int ip_xfrm_me_harder(struct sk_buff *skb)
 {
 	struct flowi fl;
 	unsigned int hh_len;
 	struct dst_entry *dst;
 
-	if (IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED)
+	if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
 		return 0;
-	if (xfrm_decode_session(*pskb, &fl, AF_INET) < 0)
+	if (xfrm_decode_session(skb, &fl, AF_INET) < 0)
 		return -1;
 
-	dst = (*pskb)->dst;
+	dst = skb->dst;
 	if (dst->xfrm)
 		dst = ((struct xfrm_dst *)dst)->route;
 	dst_hold(dst);
 
-	if (xfrm_lookup(&dst, &fl, (*pskb)->sk, 0) < 0)
+	if (xfrm_lookup(&dst, &fl, skb->sk, 0) < 0)
 		return -1;
 
-	dst_release((*pskb)->dst);
-	(*pskb)->dst = dst;
+	dst_release(skb->dst);
+	skb->dst = dst;
 
 	/* Change in oif may mean change in hh_len. */
-	hh_len = (*pskb)->dst->dev->hard_header_len;
-	if (skb_headroom(*pskb) < hh_len) {
-		struct sk_buff *nskb;
-
-		nskb = skb_realloc_headroom(*pskb, hh_len);
-		if (!nskb)
-			return -1;
-		if ((*pskb)->sk)
-			skb_set_owner_w(nskb, (*pskb)->sk);
-		kfree_skb(*pskb);
-		*pskb = nskb;
-	}
+	hh_len = skb->dst->dev->hard_header_len;
+	if (skb_headroom(skb) < hh_len &&
+	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+		return -1;
 	return 0;
 }
 EXPORT_SYMBOL(ip_xfrm_me_harder);
@@ -150,17 +135,17 @@ static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info)
 	}
 }
 
-static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
+static int nf_ip_reroute(struct sk_buff *skb, const struct nf_info *info)
 {
 	const struct ip_rt_info *rt_info = nf_info_reroute(info);
 
 	if (info->hook == NF_IP_LOCAL_OUT) {
-		const struct iphdr *iph = ip_hdr(*pskb);
+		const struct iphdr *iph = ip_hdr(skb);
 
 		if (!(iph->tos == rt_info->tos
 		      && iph->daddr == rt_info->daddr
 		      && iph->saddr == rt_info->saddr))
-			return ip_route_me_harder(pskb, RTN_UNSPEC);
+			return ip_route_me_harder(skb, RTN_UNSPEC);
 	}
 	return 0;
 }
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 29114a9ccd1..2909c92ecd9 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -197,7 +197,7 @@ static inline int arp_checkentry(const struct arpt_arp *arp)
 	return 1;
 }
 
-static unsigned int arpt_error(struct sk_buff **pskb,
+static unsigned int arpt_error(struct sk_buff *skb,
 			       const struct net_device *in,
 			       const struct net_device *out,
 			       unsigned int hooknum,
@@ -215,7 +215,7 @@ static inline struct arpt_entry *get_entry(void *base, unsigned int offset)
 	return (struct arpt_entry *)(base + offset);
 }
 
-unsigned int arpt_do_table(struct sk_buff **pskb,
+unsigned int arpt_do_table(struct sk_buff *skb,
 			   unsigned int hook,
 			   const struct net_device *in,
 			   const struct net_device *out,
@@ -231,9 +231,9 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 	struct xt_table_info *private;
 
 	/* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
-	if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
-				     (2 * (*pskb)->dev->addr_len) +
-				     (2 * sizeof(u32)))))
+	if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
+				 (2 * skb->dev->addr_len) +
+				 (2 * sizeof(u32)))))
 		return NF_DROP;
 
 	indev = in ? in->name : nulldevname;
@@ -245,14 +245,14 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
 
-	arp = arp_hdr(*pskb);
+	arp = arp_hdr(skb);
 	do {
-		if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) {
+		if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
 			struct arpt_entry_target *t;
 			int hdr_len;
 
 			hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
-				(2 * (*pskb)->dev->addr_len);
+				(2 * skb->dev->addr_len);
 			ADD_COUNTER(e->counters, hdr_len, 1);
 
 			t = arpt_get_target(e);
@@ -290,14 +290,14 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
 				/* Targets which reenter must return
 				 * abs. verdicts
 				 */
-				verdict = t->u.kernel.target->target(pskb,
+				verdict = t->u.kernel.target->target(skb,
 								     in, out,
 								     hook,
 								     t->u.kernel.target,
 								     t->data);
 
 				/* Target might have changed stuff. */
-				arp = arp_hdr(*pskb);
+				arp = arp_hdr(skb);
 
 				if (verdict == ARPT_CONTINUE)
 					e = (void *)e + e->next_offset;
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index c4bdab47597..45fa4e20094 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -1,5 +1,6 @@
 /* module that allows mangling of the arp payload */
 #include <linux/module.h>
+#include <linux/netfilter.h>
 #include <linux/netfilter_arp/arpt_mangle.h>
 #include <net/sock.h>
 
@@ -8,7 +9,7 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
 MODULE_DESCRIPTION("arptables arp payload mangle target");
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in, const struct net_device *out,
        unsigned int hooknum, const struct xt_target *target,
        const void *targinfo)
@@ -18,47 +19,39 @@ target(struct sk_buff **pskb,
 	unsigned char *arpptr;
 	int pln, hln;
 
-	if (skb_shared(*pskb) || skb_cloned(*pskb)) {
-		struct sk_buff *nskb;
+	/* skb_make_writable() returns zero on failure: drop the packet. */
+	if (!skb_make_writable(skb, skb->len))
+		return NF_DROP;
 
-		nskb = skb_copy(*pskb, GFP_ATOMIC);
-		if (!nskb)
-			return NF_DROP;
-		if ((*pskb)->sk)
-			skb_set_owner_w(nskb, (*pskb)->sk);
-		kfree_skb(*pskb);
-		*pskb = nskb;
-	}
-
-	arp = arp_hdr(*pskb);
-	arpptr = skb_network_header(*pskb) + sizeof(*arp);
+	arp = arp_hdr(skb);
+	arpptr = skb_network_header(skb) + sizeof(*arp);
 	pln = arp->ar_pln;
 	hln = arp->ar_hln;
 	/* We assume that pln and hln were checked in the match */
 	if (mangle->flags & ARPT_MANGLE_SDEV) {
 		if (ARPT_DEV_ADDR_LEN_MAX < hln ||
-		   (arpptr + hln > skb_tail_pointer(*pskb)))
+		   (arpptr + hln > skb_tail_pointer(skb)))
 			return NF_DROP;
 		memcpy(arpptr, mangle->src_devaddr, hln);
 	}
 	arpptr += hln;
 	if (mangle->flags & ARPT_MANGLE_SIP) {
 		if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
-		   (arpptr + pln > skb_tail_pointer(*pskb)))
+		   (arpptr + pln > skb_tail_pointer(skb)))
 			return NF_DROP;
 		memcpy(arpptr, &mangle->u_s.src_ip, pln);
 	}
 	arpptr += pln;
 	if (mangle->flags & ARPT_MANGLE_TDEV) {
 		if (ARPT_DEV_ADDR_LEN_MAX < hln ||
-		   (arpptr + hln > skb_tail_pointer(*pskb)))
+		   (arpptr + hln > skb_tail_pointer(skb)))
 			return NF_DROP;
 		memcpy(arpptr, mangle->tgt_devaddr, hln);
 	}
 	arpptr += hln;
 	if (mangle->flags & ARPT_MANGLE_TIP) {
 		if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
-		   (arpptr + pln > skb_tail_pointer(*pskb)))
+		   (arpptr + pln > skb_tail_pointer(skb)))
 			return NF_DROP;
 		memcpy(arpptr, &mangle->u_t.tgt_ip, pln);
 	}
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 75c02306253..302d3da5f69 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -56,12 +56,12 @@ static struct arpt_table packet_filter = {
 
 /* The work comes in here from netfilter.c */
 static unsigned int arpt_hook(unsigned int hook,
-			      struct sk_buff **pskb,
+			      struct sk_buff *skb,
 			      const struct net_device *in,
 			      const struct net_device *out,
 			      int (*okfn)(struct sk_buff *))
 {
-	return arpt_do_table(pskb, hook, in, out, &packet_filter);
+	return arpt_do_table(skb, hook, in, out, &packet_filter);
 }
 
 static struct nf_hook_ops arpt_ops[] = {
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 23cbfc7c80f..10a2ce09fd8 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -335,6 +335,7 @@ static int
 ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 {
 	int diff;
+	int err;
 	struct iphdr *user_iph = (struct iphdr *)v->payload;
 
 	if (v->data_len < sizeof(*user_iph))
@@ -347,25 +348,18 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 		if (v->data_len > 0xFFFF)
 			return -EINVAL;
 		if (diff > skb_tailroom(e->skb)) {
-			struct sk_buff *newskb;
-
-			newskb = skb_copy_expand(e->skb,
-						 skb_headroom(e->skb),
-						 diff,
-						 GFP_ATOMIC);
-			if (newskb == NULL) {
-				printk(KERN_WARNING "ip_queue: OOM "
-				      "in mangle, dropping packet\n");
-				return -ENOMEM;
+			err = pskb_expand_head(e->skb, 0,
+					       diff - skb_tailroom(e->skb),
+					       GFP_ATOMIC);
+			if (err) {
+				printk(KERN_WARNING "ip_queue: error "
+				      "in mangle, dropping packet: %d\n", -err);
+				return err;
 			}
-			if (e->skb->sk)
-				skb_set_owner_w(newskb, e->skb->sk);
-			kfree_skb(e->skb);
-			e->skb = newskb;
 		}
 		skb_put(e->skb, diff);
 	}
-	if (!skb_make_writable(&e->skb, v->data_len))
+	if (!skb_make_writable(e->skb, v->data_len))
 		return -ENOMEM;
 	skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
 	e->skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 6486894f450..4b10b98640a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -169,7 +169,7 @@ ip_checkentry(const struct ipt_ip *ip)
 }
 
 static unsigned int
-ipt_error(struct sk_buff **pskb,
+ipt_error(struct sk_buff *skb,
 	  const struct net_device *in,
 	  const struct net_device *out,
 	  unsigned int hooknum,
@@ -312,7 +312,7 @@ static void trace_packet(struct sk_buff *skb,
 
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
-ipt_do_table(struct sk_buff **pskb,
+ipt_do_table(struct sk_buff *skb,
 	     unsigned int hook,
 	     const struct net_device *in,
 	     const struct net_device *out,
@@ -331,8 +331,8 @@ ipt_do_table(struct sk_buff **pskb,
 	struct xt_table_info *private;
 
 	/* Initialization */
-	ip = ip_hdr(*pskb);
-	datalen = (*pskb)->len - ip->ihl * 4;
+	ip = ip_hdr(skb);
+	datalen = skb->len - ip->ihl * 4;
 	indev = in ? in->name : nulldevname;
 	outdev = out ? out->name : nulldevname;
 	/* We handle fragments by dealing with the first fragment as
@@ -359,7 +359,7 @@ ipt_do_table(struct sk_buff **pskb,
 			struct ipt_entry_target *t;
 
 			if (IPT_MATCH_ITERATE(e, do_match,
-					      *pskb, in, out,
+					      skb, in, out,
 					      offset, &hotdrop) != 0)
 				goto no_match;
 
@@ -371,8 +371,8 @@ ipt_do_table(struct sk_buff **pskb,
 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
 			/* The packet is traced: log it */
-			if (unlikely((*pskb)->nf_trace))
-				trace_packet(*pskb, hook, in, out,
+			if (unlikely(skb->nf_trace))
+				trace_packet(skb, hook, in, out,
 					     table->name, private, e);
 #endif
 			/* Standard target? */
@@ -410,7 +410,7 @@ ipt_do_table(struct sk_buff **pskb,
 				((struct ipt_entry *)table_base)->comefrom
 					= 0xeeeeeeec;
 #endif
-				verdict = t->u.kernel.target->target(pskb,
+				verdict = t->u.kernel.target->target(skb,
 								     in, out,
 								     hook,
 								     t->u.kernel.target,
@@ -428,8 +428,8 @@ ipt_do_table(struct sk_buff **pskb,
 					= 0x57acc001;
 #endif
 				/* Target might have changed stuff. */
-				ip = ip_hdr(*pskb);
-				datalen = (*pskb)->len - ip->ihl * 4;
+				ip = ip_hdr(skb);
+				datalen = skb->len - ip->ihl * 4;
 
 				if (verdict == IPT_CONTINUE)
 					e = (void *)e + e->next_offset;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 27f14e1ebd8..2f544dac72d 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -289,7 +289,7 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
  ***********************************************************************/
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
@@ -305,7 +305,7 @@ target(struct sk_buff **pskb,
 	 * is only decremented by destroy() - and ip_tables guarantees
 	 * that the ->target() function isn't called after ->destroy() */
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	if (ct == NULL) {
 		printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
 			/* FIXME: need to drop invalid ones, since replies
@@ -316,7 +316,7 @@ target(struct sk_buff **pskb,
 
 	/* special case: ICMP error handling. conntrack distinguishes between
 	 * error messages (RELATED) and information requests (see below) */
-	if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP
+	if (ip_hdr(skb)->protocol == IPPROTO_ICMP
 	    && (ctinfo == IP_CT_RELATED
 		|| ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY))
 		return XT_CONTINUE;
@@ -325,7 +325,7 @@ target(struct sk_buff **pskb,
 	 * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
 	 * on, which all have an ID field [relevant for hashing]. */
 
-	hash = clusterip_hashfn(*pskb, cipinfo->config);
+	hash = clusterip_hashfn(skb, cipinfo->config);
 
 	switch (ctinfo) {
 		case IP_CT_NEW:
@@ -355,7 +355,7 @@ target(struct sk_buff **pskb,
 
 	/* despite being received via linklayer multicast, this is
 	 * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
-	(*pskb)->pkt_type = PACKET_HOST;
+	skb->pkt_type = PACKET_HOST;
 
 	return XT_CONTINUE;
 }
@@ -505,12 +505,12 @@ static void arp_print(struct arp_payload *payload)
 
 static unsigned int
 arp_mangle(unsigned int hook,
-	   struct sk_buff **pskb,
+	   struct sk_buff *skb,
 	   const struct net_device *in,
 	   const struct net_device *out,
 	   int (*okfn)(struct sk_buff *))
 {
-	struct arphdr *arp = arp_hdr(*pskb);
+	struct arphdr *arp = arp_hdr(skb);
 	struct arp_payload *payload;
 	struct clusterip_config *c;
 
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index f1253bd3837..add110060a2 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -26,15 +26,15 @@ MODULE_DESCRIPTION("iptables ECN modification module");
 /* set ECT codepoint from IP header.
  * 	return false if there was an error. */
 static inline bool
-set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
+set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
 {
-	struct iphdr *iph = ip_hdr(*pskb);
+	struct iphdr *iph = ip_hdr(skb);
 
 	if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
 		__u8 oldtos;
-		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
+		if (!skb_make_writable(skb, sizeof(struct iphdr)))
 			return false;
-		iph = ip_hdr(*pskb);
+		iph = ip_hdr(skb);
 		oldtos = iph->tos;
 		iph->tos &= ~IPT_ECN_IP_MASK;
 		iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
@@ -45,14 +45,13 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 
 /* Return false if there was an error. */
 static inline bool
-set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
+set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
 {
 	struct tcphdr _tcph, *tcph;
 	__be16 oldval;
 
 	/* Not enought header? */
-	tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
-				  sizeof(_tcph), &_tcph);
+	tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
 	if (!tcph)
 		return false;
 
@@ -62,9 +61,9 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 	     tcph->cwr == einfo->proto.tcp.cwr))
 		return true;
 
-	if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
+	if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
 		return false;
-	tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb);
+	tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb);
 
 	oldval = ((__be16 *)tcph)[6];
 	if (einfo->operation & IPT_ECN_OP_SET_ECE)
@@ -72,13 +71,13 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
 	if (einfo->operation & IPT_ECN_OP_SET_CWR)
 		tcph->cwr = einfo->proto.tcp.cwr;
 
-	nf_proto_csum_replace2(&tcph->check, *pskb,
+	nf_proto_csum_replace2(&tcph->check, skb,
 				oldval, ((__be16 *)tcph)[6], 0);
 	return true;
 }
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
@@ -88,12 +87,12 @@ target(struct sk_buff **pskb,
 	const struct ipt_ECN_info *einfo = targinfo;
 
 	if (einfo->operation & IPT_ECN_OP_SET_IP)
-		if (!set_ect_ip(pskb, einfo))
+		if (!set_ect_ip(skb, einfo))
 			return NF_DROP;
 
 	if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
-	    && ip_hdr(*pskb)->protocol == IPPROTO_TCP)
-		if (!set_ect_tcp(pskb, einfo))
+	    && ip_hdr(skb)->protocol == IPPROTO_TCP)
+		if (!set_ect_tcp(skb, einfo))
 			return NF_DROP;
 
 	return XT_CONTINUE;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 127a5e89bf1..4b5e8216a4e 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -418,7 +418,7 @@ ipt_log_packet(unsigned int pf,
 }
 
 static unsigned int
-ipt_log_target(struct sk_buff **pskb,
+ipt_log_target(struct sk_buff *skb,
 	       const struct net_device *in,
 	       const struct net_device *out,
 	       unsigned int hooknum,
@@ -432,7 +432,7 @@ ipt_log_target(struct sk_buff **pskb,
 	li.u.log.level = loginfo->level;
 	li.u.log.logflags = loginfo->logflags;
 
-	ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li,
+	ipt_log_packet(PF_INET, hooknum, skb, in, out, &li,
 		       loginfo->prefix);
 	return XT_CONTINUE;
 }
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 3e0b562b2db..44b516e7cb7 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -52,7 +52,7 @@ masquerade_check(const char *tablename,
 }
 
 static unsigned int
-masquerade_target(struct sk_buff **pskb,
+masquerade_target(struct sk_buff *skb,
 		  const struct net_device *in,
 		  const struct net_device *out,
 		  unsigned int hooknum,
@@ -69,7 +69,7 @@ masquerade_target(struct sk_buff **pskb,
 
 	NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	nat = nfct_nat(ct);
 
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
@@ -82,7 +82,7 @@ masquerade_target(struct sk_buff **pskb,
 		return NF_ACCEPT;
 
 	mr = targinfo;
-	rt = (struct rtable *)(*pskb)->dst;
+	rt = (struct rtable *)skb->dst;
 	newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
 	if (!newsrc) {
 		printk("MASQUERADE: %s ate my IP address\n", out->name);
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 41a011d5a06..f8699291e33 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -43,7 +43,7 @@ check(const char *tablename,
 }
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
@@ -59,14 +59,14 @@ target(struct sk_buff **pskb,
 	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
 		     || hooknum == NF_IP_POST_ROUTING
 		     || hooknum == NF_IP_LOCAL_OUT);
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 
 	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
 
 	if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT)
-		new_ip = ip_hdr(*pskb)->daddr & ~netmask;
+		new_ip = ip_hdr(skb)->daddr & ~netmask;
 	else
-		new_ip = ip_hdr(*pskb)->saddr & ~netmask;
+		new_ip = ip_hdr(skb)->saddr & ~netmask;
 	new_ip |= mr->range[0].min_ip & netmask;
 
 	newrange = ((struct nf_nat_range)
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 6ac7a237331..f7cf7d61a2d 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -47,7 +47,7 @@ redirect_check(const char *tablename,
 }
 
 static unsigned int
-redirect_target(struct sk_buff **pskb,
+redirect_target(struct sk_buff *skb,
 		const struct net_device *in,
 		const struct net_device *out,
 		unsigned int hooknum,
@@ -63,7 +63,7 @@ redirect_target(struct sk_buff **pskb,
 	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
 		     || hooknum == NF_IP_LOCAL_OUT);
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
 
 	/* Local packets: make them go to loopback */
@@ -76,7 +76,7 @@ redirect_target(struct sk_buff **pskb,
 		newdst = 0;
 
 		rcu_read_lock();
-		indev = __in_dev_get_rcu((*pskb)->dev);
+		indev = __in_dev_get_rcu(skb->dev);
 		if (indev && (ifa = indev->ifa_list))
 			newdst = ifa->ifa_local;
 		rcu_read_unlock();
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index cb038c8fbc9..dcf4d21d511 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -131,7 +131,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	   )
 		addr_type = RTN_LOCAL;
 
-	if (ip_route_me_harder(&nskb, addr_type))
+	if (ip_route_me_harder(nskb, addr_type))
 		goto free_nskb;
 
 	nskb->ip_summed = CHECKSUM_NONE;
@@ -162,7 +162,7 @@ static inline void send_unreach(struct sk_buff *skb_in, int code)
 	icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
 }
 
-static unsigned int reject(struct sk_buff **pskb,
+static unsigned int reject(struct sk_buff *skb,
 			   const struct net_device *in,
 			   const struct net_device *out,
 			   unsigned int hooknum,
@@ -173,7 +173,7 @@ static unsigned int reject(struct sk_buff **pskb,
 
 	/* Our naive response construction doesn't deal with IP
 	   options, and probably shouldn't try. */
-	if (ip_hdrlen(*pskb) != sizeof(struct iphdr))
+	if (ip_hdrlen(skb) != sizeof(struct iphdr))
 		return NF_DROP;
 
 	/* WARNING: This code causes reentry within iptables.
@@ -181,28 +181,28 @@ static unsigned int reject(struct sk_buff **pskb,
 	   must return an absolute verdict. --RR */
 	switch (reject->with) {
 	case IPT_ICMP_NET_UNREACHABLE:
-		send_unreach(*pskb, ICMP_NET_UNREACH);
+		send_unreach(skb, ICMP_NET_UNREACH);
 		break;
 	case IPT_ICMP_HOST_UNREACHABLE:
-		send_unreach(*pskb, ICMP_HOST_UNREACH);
+		send_unreach(skb, ICMP_HOST_UNREACH);
 		break;
 	case IPT_ICMP_PROT_UNREACHABLE:
-		send_unreach(*pskb, ICMP_PROT_UNREACH);
+		send_unreach(skb, ICMP_PROT_UNREACH);
 		break;
 	case IPT_ICMP_PORT_UNREACHABLE:
-		send_unreach(*pskb, ICMP_PORT_UNREACH);
+		send_unreach(skb, ICMP_PORT_UNREACH);
 		break;
 	case IPT_ICMP_NET_PROHIBITED:
-		send_unreach(*pskb, ICMP_NET_ANO);
+		send_unreach(skb, ICMP_NET_ANO);
 		break;
 	case IPT_ICMP_HOST_PROHIBITED:
-		send_unreach(*pskb, ICMP_HOST_ANO);
+		send_unreach(skb, ICMP_HOST_ANO);
 		break;
 	case IPT_ICMP_ADMIN_PROHIBITED:
-		send_unreach(*pskb, ICMP_PKT_FILTERED);
+		send_unreach(skb, ICMP_PKT_FILTERED);
 		break;
 	case IPT_TCP_RESET:
-		send_reset(*pskb, hooknum);
+		send_reset(skb, hooknum);
 	case IPT_ICMP_ECHOREPLY:
 		/* Doesn't happen. */
 		break;
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 97641f1a97f..8988571436b 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -104,7 +104,7 @@ same_destroy(const struct xt_target *target, void *targinfo)
 }
 
 static unsigned int
-same_target(struct sk_buff **pskb,
+same_target(struct sk_buff *skb,
 		const struct net_device *in,
 		const struct net_device *out,
 		unsigned int hooknum,
@@ -121,7 +121,7 @@ same_target(struct sk_buff **pskb,
 
 	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
 			hooknum == NF_IP_POST_ROUTING);
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 
 	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
 
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 25f5d0b3906..d4573baa7f2 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -21,7 +21,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("iptables TOS mangling module");
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
@@ -29,13 +29,13 @@ target(struct sk_buff **pskb,
        const void *targinfo)
 {
 	const struct ipt_tos_target_info *tosinfo = targinfo;
-	struct iphdr *iph = ip_hdr(*pskb);
+	struct iphdr *iph = ip_hdr(skb);
 
 	if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
 		__u8 oldtos;
-		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
+		if (!skb_make_writable(skb, sizeof(struct iphdr)))
 			return NF_DROP;
-		iph = ip_hdr(*pskb);
+		iph = ip_hdr(skb);
 		oldtos = iph->tos;
 		iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
 		nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 2b54e7b0cfe..c620a052766 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -20,7 +20,7 @@ MODULE_DESCRIPTION("IP tables TTL modification module");
 MODULE_LICENSE("GPL");
 
 static unsigned int
-ipt_ttl_target(struct sk_buff **pskb,
+ipt_ttl_target(struct sk_buff *skb,
 	       const struct net_device *in, const struct net_device *out,
 	       unsigned int hooknum, const struct xt_target *target,
 	       const void *targinfo)
@@ -29,10 +29,10 @@ ipt_ttl_target(struct sk_buff **pskb,
 	const struct ipt_TTL_info *info = targinfo;
 	int new_ttl;
 
-	if (!skb_make_writable(pskb, (*pskb)->len))
+	if (!skb_make_writable(skb, skb->len))
 		return NF_DROP;
 
-	iph = ip_hdr(*pskb);
+	iph = ip_hdr(skb);
 
 	switch (info->mode) {
 		case IPT_TTL_SET:
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index c636d6d6357..212b830765a 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -279,7 +279,7 @@ alloc_failure:
 	spin_unlock_bh(&ulog_lock);
 }
 
-static unsigned int ipt_ulog_target(struct sk_buff **pskb,
+static unsigned int ipt_ulog_target(struct sk_buff *skb,
 				    const struct net_device *in,
 				    const struct net_device *out,
 				    unsigned int hooknum,
@@ -288,7 +288,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb,
 {
 	struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
 
-	ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL);
+	ipt_ulog_packet(hooknum, skb, in, out, loginfo, NULL);
 
 	return XT_CONTINUE;
 }
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 4f51c1d7d2d..ba3262c6043 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -62,31 +62,31 @@ static struct xt_table packet_filter = {
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ipt_hook(unsigned int hook,
-	 struct sk_buff **pskb,
+	 struct sk_buff *skb,
 	 const struct net_device *in,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_filter);
+	return ipt_do_table(skb, hook, in, out, &packet_filter);
 }
 
 static unsigned int
 ipt_local_out_hook(unsigned int hook,
-		   struct sk_buff **pskb,
+		   struct sk_buff *skb,
 		   const struct net_device *in,
 		   const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("iptable_filter: ignoring short SOCK_RAW "
 			       "packet.\n");
 		return NF_ACCEPT;
 	}
 
-	return ipt_do_table(pskb, hook, in, out, &packet_filter);
+	return ipt_do_table(skb, hook, in, out, &packet_filter);
 }
 
 static struct nf_hook_ops ipt_ops[] = {
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 902446f7cbc..b4360a69d5c 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -75,17 +75,17 @@ static struct xt_table packet_mangler = {
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ipt_route_hook(unsigned int hook,
-	 struct sk_buff **pskb,
+	 struct sk_buff *skb,
 	 const struct net_device *in,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_mangler);
+	return ipt_do_table(skb, hook, in, out, &packet_mangler);
 }
 
 static unsigned int
 ipt_local_hook(unsigned int hook,
-		   struct sk_buff **pskb,
+		   struct sk_buff *skb,
 		   const struct net_device *in,
 		   const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
@@ -97,8 +97,8 @@ ipt_local_hook(unsigned int hook,
 	u_int32_t mark;
 
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+	if (skb->len < sizeof(struct iphdr)
+	    || ip_hdrlen(skb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("iptable_mangle: ignoring short SOCK_RAW "
 			       "packet.\n");
@@ -106,22 +106,22 @@ ipt_local_hook(unsigned int hook,
 	}
 
 	/* Save things which could affect route */
-	mark = (*pskb)->mark;
-	iph = ip_hdr(*pskb);
+	mark = skb->mark;
+	iph = ip_hdr(skb);
 	saddr = iph->saddr;
 	daddr = iph->daddr;
 	tos = iph->tos;
 
-	ret = ipt_do_table(pskb, hook, in, out, &packet_mangler);
+	ret = ipt_do_table(skb, hook, in, out, &packet_mangler);
 	/* Reroute for ANY change. */
 	if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
-		iph = ip_hdr(*pskb);
+		iph = ip_hdr(skb);
 
 		if (iph->saddr != saddr ||
 		    iph->daddr != daddr ||
-		    (*pskb)->mark != mark ||
+		    skb->mark != mark ||
 		    iph->tos != tos)
-			if (ip_route_me_harder(pskb, RTN_UNSPEC))
+			if (ip_route_me_harder(skb, RTN_UNSPEC))
 				ret = NF_DROP;
 	}
 
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index d6e50339568..5de6e57ac55 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -47,30 +47,30 @@ static struct xt_table packet_raw = {
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ipt_hook(unsigned int hook,
-	 struct sk_buff **pskb,
+	 struct sk_buff *skb,
 	 const struct net_device *in,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ipt_do_table(pskb, hook, in, out, &packet_raw);
+	return ipt_do_table(skb, hook, in, out, &packet_raw);
 }
 
 static unsigned int
 ipt_local_hook(unsigned int hook,
-	       struct sk_buff **pskb,
+	       struct sk_buff *skb,
 	       const struct net_device *in,
 	       const struct net_device *out,
 	       int (*okfn)(struct sk_buff *))
 {
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr) ||
-	    ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("iptable_raw: ignoring short SOCK_RAW"
 			       "packet.\n");
 		return NF_ACCEPT;
 	}
-	return ipt_do_table(pskb, hook, in, out, &packet_raw);
+	return ipt_do_table(skb, hook, in, out, &packet_raw);
 }
 
 /* 'raw' is the very first table. */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2fcb9249a8d..831e9b29806 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -63,19 +63,20 @@ static int ipv4_print_conntrack(struct seq_file *s,
 }
 
-/* Returns new sk_buff, or NULL */
+/* Returns the result of ip_defrag(): zero on success, nonzero otherwise */
-static struct sk_buff *
-nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
+static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
+	int err;
+
 	skb_orphan(skb);
 
 	local_bh_disable();
-	skb = ip_defrag(skb, user);
+	err = ip_defrag(skb, user);
 	local_bh_enable();
 
-	if (skb)
+	if (!err)
 		ip_send_check(ip_hdr(skb));
 
-	return skb;
+	return err;
 }
 
 static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
@@ -99,17 +100,17 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 }
 
 static unsigned int ipv4_confirm(unsigned int hooknum,
-				 struct sk_buff **pskb,
+				 struct sk_buff *skb,
 				 const struct net_device *in,
 				 const struct net_device *out,
 				 int (*okfn)(struct sk_buff *))
 {
 	/* We've seen it coming out the other side: confirm it */
-	return nf_conntrack_confirm(pskb);
+	return nf_conntrack_confirm(skb);
 }
 
 static unsigned int ipv4_conntrack_help(unsigned int hooknum,
-				      struct sk_buff **pskb,
+				      struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
@@ -120,7 +121,7 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
 	struct nf_conntrack_helper *helper;
 
 	/* This is where we call the helper: as the packet goes out. */
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
 		return NF_ACCEPT;
 
@@ -131,56 +132,55 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
 	helper = rcu_dereference(help->helper);
 	if (!helper)
 		return NF_ACCEPT;
-	return helper->help(pskb, skb_network_offset(*pskb) + ip_hdrlen(*pskb),
+	return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
 			    ct, ctinfo);
 }
 
 static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
-					  struct sk_buff **pskb,
+					  struct sk_buff *skb,
 					  const struct net_device *in,
 					  const struct net_device *out,
 					  int (*okfn)(struct sk_buff *))
 {
 	/* Previously seen (loopback)?  Ignore.  Do this before
 	   fragment check. */
-	if ((*pskb)->nfct)
+	if (skb->nfct)
 		return NF_ACCEPT;
 
 	/* Gather fragments. */
-	if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-		*pskb = nf_ct_ipv4_gather_frags(*pskb,
-						hooknum == NF_IP_PRE_ROUTING ?
-						IP_DEFRAG_CONNTRACK_IN :
-						IP_DEFRAG_CONNTRACK_OUT);
-		if (!*pskb)
+	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+		if (nf_ct_ipv4_gather_frags(skb,
+					    hooknum == NF_IP_PRE_ROUTING ?
+					    IP_DEFRAG_CONNTRACK_IN :
+					    IP_DEFRAG_CONNTRACK_OUT))
 			return NF_STOLEN;
 	}
 	return NF_ACCEPT;
 }
 
 static unsigned int ipv4_conntrack_in(unsigned int hooknum,
-				      struct sk_buff **pskb,
+				      struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
-	return nf_conntrack_in(PF_INET, hooknum, pskb);
+	return nf_conntrack_in(PF_INET, hooknum, skb);
 }
 
 static unsigned int ipv4_conntrack_local(unsigned int hooknum,
-					 struct sk_buff **pskb,
+					 struct sk_buff *skb,
 					 const struct net_device *in,
 					 const struct net_device *out,
 					 int (*okfn)(struct sk_buff *))
 {
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ipt_hook: happy cracking.\n");
 		return NF_ACCEPT;
 	}
-	return nf_conntrack_in(PF_INET, hooknum, pskb);
+	return nf_conntrack_in(PF_INET, hooknum, skb);
 }
 
 /* Connection tracking may drop packets, but never alters them, so
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index bd93a1d7105..35a5aa69cd9 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -24,7 +24,7 @@ MODULE_DESCRIPTION("Amanda NAT helper");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ip_nat_amanda");
 
-static unsigned int help(struct sk_buff **pskb,
+static unsigned int help(struct sk_buff *skb,
 			 enum ip_conntrack_info ctinfo,
 			 unsigned int matchoff,
 			 unsigned int matchlen,
@@ -53,7 +53,7 @@ static unsigned int help(struct sk_buff **pskb,
 		return NF_DROP;
 
 	sprintf(buffer, "%u", port);
-	ret = nf_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
+	ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo,
 				       matchoff, matchlen,
 				       buffer, strlen(buffer));
 	if (ret != NF_ACCEPT)
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 7221aa20e6f..56e93f692e8 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -349,7 +349,7 @@ EXPORT_SYMBOL(nf_nat_setup_info);
 /* Returns true if succeeded. */
 static int
 manip_pkt(u_int16_t proto,
-	  struct sk_buff **pskb,
+	  struct sk_buff *skb,
 	  unsigned int iphdroff,
 	  const struct nf_conntrack_tuple *target,
 	  enum nf_nat_manip_type maniptype)
@@ -357,19 +357,19 @@ manip_pkt(u_int16_t proto,
 	struct iphdr *iph;
 	struct nf_nat_protocol *p;
 
-	if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
+	if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
 		return 0;
 
-	iph = (void *)(*pskb)->data + iphdroff;
+	iph = (void *)skb->data + iphdroff;
 
 	/* Manipulate protcol part. */
 
 	/* rcu_read_lock()ed by nf_hook_slow */
 	p = __nf_nat_proto_find(proto);
-	if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
+	if (!p->manip_pkt(skb, iphdroff, target, maniptype))
 		return 0;
 
-	iph = (void *)(*pskb)->data + iphdroff;
+	iph = (void *)skb->data + iphdroff;
 
 	if (maniptype == IP_NAT_MANIP_SRC) {
 		nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
@@ -385,7 +385,7 @@ manip_pkt(u_int16_t proto,
 unsigned int nf_nat_packet(struct nf_conn *ct,
 			   enum ip_conntrack_info ctinfo,
 			   unsigned int hooknum,
-			   struct sk_buff **pskb)
+			   struct sk_buff *skb)
 {
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned long statusbit;
@@ -407,7 +407,7 @@ unsigned int nf_nat_packet(struct nf_conn *ct,
 		/* We are aiming to look like inverse of other direction. */
 		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
 
-		if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
+		if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype))
 			return NF_DROP;
 	}
 	return NF_ACCEPT;
@@ -418,7 +418,7 @@ EXPORT_SYMBOL_GPL(nf_nat_packet);
 int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 				  enum ip_conntrack_info ctinfo,
 				  unsigned int hooknum,
-				  struct sk_buff **pskb)
+				  struct sk_buff *skb)
 {
 	struct {
 		struct icmphdr icmp;
@@ -426,24 +426,24 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	} *inside;
 	struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple inner, target;
-	int hdrlen = ip_hdrlen(*pskb);
+	int hdrlen = ip_hdrlen(skb);
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	unsigned long statusbit;
 	enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
 
-	if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
+	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
 		return 0;
 
-	inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
+	inside = (void *)skb->data + ip_hdrlen(skb);
 
 	/* We're actually going to mangle it beyond trivial checksum
 	   adjustment, so make sure the current checksum is correct. */
-	if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
+	if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
 		return 0;
 
 	/* Must be RELATED */
-	NF_CT_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
-		     (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
+	NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED ||
+		     skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
 
 	/* Redirects on non-null nats must be dropped, else they'll
 	   start talking to each other without our translation, and be
@@ -458,15 +458,15 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	}
 
 	pr_debug("icmp_reply_translation: translating error %p manip %u "
-		 "dir %s\n", *pskb, manip,
+		 "dir %s\n", skb, manip,
 		 dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
 
 	/* rcu_read_lock()ed by nf_hook_slow */
 	l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
 
-	if (!nf_ct_get_tuple(*pskb,
-			     ip_hdrlen(*pskb) + sizeof(struct icmphdr),
-			     (ip_hdrlen(*pskb) +
+	if (!nf_ct_get_tuple(skb,
+			     ip_hdrlen(skb) + sizeof(struct icmphdr),
+			     (ip_hdrlen(skb) +
 			      sizeof(struct icmphdr) + inside->ip.ihl * 4),
 			     (u_int16_t)AF_INET,
 			     inside->ip.protocol,
@@ -478,19 +478,19 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	   pass all hooks (locally-generated ICMP).  Consider incoming
 	   packet: PREROUTING (DST manip), routing produces ICMP, goes
 	   through POSTROUTING (which must correct the DST manip). */
-	if (!manip_pkt(inside->ip.protocol, pskb,
-		       ip_hdrlen(*pskb) + sizeof(inside->icmp),
+	if (!manip_pkt(inside->ip.protocol, skb,
+		       ip_hdrlen(skb) + sizeof(inside->icmp),
 		       &ct->tuplehash[!dir].tuple,
 		       !manip))
 		return 0;
 
-	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
 		/* Reloading "inside" here since manip_pkt inner. */
-		inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
+		inside = (void *)skb->data + ip_hdrlen(skb);
 		inside->icmp.checksum = 0;
 		inside->icmp.checksum =
-			csum_fold(skb_checksum(*pskb, hdrlen,
-					       (*pskb)->len - hdrlen, 0));
+			csum_fold(skb_checksum(skb, hdrlen,
+					       skb->len - hdrlen, 0));
 	}
 
 	/* Change outer to look the reply to an incoming packet
@@ -506,7 +506,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 
 	if (ct->status & statusbit) {
 		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-		if (!manip_pkt(0, pskb, 0, &target, manip))
+		if (!manip_pkt(0, skb, 0, &target, manip))
 			return 0;
 	}
 
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index 3663bd879c3..e1a16d3ea4c 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -28,7 +28,7 @@ MODULE_ALIAS("ip_nat_ftp");
 /* FIXME: Time out? --RR */
 
 static int
-mangle_rfc959_packet(struct sk_buff **pskb,
+mangle_rfc959_packet(struct sk_buff *skb,
 		     __be32 newip,
 		     u_int16_t port,
 		     unsigned int matchoff,
@@ -43,13 +43,13 @@ mangle_rfc959_packet(struct sk_buff **pskb,
 
 	pr_debug("calling nf_nat_mangle_tcp_packet\n");
 
-	return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+	return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
 					matchlen, buffer, strlen(buffer));
 }
 
 /* |1|132.235.1.2|6275| */
 static int
-mangle_eprt_packet(struct sk_buff **pskb,
+mangle_eprt_packet(struct sk_buff *skb,
 		   __be32 newip,
 		   u_int16_t port,
 		   unsigned int matchoff,
@@ -63,13 +63,13 @@ mangle_eprt_packet(struct sk_buff **pskb,
 
 	pr_debug("calling nf_nat_mangle_tcp_packet\n");
 
-	return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+	return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
 					matchlen, buffer, strlen(buffer));
 }
 
 /* |1|132.235.1.2|6275| */
 static int
-mangle_epsv_packet(struct sk_buff **pskb,
+mangle_epsv_packet(struct sk_buff *skb,
 		   __be32 newip,
 		   u_int16_t port,
 		   unsigned int matchoff,
@@ -83,11 +83,11 @@ mangle_epsv_packet(struct sk_buff **pskb,
 
 	pr_debug("calling nf_nat_mangle_tcp_packet\n");
 
-	return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+	return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
 					matchlen, buffer, strlen(buffer));
 }
 
-static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
+static int (*mangle[])(struct sk_buff *, __be32, u_int16_t,
 		       unsigned int, unsigned int, struct nf_conn *,
 		       enum ip_conntrack_info)
 = {
@@ -99,7 +99,7 @@ static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
 
 /* So, this packet has hit the connection tracking matching code.
    Mangle it, and change the expectation to match the new version. */
-static unsigned int nf_nat_ftp(struct sk_buff **pskb,
+static unsigned int nf_nat_ftp(struct sk_buff *skb,
 			       enum ip_conntrack_info ctinfo,
 			       enum nf_ct_ftp_type type,
 			       unsigned int matchoff,
@@ -132,7 +132,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb,
 	if (port == 0)
 		return NF_DROP;
 
-	if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo)) {
+	if (!mangle[type](skb, newip, port, matchoff, matchlen, ct, ctinfo)) {
 		nf_ct_unexpect_related(exp);
 		return NF_DROP;
 	}
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index c1b059a7370..a868c8c4132 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -22,12 +22,12 @@
 #include <linux/netfilter/nf_conntrack_h323.h>
 
 /****************************************************************************/
-static int set_addr(struct sk_buff **pskb,
+static int set_addr(struct sk_buff *skb,
 		    unsigned char **data, int dataoff,
 		    unsigned int addroff, __be32 ip, __be16 port)
 {
 	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo);
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 	struct {
 		__be32 ip;
 		__be16 port;
@@ -38,8 +38,8 @@ static int set_addr(struct sk_buff **pskb,
 	buf.port = port;
 	addroff += dataoff;
 
-	if (ip_hdr(*pskb)->protocol == IPPROTO_TCP) {
-		if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+	if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
+		if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
 					      addroff, sizeof(buf),
 					      (char *) &buf, sizeof(buf))) {
 			if (net_ratelimit())
@@ -49,14 +49,13 @@ static int set_addr(struct sk_buff **pskb,
 		}
 
 		/* Relocate data pointer */
-		th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
+		th = skb_header_pointer(skb, ip_hdrlen(skb),
 					sizeof(_tcph), &_tcph);
 		if (th == NULL)
 			return -1;
-		*data = (*pskb)->data + ip_hdrlen(*pskb) +
-		    th->doff * 4 + dataoff;
+		*data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff;
 	} else {
-		if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+		if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
 					      addroff, sizeof(buf),
 					      (char *) &buf, sizeof(buf))) {
 			if (net_ratelimit())
@@ -67,36 +66,35 @@ static int set_addr(struct sk_buff **pskb,
 		/* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
 		 * or pull everything in a linear buffer, so we can safely
 		 * use the skb pointers now */
-		*data = ((*pskb)->data + ip_hdrlen(*pskb) +
-			 sizeof(struct udphdr));
+		*data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
 	}
 
 	return 0;
 }
 
 /****************************************************************************/
-static int set_h225_addr(struct sk_buff **pskb,
+static int set_h225_addr(struct sk_buff *skb,
 			 unsigned char **data, int dataoff,
 			 TransportAddress *taddr,
 			 union nf_conntrack_address *addr, __be16 port)
 {
-	return set_addr(pskb, data, dataoff, taddr->ipAddress.ip,
+	return set_addr(skb, data, dataoff, taddr->ipAddress.ip,
 			addr->ip, port);
 }
 
 /****************************************************************************/
-static int set_h245_addr(struct sk_buff **pskb,
+static int set_h245_addr(struct sk_buff *skb,
 			 unsigned char **data, int dataoff,
 			 H245_TransportAddress *taddr,
 			 union nf_conntrack_address *addr, __be16 port)
 {
-	return set_addr(pskb, data, dataoff,
+	return set_addr(skb, data, dataoff,
 			taddr->unicastAddress.iPAddress.network,
 			addr->ip, port);
 }
 
 /****************************************************************************/
-static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct,
+static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
 			unsigned char **data,
 			TransportAddress *taddr, int count)
@@ -125,7 +123,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct,
 					 NIPQUAD(addr.ip), port,
 					 NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip),
 					 info->sig_port[!dir]);
-				return set_h225_addr(pskb, data, 0, &taddr[i],
+				return set_h225_addr(skb, data, 0, &taddr[i],
 						     &ct->tuplehash[!dir].
 						     tuple.dst.u3,
 						     info->sig_port[!dir]);
@@ -137,7 +135,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct,
 					 NIPQUAD(addr.ip), port,
 					 NIPQUAD(ct->tuplehash[!dir].tuple.src.u3.ip),
 					 info->sig_port[!dir]);
-				return set_h225_addr(pskb, data, 0, &taddr[i],
+				return set_h225_addr(skb, data, 0, &taddr[i],
 						     &ct->tuplehash[!dir].
 						     tuple.src.u3,
 						     info->sig_port[!dir]);
@@ -149,7 +147,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct,
+static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
 			unsigned char **data,
 			TransportAddress *taddr, int count)
@@ -168,7 +166,7 @@ static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct,
 				 NIPQUAD(addr.ip), ntohs(port),
 				 NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip),
 				 ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port));
-			return set_h225_addr(pskb, data, 0, &taddr[i],
+			return set_h225_addr(skb, data, 0, &taddr[i],
 					     &ct->tuplehash[!dir].tuple.dst.u3,
 					     ct->tuplehash[!dir].tuple.
 								dst.u.udp.port);
@@ -179,7 +177,7 @@ static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
 			unsigned char **data, int dataoff,
 			H245_TransportAddress *taddr,
@@ -244,7 +242,7 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
 	}
 
 	/* Modify signal */
-	if (set_h245_addr(pskb, data, dataoff, taddr,
+	if (set_h245_addr(skb, data, dataoff, taddr,
 			  &ct->tuplehash[!dir].tuple.dst.u3,
 			  htons((port & htons(1)) ? nated_port + 1 :
 						    nated_port)) == 0) {
@@ -273,7 +271,7 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
 		    enum ip_conntrack_info ctinfo,
 		    unsigned char **data, int dataoff,
 		    H245_TransportAddress *taddr, __be16 port,
@@ -301,7 +299,7 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct,
 	}
 
 	/* Modify signal */
-	if (set_h245_addr(pskb, data, dataoff, taddr,
+	if (set_h245_addr(skb, data, dataoff, taddr,
 			  &ct->tuplehash[!dir].tuple.dst.u3,
 			  htons(nated_port)) < 0) {
 		nf_ct_unexpect_related(exp);
@@ -318,7 +316,7 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
 		    enum ip_conntrack_info ctinfo,
 		    unsigned char **data, int dataoff,
 		    TransportAddress *taddr, __be16 port,
@@ -351,7 +349,7 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct,
 	}
 
 	/* Modify signal */
-	if (set_h225_addr(pskb, data, dataoff, taddr,
+	if (set_h225_addr(skb, data, dataoff, taddr,
 			  &ct->tuplehash[!dir].tuple.dst.u3,
 			  htons(nated_port)) == 0) {
 		/* Save ports */
@@ -406,7 +404,7 @@ static void ip_nat_q931_expect(struct nf_conn *new,
 }
 
 /****************************************************************************/
-static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 		    enum ip_conntrack_info ctinfo,
 		    unsigned char **data, TransportAddress *taddr, int idx,
 		    __be16 port, struct nf_conntrack_expect *exp)
@@ -439,7 +437,7 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct,
 	}
 
 	/* Modify signal */
-	if (set_h225_addr(pskb, data, 0, &taddr[idx],
+	if (set_h225_addr(skb, data, 0, &taddr[idx],
 			  &ct->tuplehash[!dir].tuple.dst.u3,
 			  htons(nated_port)) == 0) {
 		/* Save ports */
@@ -450,7 +448,7 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct,
 		if (idx > 0 &&
 		    get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
 		    (ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
-			set_h225_addr(pskb, data, 0, &taddr[0],
+			set_h225_addr(skb, data, 0, &taddr[0],
 				      &ct->tuplehash[!dir].tuple.dst.u3,
 				      info->sig_port[!dir]);
 		}
@@ -495,7 +493,7 @@ static void ip_nat_callforwarding_expect(struct nf_conn *new,
 }
 
 /****************************************************************************/
-static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
 			      enum ip_conntrack_info ctinfo,
 			      unsigned char **data, int dataoff,
 			      TransportAddress *taddr, __be16 port,
@@ -525,7 +523,7 @@ static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct,
 	}
 
 	/* Modify signal */
-	if (!set_h225_addr(pskb, data, dataoff, taddr,
+	if (!set_h225_addr(skb, data, dataoff, taddr,
 			   &ct->tuplehash[!dir].tuple.dst.u3,
 			   htons(nated_port)) == 0) {
 		nf_ct_unexpect_related(exp);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 93d8a0a8f03..8718da00ef2 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -111,22 +111,14 @@ static void mangle_contents(struct sk_buff *skb,
 }
 
 /* Unusual, but possible case. */
-static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
+static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
 {
-	struct sk_buff *nskb;
-
-	if ((*pskb)->len + extra > 65535)
+	if (skb->len + extra > 65535)
 		return 0;
 
-	nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC);
-	if (!nskb)
+	if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC))
 		return 0;
 
-	/* Transfer socket to new skb. */
-	if ((*pskb)->sk)
-		skb_set_owner_w(nskb, (*pskb)->sk);
-	kfree_skb(*pskb);
-	*pskb = nskb;
 	return 1;
 }
 
@@ -139,7 +131,7 @@ static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
  *
  * */
 int
-nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
+nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 			 struct nf_conn *ct,
 			 enum ip_conntrack_info ctinfo,
 			 unsigned int match_offset,
@@ -147,37 +139,37 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 			 const char *rep_buffer,
 			 unsigned int rep_len)
 {
-	struct rtable *rt = (struct rtable *)(*pskb)->dst;
+	struct rtable *rt = (struct rtable *)skb->dst;
 	struct iphdr *iph;
 	struct tcphdr *tcph;
 	int oldlen, datalen;
 
-	if (!skb_make_writable(pskb, (*pskb)->len))
+	if (!skb_make_writable(skb, skb->len))
 		return 0;
 
 	if (rep_len > match_len &&
-	    rep_len - match_len > skb_tailroom(*pskb) &&
-	    !enlarge_skb(pskb, rep_len - match_len))
+	    rep_len - match_len > skb_tailroom(skb) &&
+	    !enlarge_skb(skb, rep_len - match_len))
 		return 0;
 
-	SKB_LINEAR_ASSERT(*pskb);
+	SKB_LINEAR_ASSERT(skb);
 
-	iph = ip_hdr(*pskb);
+	iph = ip_hdr(skb);
 	tcph = (void *)iph + iph->ihl*4;
 
-	oldlen = (*pskb)->len - iph->ihl*4;
-	mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
+	oldlen = skb->len - iph->ihl*4;
+	mangle_contents(skb, iph->ihl*4 + tcph->doff*4,
 			match_offset, match_len, rep_buffer, rep_len);
 
-	datalen = (*pskb)->len - iph->ihl*4;
-	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+	datalen = skb->len - iph->ihl*4;
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
 		if (!(rt->rt_flags & RTCF_LOCAL) &&
-		    (*pskb)->dev->features & NETIF_F_V4_CSUM) {
-			(*pskb)->ip_summed = CHECKSUM_PARTIAL;
-			(*pskb)->csum_start = skb_headroom(*pskb) +
-					      skb_network_offset(*pskb) +
-					      iph->ihl * 4;
-			(*pskb)->csum_offset = offsetof(struct tcphdr, check);
+		    skb->dev->features & NETIF_F_V4_CSUM) {
+			skb->ip_summed = CHECKSUM_PARTIAL;
+			skb->csum_start = skb_headroom(skb) +
+					  skb_network_offset(skb) +
+					  iph->ihl * 4;
+			skb->csum_offset = offsetof(struct tcphdr, check);
 			tcph->check = ~tcp_v4_check(datalen,
 						    iph->saddr, iph->daddr, 0);
 		} else {
@@ -188,7 +180,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 								datalen, 0));
 		}
 	} else
-		nf_proto_csum_replace2(&tcph->check, *pskb,
+		nf_proto_csum_replace2(&tcph->check, skb,
 				       htons(oldlen), htons(datalen), 1);
 
 	if (rep_len != match_len) {
@@ -197,7 +189,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
 				    (int)rep_len - (int)match_len,
 				    ct, ctinfo);
 		/* Tell TCP window tracking about seq change */
-		nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb),
+		nf_conntrack_tcp_update(skb, ip_hdrlen(skb),
 					ct, CTINFO2DIR(ctinfo));
 	}
 	return 1;
@@ -215,7 +207,7 @@ EXPORT_SYMBOL(nf_nat_mangle_tcp_packet);
  *       should be fairly easy to do.
  */
 int
-nf_nat_mangle_udp_packet(struct sk_buff **pskb,
+nf_nat_mangle_udp_packet(struct sk_buff *skb,
 			 struct nf_conn *ct,
 			 enum ip_conntrack_info ctinfo,
 			 unsigned int match_offset,
@@ -223,48 +215,48 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
 			 const char *rep_buffer,
 			 unsigned int rep_len)
 {
-	struct rtable *rt = (struct rtable *)(*pskb)->dst;
+	struct rtable *rt = (struct rtable *)skb->dst;
 	struct iphdr *iph;
 	struct udphdr *udph;
 	int datalen, oldlen;
 
 	/* UDP helpers might accidentally mangle the wrong packet */
-	iph = ip_hdr(*pskb);
-	if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
+	iph = ip_hdr(skb);
+	if (skb->len < iph->ihl*4 + sizeof(*udph) +
 			       match_offset + match_len)
 		return 0;
 
-	if (!skb_make_writable(pskb, (*pskb)->len))
+	if (!skb_make_writable(skb, skb->len))
 		return 0;
 
 	if (rep_len > match_len &&
-	    rep_len - match_len > skb_tailroom(*pskb) &&
-	    !enlarge_skb(pskb, rep_len - match_len))
+	    rep_len - match_len > skb_tailroom(skb) &&
+	    !enlarge_skb(skb, rep_len - match_len))
 		return 0;
 
-	iph = ip_hdr(*pskb);
+	iph = ip_hdr(skb);
 	udph = (void *)iph + iph->ihl*4;
 
-	oldlen = (*pskb)->len - iph->ihl*4;
-	mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
+	oldlen = skb->len - iph->ihl*4;
+	mangle_contents(skb, iph->ihl*4 + sizeof(*udph),
 			match_offset, match_len, rep_buffer, rep_len);
 
 	/* update the length of the UDP packet */
-	datalen = (*pskb)->len - iph->ihl*4;
+	datalen = skb->len - iph->ihl*4;
 	udph->len = htons(datalen);
 
 	/* fix udp checksum if udp checksum was previously calculated */
-	if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
+	if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
 		return 1;
 
-	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
 		if (!(rt->rt_flags & RTCF_LOCAL) &&
-		    (*pskb)->dev->features & NETIF_F_V4_CSUM) {
-			(*pskb)->ip_summed = CHECKSUM_PARTIAL;
-			(*pskb)->csum_start = skb_headroom(*pskb) +
-					      skb_network_offset(*pskb) +
-					      iph->ihl * 4;
-			(*pskb)->csum_offset = offsetof(struct udphdr, check);
+		    skb->dev->features & NETIF_F_V4_CSUM) {
+			skb->ip_summed = CHECKSUM_PARTIAL;
+			skb->csum_start = skb_headroom(skb) +
+					  skb_network_offset(skb) +
+					  iph->ihl * 4;
+			skb->csum_offset = offsetof(struct udphdr, check);
 			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
 							 datalen, IPPROTO_UDP,
 							 0);
@@ -278,7 +270,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
 				udph->check = CSUM_MANGLED_0;
 		}
 	} else
-		nf_proto_csum_replace2(&udph->check, *pskb,
+		nf_proto_csum_replace2(&udph->check, skb,
 				       htons(oldlen), htons(datalen), 1);
 
 	return 1;
@@ -330,7 +322,7 @@ sack_adjust(struct sk_buff *skb,
 
 /* TCP SACK sequence number adjustment */
 static inline unsigned int
-nf_nat_sack_adjust(struct sk_buff **pskb,
+nf_nat_sack_adjust(struct sk_buff *skb,
 		   struct tcphdr *tcph,
 		   struct nf_conn *ct,
 		   enum ip_conntrack_info ctinfo)
@@ -338,17 +330,17 @@ nf_nat_sack_adjust(struct sk_buff **pskb,
 	unsigned int dir, optoff, optend;
 	struct nf_conn_nat *nat = nfct_nat(ct);
 
-	optoff = ip_hdrlen(*pskb) + sizeof(struct tcphdr);
-	optend = ip_hdrlen(*pskb) + tcph->doff * 4;
+	optoff = ip_hdrlen(skb) + sizeof(struct tcphdr);
+	optend = ip_hdrlen(skb) + tcph->doff * 4;
 
-	if (!skb_make_writable(pskb, optend))
+	if (!skb_make_writable(skb, optend))
 		return 0;
 
 	dir = CTINFO2DIR(ctinfo);
 
 	while (optoff < optend) {
 		/* Usually: option, length. */
-		unsigned char *op = (*pskb)->data + optoff;
+		unsigned char *op = skb->data + optoff;
 
 		switch (op[0]) {
 		case TCPOPT_EOL:
@@ -365,7 +357,7 @@ nf_nat_sack_adjust(struct sk_buff **pskb,
 			if (op[0] == TCPOPT_SACK &&
 			    op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
 			    ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
-				sack_adjust(*pskb, tcph, optoff+2,
+				sack_adjust(skb, tcph, optoff+2,
 					    optoff+op[1], &nat->seq[!dir]);
 			optoff += op[1];
 		}
@@ -375,7 +367,7 @@ nf_nat_sack_adjust(struct sk_buff **pskb,
 
 /* TCP sequence number adjustment.  Returns 1 on success, 0 on failure */
 int
-nf_nat_seq_adjust(struct sk_buff **pskb,
+nf_nat_seq_adjust(struct sk_buff *skb,
 		  struct nf_conn *ct,
 		  enum ip_conntrack_info ctinfo)
 {
@@ -390,10 +382,10 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
 	this_way = &nat->seq[dir];
 	other_way = &nat->seq[!dir];
 
-	if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
+	if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
 		return 0;
 
-	tcph = (void *)(*pskb)->data + ip_hdrlen(*pskb);
+	tcph = (void *)skb->data + ip_hdrlen(skb);
 	if (after(ntohl(tcph->seq), this_way->correction_pos))
 		newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
 	else
@@ -405,8 +397,8 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
 	else
 		newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
 
-	nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0);
-	nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0);
+	nf_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
+	nf_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
 
 	pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
 		 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
@@ -415,10 +407,10 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
 	tcph->seq = newseq;
 	tcph->ack_seq = newack;
 
-	if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo))
+	if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo))
 		return 0;
 
-	nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), ct, dir);
+	nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir);
 
 	return 1;
 }
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
index bcf274bba60..766e2c16c6b 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -27,7 +27,7 @@ MODULE_DESCRIPTION("IRC (DCC) NAT helper");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ip_nat_irc");
 
-static unsigned int help(struct sk_buff **pskb,
+static unsigned int help(struct sk_buff *skb,
 			 enum ip_conntrack_info ctinfo,
 			 unsigned int matchoff,
 			 unsigned int matchlen,
@@ -58,7 +58,7 @@ static unsigned int help(struct sk_buff **pskb,
 	pr_debug("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n",
 		 buffer, NIPQUAD(ip), port);
 
-	ret = nf_nat_mangle_tcp_packet(pskb, exp->master, ctinfo,
+	ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo,
 				       matchoff, matchlen, buffer,
 				       strlen(buffer));
 	if (ret != NF_ACCEPT)
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 984ec8308b2..e1385a09907 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -110,7 +110,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 
 /* outbound packets == from PNS to PAC */
 static int
-pptp_outbound_pkt(struct sk_buff **pskb,
+pptp_outbound_pkt(struct sk_buff *skb,
 		  struct nf_conn *ct,
 		  enum ip_conntrack_info ctinfo,
 		  struct PptpControlHeader *ctlh,
@@ -175,7 +175,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 		 ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
 
 	/* mangle packet */
-	if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+	if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
 				     cid_off + sizeof(struct pptp_pkt_hdr) +
 				     sizeof(struct PptpControlHeader),
 				     sizeof(new_callid), (char *)&new_callid,
@@ -213,7 +213,7 @@ pptp_exp_gre(struct nf_conntrack_expect *expect_orig,
 
 /* inbound packets == from PAC to PNS */
 static int
-pptp_inbound_pkt(struct sk_buff **pskb,
+pptp_inbound_pkt(struct sk_buff *skb,
 		 struct nf_conn *ct,
 		 enum ip_conntrack_info ctinfo,
 		 struct PptpControlHeader *ctlh,
@@ -268,7 +268,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 	pr_debug("altering peer call id from 0x%04x to 0x%04x\n",
 		 ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
 
-	if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+	if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
 				     pcid_off + sizeof(struct pptp_pkt_hdr) +
 				     sizeof(struct PptpControlHeader),
 				     sizeof(new_pcid), (char *)&new_pcid,
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index d562290b182..b820f996035 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -98,21 +98,21 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
 
 /* manipulate a GRE packet according to maniptype */
 static int
-gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff,
+gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
 	      const struct nf_conntrack_tuple *tuple,
 	      enum nf_nat_manip_type maniptype)
 {
 	struct gre_hdr *greh;
 	struct gre_hdr_pptp *pgreh;
-	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
 	unsigned int hdroff = iphdroff + iph->ihl * 4;
 
 	/* pgreh includes two optional 32bit fields which are not required
 	 * to be there.  That's where the magic '8' comes from */
-	if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh) - 8))
+	if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
 		return 0;
 
-	greh = (void *)(*pskb)->data + hdroff;
+	greh = (void *)skb->data + hdroff;
 	pgreh = (struct gre_hdr_pptp *)greh;
 
 	/* we only have destination manip of a packet, since 'source key'
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 898d7377115..b9fc724388f 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -52,20 +52,20 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
 }
 
 static int
-icmp_manip_pkt(struct sk_buff **pskb,
+icmp_manip_pkt(struct sk_buff *skb,
 	       unsigned int iphdroff,
 	       const struct nf_conntrack_tuple *tuple,
 	       enum nf_nat_manip_type maniptype)
 {
-	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
 	struct icmphdr *hdr;
 	unsigned int hdroff = iphdroff + iph->ihl*4;
 
-	if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
+	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
 		return 0;
 
-	hdr = (struct icmphdr *)((*pskb)->data + hdroff);
-	nf_proto_csum_replace2(&hdr->checksum, *pskb,
+	hdr = (struct icmphdr *)(skb->data + hdroff);
+	nf_proto_csum_replace2(&hdr->checksum, skb,
 			       hdr->un.echo.id, tuple->src.u.icmp.id, 0);
 	hdr->un.echo.id = tuple->src.u.icmp.id;
 	return 1;
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 5bbbb2acdc7..6bab2e18445 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -88,12 +88,12 @@ tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
 }
 
 static int
-tcp_manip_pkt(struct sk_buff **pskb,
+tcp_manip_pkt(struct sk_buff *skb,
 	      unsigned int iphdroff,
 	      const struct nf_conntrack_tuple *tuple,
 	      enum nf_nat_manip_type maniptype)
 {
-	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
 	struct tcphdr *hdr;
 	unsigned int hdroff = iphdroff + iph->ihl*4;
 	__be32 oldip, newip;
@@ -103,14 +103,14 @@ tcp_manip_pkt(struct sk_buff **pskb,
 	/* this could be a inner header returned in icmp packet; in such
 	   cases we cannot update the checksum field since it is outside of
 	   the 8 bytes of transport layer headers we are guaranteed */
-	if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
+	if (skb->len >= hdroff + sizeof(struct tcphdr))
 		hdrsize = sizeof(struct tcphdr);
 
-	if (!skb_make_writable(pskb, hdroff + hdrsize))
+	if (!skb_make_writable(skb, hdroff + hdrsize))
 		return 0;
 
-	iph = (struct iphdr *)((*pskb)->data + iphdroff);
-	hdr = (struct tcphdr *)((*pskb)->data + hdroff);
+	iph = (struct iphdr *)(skb->data + iphdroff);
+	hdr = (struct tcphdr *)(skb->data + hdroff);
 
 	if (maniptype == IP_NAT_MANIP_SRC) {
 		/* Get rid of src ip and src pt */
@@ -132,8 +132,8 @@ tcp_manip_pkt(struct sk_buff **pskb,
 	if (hdrsize < sizeof(*hdr))
 		return 1;
 
-	nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
-	nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0);
+	nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+	nf_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
 	return 1;
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index a0af4fd9558..cbf1a61e290 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -86,22 +86,22 @@ udp_unique_tuple(struct nf_conntrack_tuple *tuple,
 }
 
 static int
-udp_manip_pkt(struct sk_buff **pskb,
+udp_manip_pkt(struct sk_buff *skb,
 	      unsigned int iphdroff,
 	      const struct nf_conntrack_tuple *tuple,
 	      enum nf_nat_manip_type maniptype)
 {
-	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+	struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
 	struct udphdr *hdr;
 	unsigned int hdroff = iphdroff + iph->ihl*4;
 	__be32 oldip, newip;
 	__be16 *portptr, newport;
 
-	if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
+	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
 		return 0;
 
-	iph = (struct iphdr *)((*pskb)->data + iphdroff);
-	hdr = (struct udphdr *)((*pskb)->data + hdroff);
+	iph = (struct iphdr *)(skb->data + iphdroff);
+	hdr = (struct udphdr *)(skb->data + hdroff);
 
 	if (maniptype == IP_NAT_MANIP_SRC) {
 		/* Get rid of src ip and src pt */
@@ -116,9 +116,9 @@ udp_manip_pkt(struct sk_buff **pskb,
 		newport = tuple->dst.u.udp.port;
 		portptr = &hdr->dest;
 	}
-	if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
-		nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
-		nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport,
+	if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+		nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+		nf_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
 				       0);
 		if (!hdr->check)
 			hdr->check = CSUM_MANGLED_0;
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index f50d0203f9c..cfd2742e970 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -37,7 +37,7 @@ static int unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
 }
 
 static int
-unknown_manip_pkt(struct sk_buff **pskb,
+unknown_manip_pkt(struct sk_buff *skb,
 		  unsigned int iphdroff,
 		  const struct nf_conntrack_tuple *tuple,
 		  enum nf_nat_manip_type maniptype)
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 76ec59ae524..46b25ab5f78 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -65,7 +65,7 @@ static struct xt_table nat_table = {
 };
 
 /* Source NAT */
-static unsigned int ipt_snat_target(struct sk_buff **pskb,
+static unsigned int ipt_snat_target(struct sk_buff *skb,
 				    const struct net_device *in,
 				    const struct net_device *out,
 				    unsigned int hooknum,
@@ -78,7 +78,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb,
 
 	NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 
 	/* Connection must be valid and new. */
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
@@ -107,7 +107,7 @@ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
 	ip_rt_put(rt);
 }
 
-static unsigned int ipt_dnat_target(struct sk_buff **pskb,
+static unsigned int ipt_dnat_target(struct sk_buff *skb,
 				    const struct net_device *in,
 				    const struct net_device *out,
 				    unsigned int hooknum,
@@ -121,14 +121,14 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
 	NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
 		     hooknum == NF_IP_LOCAL_OUT);
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 
 	/* Connection must be valid and new. */
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
 
 	if (hooknum == NF_IP_LOCAL_OUT &&
 	    mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
-		warn_if_extra_mangle(ip_hdr(*pskb)->daddr,
+		warn_if_extra_mangle(ip_hdr(skb)->daddr,
 				     mr->range[0].min_ip);
 
 	return nf_nat_setup_info(ct, &mr->range[0], hooknum);
@@ -204,7 +204,7 @@ alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum)
 	return nf_nat_setup_info(ct, &range, hooknum);
 }
 
-int nf_nat_rule_find(struct sk_buff **pskb,
+int nf_nat_rule_find(struct sk_buff *skb,
 		     unsigned int hooknum,
 		     const struct net_device *in,
 		     const struct net_device *out,
@@ -212,7 +212,7 @@ int nf_nat_rule_find(struct sk_buff **pskb,
 {
 	int ret;
 
-	ret = ipt_do_table(pskb, hooknum, in, out, &nat_table);
+	ret = ipt_do_table(skb, hooknum, in, out, &nat_table);
 
 	if (ret == NF_ACCEPT) {
 		if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index e14d41976c2..ce9edbcc01e 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -60,7 +60,7 @@ static void addr_map_init(struct nf_conn *ct, struct addr_map *map)
 	}
 }
 
-static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
+static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo,
 			struct nf_conn *ct, const char **dptr, size_t dlen,
 			enum sip_header_pos pos, struct addr_map *map)
 {
@@ -84,15 +84,15 @@ static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
 	} else
 		return 1;
 
-	if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+	if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
 				      matchoff, matchlen, addr, addrlen))
 		return 0;
-	*dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
+	*dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
 	return 1;
 
 }
 
-static unsigned int ip_nat_sip(struct sk_buff **pskb,
+static unsigned int ip_nat_sip(struct sk_buff *skb,
 			       enum ip_conntrack_info ctinfo,
 			       struct nf_conn *ct,
 			       const char **dptr)
@@ -101,8 +101,8 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb,
 	struct addr_map map;
 	int dataoff, datalen;
 
-	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
-	datalen = (*pskb)->len - dataoff;
+	dataoff = ip_hdrlen(skb) + sizeof(struct udphdr);
+	datalen = skb->len - dataoff;
 	if (datalen < sizeof("SIP/2.0") - 1)
 		return NF_ACCEPT;
 
@@ -121,19 +121,19 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb,
 		else
 			pos = POS_REQ_URI;
 
-		if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map))
+		if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, pos, &map))
 			return NF_DROP;
 	}
 
-	if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) ||
-	    !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) ||
-	    !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) ||
-	    !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map))
+	if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_FROM, &map) ||
+	    !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_TO, &map) ||
+	    !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_VIA, &map) ||
+	    !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map))
 		return NF_DROP;
 	return NF_ACCEPT;
 }
 
-static unsigned int mangle_sip_packet(struct sk_buff **pskb,
+static unsigned int mangle_sip_packet(struct sk_buff *skb,
 				      enum ip_conntrack_info ctinfo,
 				      struct nf_conn *ct,
 				      const char **dptr, size_t dlen,
@@ -145,16 +145,16 @@ static unsigned int mangle_sip_packet(struct sk_buff **pskb,
 	if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0)
 		return 0;
 
-	if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+	if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
 				      matchoff, matchlen, buffer, bufflen))
 		return 0;
 
 	/* We need to reload this. Thanks Patrick. */
-	*dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
+	*dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
 	return 1;
 }
 
-static int mangle_content_len(struct sk_buff **pskb,
+static int mangle_content_len(struct sk_buff *skb,
 			      enum ip_conntrack_info ctinfo,
 			      struct nf_conn *ct,
 			      const char *dptr)
@@ -163,22 +163,22 @@ static int mangle_content_len(struct sk_buff **pskb,
 	char buffer[sizeof("65536")];
 	int bufflen;
 
-	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(skb) + sizeof(struct udphdr);
 
 	/* Get actual SDP lenght */
-	if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff,
+	if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff,
 			    &matchlen, POS_SDP_HEADER) > 0) {
 
 		/* since ct_sip_get_info() give us a pointer passing 'v='
 		   we need to add 2 bytes in this count. */
-		int c_len = (*pskb)->len - dataoff - matchoff + 2;
+		int c_len = skb->len - dataoff - matchoff + 2;
 
 		/* Now, update SDP length */
-		if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff,
+		if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff,
 				    &matchlen, POS_CONTENT) > 0) {
 
 			bufflen = sprintf(buffer, "%u", c_len);
-			return nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+			return nf_nat_mangle_udp_packet(skb, ct, ctinfo,
 							matchoff, matchlen,
 							buffer, bufflen);
 		}
@@ -186,7 +186,7 @@ static int mangle_content_len(struct sk_buff **pskb,
 	return 0;
 }
 
-static unsigned int mangle_sdp(struct sk_buff **pskb,
+static unsigned int mangle_sdp(struct sk_buff *skb,
 			       enum ip_conntrack_info ctinfo,
 			       struct nf_conn *ct,
 			       __be32 newip, u_int16_t port,
@@ -195,25 +195,25 @@ static unsigned int mangle_sdp(struct sk_buff **pskb,
 	char buffer[sizeof("nnn.nnn.nnn.nnn")];
 	unsigned int dataoff, bufflen;
 
-	dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
+	dataoff = ip_hdrlen(skb) + sizeof(struct udphdr);
 
 	/* Mangle owner and contact info. */
 	bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
-	if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
+	if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff,
 			       buffer, bufflen, POS_OWNER_IP4))
 		return 0;
 
-	if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
+	if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff,
 			       buffer, bufflen, POS_CONNECTION_IP4))
 		return 0;
 
 	/* Mangle media port. */
 	bufflen = sprintf(buffer, "%u", port);
-	if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
+	if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff,
 			       buffer, bufflen, POS_MEDIA))
 		return 0;
 
-	return mangle_content_len(pskb, ctinfo, ct, dptr);
+	return mangle_content_len(skb, ctinfo, ct, dptr);
 }
 
 static void ip_nat_sdp_expect(struct nf_conn *ct,
@@ -241,7 +241,7 @@ static void ip_nat_sdp_expect(struct nf_conn *ct,
 
 /* So, this packet has hit the connection tracking matching code.
    Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_sdp(struct sk_buff **pskb,
+static unsigned int ip_nat_sdp(struct sk_buff *skb,
 			       enum ip_conntrack_info ctinfo,
 			       struct nf_conntrack_expect *exp,
 			       const char *dptr)
@@ -277,7 +277,7 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb,
 	if (port == 0)
 		return NF_DROP;
 
-	if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) {
+	if (!mangle_sdp(skb, ctinfo, ct, newip, port, dptr)) {
 		nf_ct_unexpect_related(exp);
 		return NF_DROP;
 	}
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 6bfcd3a90f0..03709d6b4b0 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1188,9 +1188,9 @@ static int snmp_parse_mangle(unsigned char *msg,
  */
 static int snmp_translate(struct nf_conn *ct,
 			  enum ip_conntrack_info ctinfo,
-			  struct sk_buff **pskb)
+			  struct sk_buff *skb)
 {
-	struct iphdr *iph = ip_hdr(*pskb);
+	struct iphdr *iph = ip_hdr(skb);
 	struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
 	u_int16_t udplen = ntohs(udph->len);
 	u_int16_t paylen = udplen - sizeof(struct udphdr);
@@ -1225,13 +1225,13 @@ static int snmp_translate(struct nf_conn *ct,
 
 /* We don't actually set up expectations, just adjust internal IP
  * addresses if this is being NATted */
-static int help(struct sk_buff **pskb, unsigned int protoff,
+static int help(struct sk_buff *skb, unsigned int protoff,
 		struct nf_conn *ct,
 		enum ip_conntrack_info ctinfo)
 {
 	int dir = CTINFO2DIR(ctinfo);
 	unsigned int ret;
-	struct iphdr *iph = ip_hdr(*pskb);
+	struct iphdr *iph = ip_hdr(skb);
 	struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
 
 	/* SNMP replies and originating SNMP traps get mangled */
@@ -1250,7 +1250,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 	 * enough room for a UDP header.  Just verify the UDP length field so we
 	 * can mess around with the payload.
 	 */
-	if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) {
+	if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) {
 		 if (net_ratelimit())
 			 printk(KERN_WARNING "SNMP: dropping malformed packet "
 				"src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
@@ -1258,11 +1258,11 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 		 return NF_DROP;
 	}
 
-	if (!skb_make_writable(pskb, (*pskb)->len))
+	if (!skb_make_writable(skb, skb->len))
 		return NF_DROP;
 
 	spin_lock_bh(&snmp_lock);
-	ret = snmp_translate(ct, ctinfo, pskb);
+	ret = snmp_translate(ct, ctinfo, skb);
 	spin_unlock_bh(&snmp_lock);
 	return ret;
 }
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 46cc99def16..7db76ea9af9 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -67,7 +67,7 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
 
 static unsigned int
 nf_nat_fn(unsigned int hooknum,
-	  struct sk_buff **pskb,
+	  struct sk_buff *skb,
 	  const struct net_device *in,
 	  const struct net_device *out,
 	  int (*okfn)(struct sk_buff *))
@@ -80,9 +80,9 @@ nf_nat_fn(unsigned int hooknum,
 
 	/* We never see fragments: conntrack defrags on pre-routing
 	   and local-out, and nf_nat_out protects post-routing. */
-	NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)));
+	NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	/* Can't track?  It's not due to stress, or conntrack would
 	   have dropped it.  Hence it's the user's responsibilty to
 	   packet filter it out, or implement conntrack/NAT for that
@@ -91,10 +91,10 @@ nf_nat_fn(unsigned int hooknum,
 		/* Exception: ICMP redirect to new connection (not in
 		   hash table yet).  We must not let this through, in
 		   case we're doing NAT to the same network. */
-		if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
+		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
 			struct icmphdr _hdr, *hp;
 
-			hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
+			hp = skb_header_pointer(skb, ip_hdrlen(skb),
 						sizeof(_hdr), &_hdr);
 			if (hp != NULL &&
 			    hp->type == ICMP_REDIRECT)
@@ -119,9 +119,9 @@ nf_nat_fn(unsigned int hooknum,
 	switch (ctinfo) {
 	case IP_CT_RELATED:
 	case IP_CT_RELATED+IP_CT_IS_REPLY:
-		if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
+		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
 			if (!nf_nat_icmp_reply_translation(ct, ctinfo,
-							   hooknum, pskb))
+							   hooknum, skb))
 				return NF_DROP;
 			else
 				return NF_ACCEPT;
@@ -141,7 +141,7 @@ nf_nat_fn(unsigned int hooknum,
 				/* LOCAL_IN hook doesn't have a chain!  */
 				ret = alloc_null_binding(ct, hooknum);
 			else
-				ret = nf_nat_rule_find(pskb, hooknum, in, out,
+				ret = nf_nat_rule_find(skb, hooknum, in, out,
 						       ct);
 
 			if (ret != NF_ACCEPT) {
@@ -159,31 +159,31 @@ nf_nat_fn(unsigned int hooknum,
 			     ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
 	}
 
-	return nf_nat_packet(ct, ctinfo, hooknum, pskb);
+	return nf_nat_packet(ct, ctinfo, hooknum, skb);
 }
 
 static unsigned int
 nf_nat_in(unsigned int hooknum,
-	  struct sk_buff **pskb,
+	  struct sk_buff *skb,
 	  const struct net_device *in,
 	  const struct net_device *out,
 	  int (*okfn)(struct sk_buff *))
 {
 	unsigned int ret;
-	__be32 daddr = ip_hdr(*pskb)->daddr;
+	__be32 daddr = ip_hdr(skb)->daddr;
 
-	ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
+	ret = nf_nat_fn(hooknum, skb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    daddr != ip_hdr(*pskb)->daddr) {
-		dst_release((*pskb)->dst);
-		(*pskb)->dst = NULL;
+	    daddr != ip_hdr(skb)->daddr) {
+		dst_release(skb->dst);
+		skb->dst = NULL;
 	}
 	return ret;
 }
 
 static unsigned int
 nf_nat_out(unsigned int hooknum,
-	   struct sk_buff **pskb,
+	   struct sk_buff *skb,
 	   const struct net_device *in,
 	   const struct net_device *out,
 	   int (*okfn)(struct sk_buff *))
@@ -195,14 +195,14 @@ nf_nat_out(unsigned int hooknum,
 	unsigned int ret;
 
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr) ||
-	    ip_hdrlen(*pskb) < sizeof(struct iphdr))
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
-	ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
+	ret = nf_nat_fn(hooknum, skb, in, out, okfn);
 #ifdef CONFIG_XFRM
 	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) {
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 
 		if (ct->tuplehash[dir].tuple.src.u3.ip !=
@@ -210,7 +210,7 @@ nf_nat_out(unsigned int hooknum,
 		    || ct->tuplehash[dir].tuple.src.u.all !=
 		       ct->tuplehash[!dir].tuple.dst.u.all
 		    )
-			return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP;
+			return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP;
 	}
 #endif
 	return ret;
@@ -218,7 +218,7 @@ nf_nat_out(unsigned int hooknum,
 
 static unsigned int
 nf_nat_local_fn(unsigned int hooknum,
-		struct sk_buff **pskb,
+		struct sk_buff *skb,
 		const struct net_device *in,
 		const struct net_device *out,
 		int (*okfn)(struct sk_buff *))
@@ -228,24 +228,24 @@ nf_nat_local_fn(unsigned int hooknum,
 	unsigned int ret;
 
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr) ||
-	    ip_hdrlen(*pskb) < sizeof(struct iphdr))
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr))
 		return NF_ACCEPT;
 
-	ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
+	ret = nf_nat_fn(hooknum, skb, in, out, okfn);
 	if (ret != NF_DROP && ret != NF_STOLEN &&
-	    (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) {
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
 		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 
 		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
 		    ct->tuplehash[!dir].tuple.src.u3.ip) {
-			if (ip_route_me_harder(pskb, RTN_UNSPEC))
+			if (ip_route_me_harder(skb, RTN_UNSPEC))
 				ret = NF_DROP;
 		}
 #ifdef CONFIG_XFRM
 		else if (ct->tuplehash[dir].tuple.dst.u.all !=
 			 ct->tuplehash[!dir].tuple.src.u.all)
-			if (ip_xfrm_me_harder(pskb))
+			if (ip_xfrm_me_harder(skb))
 				ret = NF_DROP;
 #endif
 	}
@@ -254,7 +254,7 @@ nf_nat_local_fn(unsigned int hooknum,
 
 static unsigned int
 nf_nat_adjust(unsigned int hooknum,
-	      struct sk_buff **pskb,
+	      struct sk_buff *skb,
 	      const struct net_device *in,
 	      const struct net_device *out,
 	      int (*okfn)(struct sk_buff *))
@@ -262,10 +262,10 @@ nf_nat_adjust(unsigned int hooknum,
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
 		pr_debug("nf_nat_standalone: adjusting sequence number\n");
-		if (!nf_nat_seq_adjust(pskb, ct, ctinfo))
+		if (!nf_nat_seq_adjust(skb, ct, ctinfo))
 			return NF_DROP;
 	}
 	return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
index 04dfeaefec0..0ecec701cb4 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -20,7 +20,7 @@ MODULE_DESCRIPTION("TFTP NAT helper");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ip_nat_tftp");
 
-static unsigned int help(struct sk_buff **pskb,
+static unsigned int help(struct sk_buff *skb,
 			 enum ip_conntrack_info ctinfo,
 			 struct nf_conntrack_expect *exp)
 {
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index e5b05b03910..fd16cb8f8ab 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -70,8 +70,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
 	seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot));
 	seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
-	seq_printf(seq,  "FRAG: inuse %d memory %d\n", ip_frag_nqueues,
-		   atomic_read(&ip_frag_mem));
+	seq_printf(seq,  "FRAG: inuse %d memory %d\n",
+			ip_frag_nqueues(), ip_frag_mem());
 	return 0;
 }
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index eb286abcf5d..c98ef16effd 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -19,6 +19,7 @@
 #include <net/route.h>
 #include <net/tcp.h>
 #include <net/cipso_ipv4.h>
+#include <net/inet_frag.h>
 
 /* From af_inet.c */
 extern int sysctl_ip_nonlocal_bind;
@@ -357,7 +358,7 @@ ctl_table ipv4_table[] = {
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_HIGH_THRESH,
 		.procname	= "ipfrag_high_thresh",
-		.data		= &sysctl_ipfrag_high_thresh,
+		.data		= &ip4_frags_ctl.high_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -365,7 +366,7 @@ ctl_table ipv4_table[] = {
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_LOW_THRESH,
 		.procname	= "ipfrag_low_thresh",
-		.data		= &sysctl_ipfrag_low_thresh,
+		.data		= &ip4_frags_ctl.low_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -381,7 +382,7 @@ ctl_table ipv4_table[] = {
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_TIME,
 		.procname	= "ipfrag_time",
-		.data		= &sysctl_ipfrag_time,
+		.data		= &ip4_frags_ctl.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -732,7 +733,7 @@ ctl_table ipv4_table[] = {
 	{
 		.ctl_name	= NET_IPV4_IPFRAG_SECRET_INTERVAL,
 		.procname	= "ipfrag_secret_interval",
-		.data		= &sysctl_ipfrag_secret_interval,
+		.data		= &ip4_frags_ctl.secret_interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0a42e934034..0f00966b178 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1995,8 +1995,7 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp,
 }
 
 /* Mark head of queue up as lost. */
-static void tcp_mark_head_lost(struct sock *sk,
-			       int packets, u32 high_seq)
+static void tcp_mark_head_lost(struct sock *sk, int packets)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -2019,7 +2018,7 @@ static void tcp_mark_head_lost(struct sock *sk,
 		tp->lost_skb_hint = skb;
 		tp->lost_cnt_hint = cnt;
 		cnt += tcp_skb_pcount(skb);
-		if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq))
+		if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
 			break;
 		if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -2040,9 +2039,9 @@ static void tcp_update_scoreboard(struct sock *sk)
 		int lost = tp->fackets_out - tp->reordering;
 		if (lost <= 0)
 			lost = 1;
-		tcp_mark_head_lost(sk, lost, tp->high_seq);
+		tcp_mark_head_lost(sk, lost);
 	} else {
-		tcp_mark_head_lost(sk, 1, tp->high_seq);
+		tcp_mark_head_lost(sk, 1);
 	}
 
 	/* New heuristics: it is possible only after we switched
@@ -2381,7 +2380,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 	    before(tp->snd_una, tp->high_seq) &&
 	    icsk->icsk_ca_state != TCP_CA_Open &&
 	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, tp->high_seq);
+		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
 		NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
 	}
 
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 434ef302ba8..a4edd666318 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -78,7 +78,7 @@ static int xfrm4_output_finish2(struct sk_buff *skb)
 	while (likely((err = xfrm4_output_one(skb)) == 0)) {
 		nf_reset(skb);
 
-		err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL,
+		err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
 			      skb->dst->dev, dst_output);
 		if (unlikely(err != 1))
 			break;
@@ -86,7 +86,7 @@ static int xfrm4_output_finish2(struct sk_buff *skb)
 		if (!skb->dst->xfrm)
 			return dst_output(skb);
 
-		err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
+		err = nf_hook(PF_INET, NF_IP_POST_ROUTING, skb, NULL,
 			      skb->dst->dev, xfrm4_output_finish2);
 		if (unlikely(err != 1))
 			break;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index c82d4d49f71..1e89efd38a0 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -102,7 +102,7 @@ EXPORT_SYMBOL_GPL(ipv6_find_tlv);
 
 struct tlvtype_proc {
 	int	type;
-	int	(*func)(struct sk_buff **skbp, int offset);
+	int	(*func)(struct sk_buff *skb, int offset);
 };
 
 /*********************
@@ -111,10 +111,8 @@ struct tlvtype_proc {
 
 /* An unknown option is detected, decide what to do */
 
-static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
+static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
 {
-	struct sk_buff *skb = *skbp;
-
 	switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
 	case 0: /* ignore */
 		return 1;
@@ -139,9 +137,8 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
 
 /* Parse tlv encoded option header (hop-by-hop or destination) */
 
-static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
+static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
 {
-	struct sk_buff *skb = *skbp;
 	struct tlvtype_proc *curr;
 	const unsigned char *nh = skb_network_header(skb);
 	int off = skb_network_header_len(skb);
@@ -172,13 +169,13 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
 					/* type specific length/alignment
 					   checks will be performed in the
 					   func(). */
-					if (curr->func(skbp, off) == 0)
+					if (curr->func(skb, off) == 0)
 						return 0;
 					break;
 				}
 			}
 			if (curr->type < 0) {
-				if (ip6_tlvopt_unknown(skbp, off) == 0)
+				if (ip6_tlvopt_unknown(skb, off) == 0)
 					return 0;
 			}
 			break;
@@ -198,9 +195,8 @@ bad:
  *****************************/
 
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
-static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
+static int ipv6_dest_hao(struct sk_buff *skb, int optoff)
 {
-	struct sk_buff *skb = *skbp;
 	struct ipv6_destopt_hao *hao;
 	struct inet6_skb_parm *opt = IP6CB(skb);
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
@@ -234,22 +230,13 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
 		goto discard;
 
 	if (skb_cloned(skb)) {
-		struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
-		struct inet6_skb_parm *opt2;
-
-		if (skb2 == NULL)
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 			goto discard;
 
-		opt2 = IP6CB(skb2);
-		memcpy(opt2, opt, sizeof(*opt2));
-
-		kfree_skb(skb);
-
 		/* update all variable using below by copied skbuff */
-		*skbp = skb = skb2;
-		hao = (struct ipv6_destopt_hao *)(skb_network_header(skb2) +
+		hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) +
 						  optoff);
-		ipv6h = ipv6_hdr(skb2);
+		ipv6h = ipv6_hdr(skb);
 	}
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -280,9 +267,8 @@ static struct tlvtype_proc tlvprocdestopt_lst[] = {
 	{-1,			NULL}
 };
 
-static int ipv6_destopt_rcv(struct sk_buff **skbp)
+static int ipv6_destopt_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 	__u16 dstbuf;
@@ -304,9 +290,8 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
 #endif
 
 	dst = dst_clone(skb->dst);
-	if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
+	if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
 		dst_release(dst);
-		skb = *skbp;
 		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
 		opt = IP6CB(skb);
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
@@ -337,10 +322,8 @@ void __init ipv6_destopt_init(void)
   NONE header. No data in packet.
  ********************************/
 
-static int ipv6_nodata_rcv(struct sk_buff **skbp)
+static int ipv6_nodata_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *skbp;
-
 	kfree_skb(skb);
 	return 0;
 }
@@ -360,9 +343,8 @@ void __init ipv6_nodata_init(void)
   Routing header.
  ********************************/
 
-static int ipv6_rthdr_rcv(struct sk_buff **skbp)
+static int ipv6_rthdr_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
 	struct in6_addr *addr = NULL;
 	struct in6_addr daddr;
@@ -464,18 +446,14 @@ looped_back:
 	   Do not damage packets queued somewhere.
 	 */
 	if (skb_cloned(skb)) {
-		struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
 		/* the copy is a forwarded packet */
-		if (skb2 == NULL) {
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
 			IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
 					 IPSTATS_MIB_OUTDISCARDS);
 			kfree_skb(skb);
 			return -1;
 		}
-		kfree_skb(skb);
-		*skbp = skb = skb2;
-		opt = IP6CB(skb2);
-		hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb2);
+		hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
 	}
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -578,9 +556,8 @@ static inline struct inet6_dev *ipv6_skb_idev(struct sk_buff *skb)
 
 /* Router Alert as of RFC 2711 */
 
-static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
+static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
 {
-	struct sk_buff *skb = *skbp;
 	const unsigned char *nh = skb_network_header(skb);
 
 	if (nh[optoff + 1] == 2) {
@@ -595,9 +572,8 @@ static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
 
 /* Jumbo payload */
 
-static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff)
+static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
 {
-	struct sk_buff *skb = *skbp;
 	const unsigned char *nh = skb_network_header(skb);
 	u32 pkt_len;
 
@@ -648,9 +624,8 @@ static struct tlvtype_proc tlvprochopopt_lst[] = {
 	{ -1, }
 };
 
-int ipv6_parse_hopopts(struct sk_buff **skbp)
+int ipv6_parse_hopopts(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *skbp;
 	struct inet6_skb_parm *opt = IP6CB(skb);
 
 	/*
@@ -667,8 +642,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
 	}
 
 	opt->hop = sizeof(struct ipv6hdr);
-	if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) {
-		skb = *skbp;
+	if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
 		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
 		opt = IP6CB(skb);
 		opt->nhoff = sizeof(struct ipv6hdr);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 47b8ce232e8..9bb031fa1c2 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -82,7 +82,7 @@ EXPORT_SYMBOL(icmpv6msg_statistics);
 static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
 #define icmpv6_socket	__get_cpu_var(__icmpv6_socket)
 
-static int icmpv6_rcv(struct sk_buff **pskb);
+static int icmpv6_rcv(struct sk_buff *skb);
 
 static struct inet6_protocol icmpv6_protocol = {
 	.handler	=	icmpv6_rcv,
@@ -614,9 +614,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
  *	Handle icmp messages
  */
 
-static int icmpv6_rcv(struct sk_buff **pskb)
+static int icmpv6_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *pskb;
 	struct net_device *dev = skb->dev;
 	struct inet6_dev *idev = __in6_dev_get(dev);
 	struct in6_addr *saddr, *daddr;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 25b93170974..78de42ada84 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -146,7 +146,7 @@ void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
 	__ip6_dst_store(sk, dst, daddr, saddr);
 
 #ifdef CONFIG_XFRM
-	if (dst) {
+	{
 		struct rt6_info *rt = (struct rt6_info  *)dst;
 		rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid);
 	}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 9149fc23975..fac6f7f9dd7 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -125,7 +125,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	}
 
 	if (hdr->nexthdr == NEXTHDR_HOP) {
-		if (ipv6_parse_hopopts(&skb) < 0) {
+		if (ipv6_parse_hopopts(skb) < 0) {
 			IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
 			rcu_read_unlock();
 			return 0;
@@ -149,7 +149,7 @@ out:
  */
 
 
-static inline int ip6_input_finish(struct sk_buff *skb)
+static int ip6_input_finish(struct sk_buff *skb)
 {
 	struct inet6_protocol *ipprot;
 	struct sock *raw_sk;
@@ -199,7 +199,7 @@ resubmit:
 		    !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
 			goto discard;
 
-		ret = ipprot->handler(&skb);
+		ret = ipprot->handler(skb);
 		if (ret > 0)
 			goto resubmit;
 		else if (ret == 0)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 011082ed921..13565dfb1b4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -70,7 +70,7 @@ static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *f
 	spin_unlock_bh(&ip6_id_lock);
 }
 
-static inline int ip6_output_finish(struct sk_buff *skb)
+static int ip6_output_finish(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
 
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 38b14961391..b1326c2bf8a 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -68,15 +68,15 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info)
 	}
 }
 
-static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info)
+static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info)
 {
 	struct ip6_rt_info *rt_info = nf_info_reroute(info);
 
 	if (info->hook == NF_IP6_LOCAL_OUT) {
-		struct ipv6hdr *iph = ipv6_hdr(*pskb);
+		struct ipv6hdr *iph = ipv6_hdr(skb);
 		if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
 		    !ipv6_addr_equal(&iph->saddr, &rt_info->saddr))
-			return ip6_route_me_harder(*pskb);
+			return ip6_route_me_harder(skb);
 	}
 	return 0;
 }
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 0473145ac53..6413a30d9f6 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -332,6 +332,7 @@ static int
 ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 {
 	int diff;
+	int err;
 	struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
 
 	if (v->data_len < sizeof(*user_iph))
@@ -344,25 +345,18 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
 		if (v->data_len > 0xFFFF)
 			return -EINVAL;
 		if (diff > skb_tailroom(e->skb)) {
-			struct sk_buff *newskb;
-
-			newskb = skb_copy_expand(e->skb,
-						 skb_headroom(e->skb),
-						 diff,
-						 GFP_ATOMIC);
-			if (newskb == NULL) {
+			err = pskb_expand_head(e->skb, 0,
+					       diff - skb_tailroom(e->skb),
+					       GFP_ATOMIC);
+			if (err) {
 				printk(KERN_WARNING "ip6_queue: OOM "
 				      "in mangle, dropping packet\n");
-				return -ENOMEM;
+				return err;
 			}
-			if (e->skb->sk)
-				skb_set_owner_w(newskb, e->skb->sk);
-			kfree_skb(e->skb);
-			e->skb = newskb;
 		}
 		skb_put(e->skb, diff);
 	}
-	if (!skb_make_writable(&e->skb, v->data_len))
+	if (!skb_make_writable(e->skb, v->data_len))
 		return -ENOMEM;
 	skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
 	e->skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index cd9df02bb85..acaba153793 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -205,7 +205,7 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6)
 }
 
 static unsigned int
-ip6t_error(struct sk_buff **pskb,
+ip6t_error(struct sk_buff *skb,
 	  const struct net_device *in,
 	  const struct net_device *out,
 	  unsigned int hooknum,
@@ -350,7 +350,7 @@ static void trace_packet(struct sk_buff *skb,
 
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
-ip6t_do_table(struct sk_buff **pskb,
+ip6t_do_table(struct sk_buff *skb,
 	      unsigned int hook,
 	      const struct net_device *in,
 	      const struct net_device *out,
@@ -389,17 +389,17 @@ ip6t_do_table(struct sk_buff **pskb,
 	do {
 		IP_NF_ASSERT(e);
 		IP_NF_ASSERT(back);
-		if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6,
+		if (ip6_packet_match(skb, indev, outdev, &e->ipv6,
 			&protoff, &offset, &hotdrop)) {
 			struct ip6t_entry_target *t;
 
 			if (IP6T_MATCH_ITERATE(e, do_match,
-					       *pskb, in, out,
+					       skb, in, out,
 					       offset, protoff, &hotdrop) != 0)
 				goto no_match;
 
 			ADD_COUNTER(e->counters,
-				    ntohs(ipv6_hdr(*pskb)->payload_len)
+				    ntohs(ipv6_hdr(skb)->payload_len)
 				    + IPV6_HDR_LEN,
 				    1);
 
@@ -409,8 +409,8 @@ ip6t_do_table(struct sk_buff **pskb,
 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
 			/* The packet is traced: log it */
-			if (unlikely((*pskb)->nf_trace))
-				trace_packet(*pskb, hook, in, out,
+			if (unlikely(skb->nf_trace))
+				trace_packet(skb, hook, in, out,
 					     table->name, private, e);
 #endif
 			/* Standard target? */
@@ -448,7 +448,7 @@ ip6t_do_table(struct sk_buff **pskb,
 				((struct ip6t_entry *)table_base)->comefrom
 					= 0xeeeeeeec;
 #endif
-				verdict = t->u.kernel.target->target(pskb,
+				verdict = t->u.kernel.target->target(skb,
 								     in, out,
 								     hook,
 								     t->u.kernel.target,
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index ad4d94310b8..9afc836fd45 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -18,7 +18,7 @@ MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
 MODULE_DESCRIPTION("IP6 tables Hop Limit modification module");
 MODULE_LICENSE("GPL");
 
-static unsigned int ip6t_hl_target(struct sk_buff **pskb,
+static unsigned int ip6t_hl_target(struct sk_buff *skb,
 				   const struct net_device *in,
 				   const struct net_device *out,
 				   unsigned int hooknum,
@@ -29,10 +29,10 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
 	const struct ip6t_HL_info *info = targinfo;
 	int new_hl;
 
-	if (!skb_make_writable(pskb, (*pskb)->len))
+	if (!skb_make_writable(skb, skb->len))
 		return NF_DROP;
 
-	ip6h = ipv6_hdr(*pskb);
+	ip6h = ipv6_hdr(skb);
 
 	switch (info->mode) {
 		case IP6T_HL_SET:
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 6ab99001dcc..7a48c342df4 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -431,7 +431,7 @@ ip6t_log_packet(unsigned int pf,
 }
 
 static unsigned int
-ip6t_log_target(struct sk_buff **pskb,
+ip6t_log_target(struct sk_buff *skb,
 		const struct net_device *in,
 		const struct net_device *out,
 		unsigned int hooknum,
@@ -445,8 +445,7 @@ ip6t_log_target(struct sk_buff **pskb,
 	li.u.log.level = loginfo->level;
 	li.u.log.logflags = loginfo->logflags;
 
-	ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li,
-			loginfo->prefix);
+	ip6t_log_packet(PF_INET6, hooknum, skb, in, out, &li, loginfo->prefix);
 	return XT_CONTINUE;
 }
 
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 3fd08d5567a..1a7d2917545 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -172,7 +172,7 @@ send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum)
 	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL);
 }
 
-static unsigned int reject6_target(struct sk_buff **pskb,
+static unsigned int reject6_target(struct sk_buff *skb,
 			   const struct net_device *in,
 			   const struct net_device *out,
 			   unsigned int hooknum,
@@ -187,25 +187,25 @@ static unsigned int reject6_target(struct sk_buff **pskb,
 	   must return an absolute verdict. --RR */
 	switch (reject->with) {
 	case IP6T_ICMP6_NO_ROUTE:
-		send_unreach(*pskb, ICMPV6_NOROUTE, hooknum);
+		send_unreach(skb, ICMPV6_NOROUTE, hooknum);
 		break;
 	case IP6T_ICMP6_ADM_PROHIBITED:
-		send_unreach(*pskb, ICMPV6_ADM_PROHIBITED, hooknum);
+		send_unreach(skb, ICMPV6_ADM_PROHIBITED, hooknum);
 		break;
 	case IP6T_ICMP6_NOT_NEIGHBOUR:
-		send_unreach(*pskb, ICMPV6_NOT_NEIGHBOUR, hooknum);
+		send_unreach(skb, ICMPV6_NOT_NEIGHBOUR, hooknum);
 		break;
 	case IP6T_ICMP6_ADDR_UNREACH:
-		send_unreach(*pskb, ICMPV6_ADDR_UNREACH, hooknum);
+		send_unreach(skb, ICMPV6_ADDR_UNREACH, hooknum);
 		break;
 	case IP6T_ICMP6_PORT_UNREACH:
-		send_unreach(*pskb, ICMPV6_PORT_UNREACH, hooknum);
+		send_unreach(skb, ICMPV6_PORT_UNREACH, hooknum);
 		break;
 	case IP6T_ICMP6_ECHOREPLY:
 		/* Do nothing */
 		break;
 	case IP6T_TCP_RESET:
-		send_reset(*pskb);
+		send_reset(skb);
 		break;
 	default:
 		if (net_ratelimit())
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 7e32e2aaf7f..1d26b202bf3 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -60,32 +60,32 @@ static struct xt_table packet_filter = {
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ip6t_hook(unsigned int hook,
-	 struct sk_buff **pskb,
+	 struct sk_buff *skb,
 	 const struct net_device *in,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_filter);
+	return ip6t_do_table(skb, hook, in, out, &packet_filter);
 }
 
 static unsigned int
 ip6t_local_out_hook(unsigned int hook,
-		   struct sk_buff **pskb,
+		   struct sk_buff *skb,
 		   const struct net_device *in,
 		   const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
 #if 0
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+	if (skb->len < sizeof(struct iphdr)
+	    || ip_hdrlen(skb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ip6t_hook: happy cracking.\n");
 		return NF_ACCEPT;
 	}
 #endif
 
-	return ip6t_do_table(pskb, hook, in, out, &packet_filter);
+	return ip6t_do_table(skb, hook, in, out, &packet_filter);
 }
 
 static struct nf_hook_ops ip6t_ops[] = {
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index f0a9efa67fb..a0b6381f1e8 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -68,17 +68,17 @@ static struct xt_table packet_mangler = {
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ip6t_route_hook(unsigned int hook,
-	 struct sk_buff **pskb,
+	 struct sk_buff *skb,
 	 const struct net_device *in,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_mangler);
+	return ip6t_do_table(skb, hook, in, out, &packet_mangler);
 }
 
 static unsigned int
 ip6t_local_hook(unsigned int hook,
-		   struct sk_buff **pskb,
+		   struct sk_buff *skb,
 		   const struct net_device *in,
 		   const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
@@ -91,8 +91,8 @@ ip6t_local_hook(unsigned int hook,
 
 #if 0
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct iphdr)
-	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+	if (skb->len < sizeof(struct iphdr)
+	    || ip_hdrlen(skb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
 			printk("ip6t_hook: happy cracking.\n");
 		return NF_ACCEPT;
@@ -100,22 +100,22 @@ ip6t_local_hook(unsigned int hook,
 #endif
 
 	/* save source/dest address, mark, hoplimit, flowlabel, priority,  */
-	memcpy(&saddr, &ipv6_hdr(*pskb)->saddr, sizeof(saddr));
-	memcpy(&daddr, &ipv6_hdr(*pskb)->daddr, sizeof(daddr));
-	mark = (*pskb)->mark;
-	hop_limit = ipv6_hdr(*pskb)->hop_limit;
+	memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+	memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
+	mark = skb->mark;
+	hop_limit = ipv6_hdr(skb)->hop_limit;
 
 	/* flowlabel and prio (includes version, which shouldn't change either */
-	flowlabel = *((u_int32_t *)ipv6_hdr(*pskb));
+	flowlabel = *((u_int32_t *)ipv6_hdr(skb));
 
-	ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler);
+	ret = ip6t_do_table(skb, hook, in, out, &packet_mangler);
 
 	if (ret != NF_DROP && ret != NF_STOLEN
-		&& (memcmp(&ipv6_hdr(*pskb)->saddr, &saddr, sizeof(saddr))
-		    || memcmp(&ipv6_hdr(*pskb)->daddr, &daddr, sizeof(daddr))
-		    || (*pskb)->mark != mark
-		    || ipv6_hdr(*pskb)->hop_limit != hop_limit))
-		return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP;
+		&& (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr))
+		    || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr))
+		    || skb->mark != mark
+		    || ipv6_hdr(skb)->hop_limit != hop_limit))
+		return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
 
 	return ret;
 }
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index ec290e4ebdd..8f7109f991e 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -46,12 +46,12 @@ static struct xt_table packet_raw = {
 /* The work comes in here from netfilter.c. */
 static unsigned int
 ip6t_hook(unsigned int hook,
-	 struct sk_buff **pskb,
+	 struct sk_buff *skb,
 	 const struct net_device *in,
 	 const struct net_device *out,
 	 int (*okfn)(struct sk_buff *))
 {
-	return ip6t_do_table(pskb, hook, in, out, &packet_raw);
+	return ip6t_do_table(skb, hook, in, out, &packet_raw);
 }
 
 static struct nf_hook_ops ip6t_ops[] = {
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 37a3db92695..0e40948f4fc 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -18,6 +18,7 @@
 #include <linux/icmp.h>
 #include <linux/sysctl.h>
 #include <net/ipv6.h>
+#include <net/inet_frag.h>
 
 #include <linux/netfilter_ipv6.h>
 #include <net/netfilter/nf_conntrack.h>
@@ -145,7 +146,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
 }
 
 static unsigned int ipv6_confirm(unsigned int hooknum,
-				 struct sk_buff **pskb,
+				 struct sk_buff *skb,
 				 const struct net_device *in,
 				 const struct net_device *out,
 				 int (*okfn)(struct sk_buff *))
@@ -155,12 +156,12 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
 	struct nf_conntrack_helper *helper;
 	enum ip_conntrack_info ctinfo;
 	unsigned int ret, protoff;
-	unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data;
-	unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
+	unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
+	unsigned char pnum = ipv6_hdr(skb)->nexthdr;
 
 
 	/* This is where we call the helper: as the packet goes out. */
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
 		goto out;
 
@@ -172,23 +173,23 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
 	if (!helper)
 		goto out;
 
-	protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
-					 (*pskb)->len - extoff);
-	if (protoff > (*pskb)->len || pnum == NEXTHDR_FRAGMENT) {
+	protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum,
+					 skb->len - extoff);
+	if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) {
 		pr_debug("proto header not found\n");
 		return NF_ACCEPT;
 	}
 
-	ret = helper->help(pskb, protoff, ct, ctinfo);
+	ret = helper->help(skb, protoff, ct, ctinfo);
 	if (ret != NF_ACCEPT)
 		return ret;
 out:
 	/* We've seen it coming out the other side: confirm it */
-	return nf_conntrack_confirm(pskb);
+	return nf_conntrack_confirm(skb);
 }
 
 static unsigned int ipv6_defrag(unsigned int hooknum,
-				struct sk_buff **pskb,
+				struct sk_buff *skb,
 				const struct net_device *in,
 				const struct net_device *out,
 				int (*okfn)(struct sk_buff *))
@@ -196,17 +197,17 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
 	struct sk_buff *reasm;
 
 	/* Previously seen (loopback)?  */
-	if ((*pskb)->nfct)
+	if (skb->nfct)
 		return NF_ACCEPT;
 
-	reasm = nf_ct_frag6_gather(*pskb);
+	reasm = nf_ct_frag6_gather(skb);
 
 	/* queued */
 	if (reasm == NULL)
 		return NF_STOLEN;
 
 	/* error occured or not fragmented */
-	if (reasm == *pskb)
+	if (reasm == skb)
 		return NF_ACCEPT;
 
 	nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
@@ -216,12 +217,12 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
 }
 
 static unsigned int ipv6_conntrack_in(unsigned int hooknum,
-				      struct sk_buff **pskb,
+				      struct sk_buff *skb,
 				      const struct net_device *in,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
-	struct sk_buff *reasm = (*pskb)->nfct_reasm;
+	struct sk_buff *reasm = skb->nfct_reasm;
 
 	/* This packet is fragmented and has reassembled packet. */
 	if (reasm) {
@@ -229,32 +230,32 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum,
 		if (!reasm->nfct) {
 			unsigned int ret;
 
-			ret = nf_conntrack_in(PF_INET6, hooknum, &reasm);
+			ret = nf_conntrack_in(PF_INET6, hooknum, reasm);
 			if (ret != NF_ACCEPT)
 				return ret;
 		}
 		nf_conntrack_get(reasm->nfct);
-		(*pskb)->nfct = reasm->nfct;
-		(*pskb)->nfctinfo = reasm->nfctinfo;
+		skb->nfct = reasm->nfct;
+		skb->nfctinfo = reasm->nfctinfo;
 		return NF_ACCEPT;
 	}
 
-	return nf_conntrack_in(PF_INET6, hooknum, pskb);
+	return nf_conntrack_in(PF_INET6, hooknum, skb);
 }
 
 static unsigned int ipv6_conntrack_local(unsigned int hooknum,
-					 struct sk_buff **pskb,
+					 struct sk_buff *skb,
 					 const struct net_device *in,
 					 const struct net_device *out,
 					 int (*okfn)(struct sk_buff *))
 {
 	/* root is playing with raw sockets. */
-	if ((*pskb)->len < sizeof(struct ipv6hdr)) {
+	if (skb->len < sizeof(struct ipv6hdr)) {
 		if (net_ratelimit())
 			printk("ipv6_conntrack_local: packet too short\n");
 		return NF_ACCEPT;
 	}
-	return ipv6_conntrack_in(hooknum, pskb, in, out, okfn);
+	return ipv6_conntrack_in(hooknum, skb, in, out, okfn);
 }
 
 static struct nf_hook_ops ipv6_conntrack_ops[] = {
@@ -307,7 +308,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = {
 	{
 		.ctl_name	= NET_NF_CONNTRACK_FRAG6_TIMEOUT,
 		.procname	= "nf_conntrack_frag6_timeout",
-		.data		= &nf_ct_frag6_timeout,
+		.data		= &nf_frags_ctl.timeout,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -315,7 +316,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = {
 	{
 		.ctl_name	= NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
 		.procname	= "nf_conntrack_frag6_low_thresh",
-		.data		= &nf_ct_frag6_low_thresh,
+		.data		= &nf_frags_ctl.low_thresh,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
@@ -323,7 +324,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = {
 	{
 		.ctl_name	= NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
 		.procname	= "nf_conntrack_frag6_high_thresh",
-		.data		= &nf_ct_frag6_high_thresh,
+		.data		= &nf_frags_ctl.high_thresh,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 25442a8c1ba..726fafd4196 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -31,6 +31,7 @@
 
 #include <net/sock.h>
 #include <net/snmp.h>
+#include <net/inet_frag.h>
 
 #include <net/ipv6.h>
 #include <net/protocol.h>
@@ -48,10 +49,6 @@
 #define NF_CT_FRAG6_LOW_THRESH 196608  /* == 192*1024 */
 #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT
 
-unsigned int nf_ct_frag6_high_thresh __read_mostly = 256*1024;
-unsigned int nf_ct_frag6_low_thresh __read_mostly = 192*1024;
-unsigned long nf_ct_frag6_timeout __read_mostly = IPV6_FRAG_TIMEOUT;
-
 struct nf_ct_frag6_skb_cb
 {
 	struct inet6_skb_parm	h;
@@ -63,51 +60,24 @@ struct nf_ct_frag6_skb_cb
 
 struct nf_ct_frag6_queue
 {
-	struct hlist_node	list;
-	struct list_head	lru_list;	/* lru list member	*/
+	struct inet_frag_queue	q;
 
 	__be32			id;		/* fragment id		*/
 	struct in6_addr		saddr;
 	struct in6_addr		daddr;
 
-	spinlock_t		lock;
-	atomic_t		refcnt;
-	struct timer_list	timer;		/* expire timer		*/
-	struct sk_buff		*fragments;
-	int			len;
-	int			meat;
-	ktime_t			stamp;
 	unsigned int		csum;
-	__u8			last_in;	/* has first/last segment arrived? */
-#define COMPLETE		4
-#define FIRST_IN		2
-#define LAST_IN			1
 	__u16			nhoffset;
 };
 
-/* Hash table. */
-
-#define FRAG6Q_HASHSZ	64
-
-static struct hlist_head nf_ct_frag6_hash[FRAG6Q_HASHSZ];
-static DEFINE_RWLOCK(nf_ct_frag6_lock);
-static u32 nf_ct_frag6_hash_rnd;
-static LIST_HEAD(nf_ct_frag6_lru_list);
-int nf_ct_frag6_nqueues = 0;
-
-static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq)
-{
-	hlist_del(&fq->list);
-	list_del(&fq->lru_list);
-	nf_ct_frag6_nqueues--;
-}
+struct inet_frags_ctl nf_frags_ctl __read_mostly = {
+	.high_thresh	 = 256 * 1024,
+	.low_thresh	 = 192 * 1024,
+	.timeout	 = IPV6_FRAG_TIMEOUT,
+	.secret_interval = 10 * 60 * HZ,
+};
 
-static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq)
-{
-	write_lock(&nf_ct_frag6_lock);
-	__fq_unlink(fq);
-	write_unlock(&nf_ct_frag6_lock);
-}
+static struct inet_frags nf_frags;
 
 static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
 			       struct in6_addr *daddr)
@@ -120,7 +90,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
 
 	a += JHASH_GOLDEN_RATIO;
 	b += JHASH_GOLDEN_RATIO;
-	c += nf_ct_frag6_hash_rnd;
+	c += nf_frags.rnd;
 	__jhash_mix(a, b, c);
 
 	a += (__force u32)saddr->s6_addr32[3];
@@ -133,100 +103,54 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
 	c += (__force u32)id;
 	__jhash_mix(a, b, c);
 
-	return c & (FRAG6Q_HASHSZ - 1);
+	return c & (INETFRAGS_HASHSZ - 1);
 }
 
-static struct timer_list nf_ct_frag6_secret_timer;
-int nf_ct_frag6_secret_interval = 10 * 60 * HZ;
-
-static void nf_ct_frag6_secret_rebuild(unsigned long dummy)
+static unsigned int nf_hashfn(struct inet_frag_queue *q)
 {
-	unsigned long now = jiffies;
-	int i;
-
-	write_lock(&nf_ct_frag6_lock);
-	get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32));
-	for (i = 0; i < FRAG6Q_HASHSZ; i++) {
-		struct nf_ct_frag6_queue *q;
-		struct hlist_node *p, *n;
-
-		hlist_for_each_entry_safe(q, p, n, &nf_ct_frag6_hash[i], list) {
-			unsigned int hval = ip6qhashfn(q->id,
-						       &q->saddr,
-						       &q->daddr);
-			if (hval != i) {
-				hlist_del(&q->list);
-				/* Relink to new hash chain. */
-				hlist_add_head(&q->list,
-					       &nf_ct_frag6_hash[hval]);
-			}
-		}
-	}
-	write_unlock(&nf_ct_frag6_lock);
+	struct nf_ct_frag6_queue *nq;
 
-	mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval);
+	nq = container_of(q, struct nf_ct_frag6_queue, q);
+	return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr);
 }
 
-atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0);
+static void nf_skb_free(struct sk_buff *skb)
+{
+	if (NFCT_FRAG6_CB(skb)->orig)
+		kfree_skb(NFCT_FRAG6_CB(skb)->orig);
+}
 
 /* Memory Tracking Functions. */
 static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work)
 {
 	if (work)
 		*work -= skb->truesize;
-	atomic_sub(skb->truesize, &nf_ct_frag6_mem);
-	if (NFCT_FRAG6_CB(skb)->orig)
-		kfree_skb(NFCT_FRAG6_CB(skb)->orig);
-
+	atomic_sub(skb->truesize, &nf_frags.mem);
+	nf_skb_free(skb);
 	kfree_skb(skb);
 }
 
-static inline void frag_free_queue(struct nf_ct_frag6_queue *fq,
-				   unsigned int *work)
+static void nf_frag_free(struct inet_frag_queue *q)
 {
-	if (work)
-		*work -= sizeof(struct nf_ct_frag6_queue);
-	atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
-	kfree(fq);
+	kfree(container_of(q, struct nf_ct_frag6_queue, q));
 }
 
 static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
 {
-	struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);
+	struct nf_ct_frag6_queue *fq;
 
-	if (!fq)
+	fq = kzalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);
+	if (fq == NULL)
 		return NULL;
-	atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
+	atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_frags.mem);
 	return fq;
 }
 
 /* Destruction primitives. */
 
-/* Complete destruction of fq. */
-static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq,
-				unsigned int *work)
+static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
 {
-	struct sk_buff *fp;
-
-	BUG_TRAP(fq->last_in&COMPLETE);
-	BUG_TRAP(del_timer(&fq->timer) == 0);
-
-	/* Release all fragment data. */
-	fp = fq->fragments;
-	while (fp) {
-		struct sk_buff *xp = fp->next;
-
-		frag_kfree_skb(fp, work);
-		fp = xp;
-	}
-
-	frag_free_queue(fq, work);
-}
-
-static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work)
-{
-	if (atomic_dec_and_test(&fq->refcnt))
-		nf_ct_frag6_destroy(fq, work);
+	inet_frag_put(&fq->q, &nf_frags);
 }
 
 /* Kill fq entry. It is not destroyed immediately,
@@ -234,62 +158,28 @@ static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work)
  */
 static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
 {
-	if (del_timer(&fq->timer))
-		atomic_dec(&fq->refcnt);
-
-	if (!(fq->last_in & COMPLETE)) {
-		fq_unlink(fq);
-		atomic_dec(&fq->refcnt);
-		fq->last_in |= COMPLETE;
-	}
+	inet_frag_kill(&fq->q, &nf_frags);
 }
 
 static void nf_ct_frag6_evictor(void)
 {
-	struct nf_ct_frag6_queue *fq;
-	struct list_head *tmp;
-	unsigned int work;
-
-	work = atomic_read(&nf_ct_frag6_mem);
-	if (work <= nf_ct_frag6_low_thresh)
-		return;
-
-	work -= nf_ct_frag6_low_thresh;
-	while (work > 0) {
-		read_lock(&nf_ct_frag6_lock);
-		if (list_empty(&nf_ct_frag6_lru_list)) {
-			read_unlock(&nf_ct_frag6_lock);
-			return;
-		}
-		tmp = nf_ct_frag6_lru_list.next;
-		BUG_ON(tmp == NULL);
-		fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list);
-		atomic_inc(&fq->refcnt);
-		read_unlock(&nf_ct_frag6_lock);
-
-		spin_lock(&fq->lock);
-		if (!(fq->last_in&COMPLETE))
-			fq_kill(fq);
-		spin_unlock(&fq->lock);
-
-		fq_put(fq, &work);
-	}
+	inet_frag_evictor(&nf_frags);
 }
 
 static void nf_ct_frag6_expire(unsigned long data)
 {
 	struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data;
 
-	spin_lock(&fq->lock);
+	spin_lock(&fq->q.lock);
 
-	if (fq->last_in & COMPLETE)
+	if (fq->q.last_in & COMPLETE)
 		goto out;
 
 	fq_kill(fq);
 
 out:
-	spin_unlock(&fq->lock);
-	fq_put(fq, NULL);
+	spin_unlock(&fq->q.lock);
+	fq_put(fq);
 }
 
 /* Creation primitives. */
@@ -302,31 +192,31 @@ static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
 	struct hlist_node *n;
 #endif
 
-	write_lock(&nf_ct_frag6_lock);
+	write_lock(&nf_frags.lock);
 #ifdef CONFIG_SMP
-	hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) {
+	hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) {
 		if (fq->id == fq_in->id &&
 		    ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
 		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
-			atomic_inc(&fq->refcnt);
-			write_unlock(&nf_ct_frag6_lock);
-			fq_in->last_in |= COMPLETE;
-			fq_put(fq_in, NULL);
+			atomic_inc(&fq->q.refcnt);
+			write_unlock(&nf_frags.lock);
+			fq_in->q.last_in |= COMPLETE;
+			fq_put(fq_in);
 			return fq;
 		}
 	}
 #endif
 	fq = fq_in;
 
-	if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout))
-		atomic_inc(&fq->refcnt);
+	if (!mod_timer(&fq->q.timer, jiffies + nf_frags_ctl.timeout))
+		atomic_inc(&fq->q.refcnt);
 
-	atomic_inc(&fq->refcnt);
-	hlist_add_head(&fq->list, &nf_ct_frag6_hash[hash]);
-	INIT_LIST_HEAD(&fq->lru_list);
-	list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
-	nf_ct_frag6_nqueues++;
-	write_unlock(&nf_ct_frag6_lock);
+	atomic_inc(&fq->q.refcnt);
+	hlist_add_head(&fq->q.list, &nf_frags.hash[hash]);
+	INIT_LIST_HEAD(&fq->q.lru_list);
+	list_add_tail(&fq->q.lru_list, &nf_frags.lru_list);
+	nf_frags.nqueues++;
+	write_unlock(&nf_frags.lock);
 	return fq;
 }
 
@@ -341,15 +231,13 @@ nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src,				   str
 		goto oom;
 	}
 
-	memset(fq, 0, sizeof(struct nf_ct_frag6_queue));
-
 	fq->id = id;
 	ipv6_addr_copy(&fq->saddr, src);
 	ipv6_addr_copy(&fq->daddr, dst);
 
-	setup_timer(&fq->timer, nf_ct_frag6_expire, (unsigned long)fq);
-	spin_lock_init(&fq->lock);
-	atomic_set(&fq->refcnt, 1);
+	setup_timer(&fq->q.timer, nf_ct_frag6_expire, (unsigned long)fq);
+	spin_lock_init(&fq->q.lock);
+	atomic_set(&fq->q.refcnt, 1);
 
 	return nf_ct_frag6_intern(hash, fq);
 
@@ -364,17 +252,17 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
 	struct hlist_node *n;
 	unsigned int hash = ip6qhashfn(id, src, dst);
 
-	read_lock(&nf_ct_frag6_lock);
-	hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) {
+	read_lock(&nf_frags.lock);
+	hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) {
 		if (fq->id == id &&
 		    ipv6_addr_equal(src, &fq->saddr) &&
 		    ipv6_addr_equal(dst, &fq->daddr)) {
-			atomic_inc(&fq->refcnt);
-			read_unlock(&nf_ct_frag6_lock);
+			atomic_inc(&fq->q.refcnt);
+			read_unlock(&nf_frags.lock);
 			return fq;
 		}
 	}
-	read_unlock(&nf_ct_frag6_lock);
+	read_unlock(&nf_frags.lock);
 
 	return nf_ct_frag6_create(hash, id, src, dst);
 }
@@ -386,7 +274,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 	struct sk_buff *prev, *next;
 	int offset, end;
 
-	if (fq->last_in & COMPLETE) {
+	if (fq->q.last_in & COMPLETE) {
 		pr_debug("Allready completed\n");
 		goto err;
 	}
@@ -412,13 +300,13 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 		/* If we already have some bits beyond end
 		 * or have different end, the segment is corrupted.
 		 */
-		if (end < fq->len ||
-		    ((fq->last_in & LAST_IN) && end != fq->len)) {
+		if (end < fq->q.len ||
+		    ((fq->q.last_in & LAST_IN) && end != fq->q.len)) {
 			pr_debug("already received last fragment\n");
 			goto err;
 		}
-		fq->last_in |= LAST_IN;
-		fq->len = end;
+		fq->q.last_in |= LAST_IN;
+		fq->q.len = end;
 	} else {
 		/* Check if the fragment is rounded to 8 bytes.
 		 * Required by the RFC.
@@ -430,13 +318,13 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 			pr_debug("end of fragment not rounded to 8 bytes.\n");
 			return -1;
 		}
-		if (end > fq->len) {
+		if (end > fq->q.len) {
 			/* Some bits beyond end -> corruption. */
-			if (fq->last_in & LAST_IN) {
+			if (fq->q.last_in & LAST_IN) {
 				pr_debug("last packet already reached.\n");
 				goto err;
 			}
-			fq->len = end;
+			fq->q.len = end;
 		}
 	}
 
@@ -458,7 +346,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 	 * this fragment, right?
 	 */
 	prev = NULL;
-	for (next = fq->fragments; next != NULL; next = next->next) {
+	for (next = fq->q.fragments; next != NULL; next = next->next) {
 		if (NFCT_FRAG6_CB(next)->offset >= offset)
 			break;	/* bingo! */
 		prev = next;
@@ -503,7 +391,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 
 			/* next fragment */
 			NFCT_FRAG6_CB(next)->offset += i;
-			fq->meat -= i;
+			fq->q.meat -= i;
 			if (next->ip_summed != CHECKSUM_UNNECESSARY)
 				next->ip_summed = CHECKSUM_NONE;
 			break;
@@ -518,9 +406,9 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 			if (prev)
 				prev->next = next;
 			else
-				fq->fragments = next;
+				fq->q.fragments = next;
 
-			fq->meat -= free_it->len;
+			fq->q.meat -= free_it->len;
 			frag_kfree_skb(free_it, NULL);
 		}
 	}
@@ -532,23 +420,23 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 	if (prev)
 		prev->next = skb;
 	else
-		fq->fragments = skb;
+		fq->q.fragments = skb;
 
 	skb->dev = NULL;
-	fq->stamp = skb->tstamp;
-	fq->meat += skb->len;
-	atomic_add(skb->truesize, &nf_ct_frag6_mem);
+	fq->q.stamp = skb->tstamp;
+	fq->q.meat += skb->len;
+	atomic_add(skb->truesize, &nf_frags.mem);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
 	 */
 	if (offset == 0) {
 		fq->nhoffset = nhoff;
-		fq->last_in |= FIRST_IN;
+		fq->q.last_in |= FIRST_IN;
 	}
-	write_lock(&nf_ct_frag6_lock);
-	list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
-	write_unlock(&nf_ct_frag6_lock);
+	write_lock(&nf_frags.lock);
+	list_move_tail(&fq->q.lru_list, &nf_frags.lru_list);
+	write_unlock(&nf_frags.lock);
 	return 0;
 
 err:
@@ -567,7 +455,7 @@ err:
 static struct sk_buff *
 nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 {
-	struct sk_buff *fp, *op, *head = fq->fragments;
+	struct sk_buff *fp, *op, *head = fq->q.fragments;
 	int    payload_len;
 
 	fq_kill(fq);
@@ -577,7 +465,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 
 	/* Unfragmented part is taken from the first segment. */
 	payload_len = ((head->data - skb_network_header(head)) -
-		       sizeof(struct ipv6hdr) + fq->len -
+		       sizeof(struct ipv6hdr) + fq->q.len -
 		       sizeof(struct frag_hdr));
 	if (payload_len > IPV6_MAXPLEN) {
 		pr_debug("payload len is too large.\n");
@@ -614,7 +502,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 		clone->ip_summed = head->ip_summed;
 
 		NFCT_FRAG6_CB(clone)->orig = NULL;
-		atomic_add(clone->truesize, &nf_ct_frag6_mem);
+		atomic_add(clone->truesize, &nf_frags.mem);
 	}
 
 	/* We have to remove fragment header from datagram and to relocate
@@ -628,7 +516,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 	skb_shinfo(head)->frag_list = head->next;
 	skb_reset_transport_header(head);
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &nf_ct_frag6_mem);
+	atomic_sub(head->truesize, &nf_frags.mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -638,12 +526,12 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &nf_ct_frag6_mem);
+		atomic_sub(fp->truesize, &nf_frags.mem);
 	}
 
 	head->next = NULL;
 	head->dev = dev;
-	head->tstamp = fq->stamp;
+	head->tstamp = fq->q.stamp;
 	ipv6_hdr(head)->payload_len = htons(payload_len);
 
 	/* Yes, and fold redundant checksum back. 8) */
@@ -652,7 +540,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 					  skb_network_header_len(head),
 					  head->csum);
 
-	fq->fragments = NULL;
+	fq->q.fragments = NULL;
 
 	/* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
 	fp = skb_shinfo(head)->frag_list;
@@ -788,7 +676,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 		goto ret_orig;
 	}
 
-	if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh)
+	if (atomic_read(&nf_frags.mem) > nf_frags_ctl.high_thresh)
 		nf_ct_frag6_evictor();
 
 	fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
@@ -797,23 +685,23 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
 		goto ret_orig;
 	}
 
-	spin_lock(&fq->lock);
+	spin_lock(&fq->q.lock);
 
 	if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
-		spin_unlock(&fq->lock);
+		spin_unlock(&fq->q.lock);
 		pr_debug("Can't insert skb to queue\n");
-		fq_put(fq, NULL);
+		fq_put(fq);
 		goto ret_orig;
 	}
 
-	if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) {
+	if (fq->q.last_in == (FIRST_IN|LAST_IN) && fq->q.meat == fq->q.len) {
 		ret_skb = nf_ct_frag6_reasm(fq, dev);
 		if (ret_skb == NULL)
 			pr_debug("Can't reassemble fragmented packets\n");
 	}
-	spin_unlock(&fq->lock);
+	spin_unlock(&fq->q.lock);
 
-	fq_put(fq, NULL);
+	fq_put(fq);
 	return ret_skb;
 
 ret_orig:
@@ -859,20 +747,20 @@ int nf_ct_frag6_kfree_frags(struct sk_buff *skb)
 
 int nf_ct_frag6_init(void)
 {
-	nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
-				   (jiffies ^ (jiffies >> 6)));
-
-	setup_timer(&nf_ct_frag6_secret_timer, nf_ct_frag6_secret_rebuild, 0);
-	nf_ct_frag6_secret_timer.expires = jiffies
-					   + nf_ct_frag6_secret_interval;
-	add_timer(&nf_ct_frag6_secret_timer);
+	nf_frags.ctl = &nf_frags_ctl;
+	nf_frags.hashfn = nf_hashfn;
+	nf_frags.destructor = nf_frag_free;
+	nf_frags.skb_free = nf_skb_free;
+	nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
+	inet_frags_init(&nf_frags);
 
 	return 0;
 }
 
 void nf_ct_frag6_cleanup(void)
 {
-	del_timer(&nf_ct_frag6_secret_timer);
-	nf_ct_frag6_low_thresh = 0;
+	inet_frags_fini(&nf_frags);
+
+	nf_frags_ctl.low_thresh = 0;
 	nf_ct_frag6_evictor();
 }
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index db945018579..be526ad9254 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -54,7 +54,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "RAW6: inuse %d\n",
 		       fold_prot_inuse(&rawv6_prot));
 	seq_printf(seq, "FRAG6: inuse %d memory %d\n",
-		       ip6_frag_nqueues, atomic_read(&ip6_frag_mem));
+		       ip6_frag_nqueues(), ip6_frag_mem());
 	return 0;
 }
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 31601c99354..6ad19cfc202 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -42,6 +42,7 @@
 #include <linux/icmpv6.h>
 #include <linux/random.h>
 #include <linux/jhash.h>
+#include <linux/skbuff.h>
 
 #include <net/sock.h>
 #include <net/snmp.h>
@@ -53,11 +54,7 @@
 #include <net/rawv6.h>
 #include <net/ndisc.h>
 #include <net/addrconf.h>
-
-int sysctl_ip6frag_high_thresh __read_mostly = 256*1024;
-int sysctl_ip6frag_low_thresh __read_mostly = 192*1024;
-
-int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT;
+#include <net/inet_frag.h>
 
 struct ip6frag_skb_cb
 {
@@ -74,53 +71,39 @@ struct ip6frag_skb_cb
 
 struct frag_queue
 {
-	struct hlist_node	list;
-	struct list_head lru_list;		/* lru list member	*/
+	struct inet_frag_queue	q;
 
 	__be32			id;		/* fragment id		*/
 	struct in6_addr		saddr;
 	struct in6_addr		daddr;
 
-	spinlock_t		lock;
-	atomic_t		refcnt;
-	struct timer_list	timer;		/* expire timer		*/
-	struct sk_buff		*fragments;
-	int			len;
-	int			meat;
 	int			iif;
-	ktime_t			stamp;
 	unsigned int		csum;
-	__u8			last_in;	/* has first/last segment arrived? */
-#define COMPLETE		4
-#define FIRST_IN		2
-#define LAST_IN			1
 	__u16			nhoffset;
 };
 
-/* Hash table. */
-
-#define IP6Q_HASHSZ	64
+struct inet_frags_ctl ip6_frags_ctl __read_mostly = {
+	.high_thresh 	 = 256 * 1024,
+	.low_thresh	 = 192 * 1024,
+	.timeout	 = IPV6_FRAG_TIMEOUT,
+	.secret_interval = 10 * 60 * HZ,
+};
 
-static struct hlist_head ip6_frag_hash[IP6Q_HASHSZ];
-static DEFINE_RWLOCK(ip6_frag_lock);
-static u32 ip6_frag_hash_rnd;
-static LIST_HEAD(ip6_frag_lru_list);
-int ip6_frag_nqueues = 0;
+static struct inet_frags ip6_frags;
 
-static __inline__ void __fq_unlink(struct frag_queue *fq)
+int ip6_frag_nqueues(void)
 {
-	hlist_del(&fq->list);
-	list_del(&fq->lru_list);
-	ip6_frag_nqueues--;
+	return ip6_frags.nqueues;
 }
 
-static __inline__ void fq_unlink(struct frag_queue *fq)
+int ip6_frag_mem(void)
 {
-	write_lock(&ip6_frag_lock);
-	__fq_unlink(fq);
-	write_unlock(&ip6_frag_lock);
+	return atomic_read(&ip6_frags.mem);
 }
 
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+			  struct net_device *dev);
+
 /*
  * callers should be careful not to use the hash value outside the ipfrag_lock
  * as doing so could race with ipfrag_hash_rnd being recalculated.
@@ -136,7 +119,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
 
 	a += JHASH_GOLDEN_RATIO;
 	b += JHASH_GOLDEN_RATIO;
-	c += ip6_frag_hash_rnd;
+	c += ip6_frags.rnd;
 	__jhash_mix(a, b, c);
 
 	a += (__force u32)saddr->s6_addr32[3];
@@ -149,60 +132,29 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
 	c += (__force u32)id;
 	__jhash_mix(a, b, c);
 
-	return c & (IP6Q_HASHSZ - 1);
+	return c & (INETFRAGS_HASHSZ - 1);
 }
 
-static struct timer_list ip6_frag_secret_timer;
-int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ;
-
-static void ip6_frag_secret_rebuild(unsigned long dummy)
+static unsigned int ip6_hashfn(struct inet_frag_queue *q)
 {
-	unsigned long now = jiffies;
-	int i;
-
-	write_lock(&ip6_frag_lock);
-	get_random_bytes(&ip6_frag_hash_rnd, sizeof(u32));
-	for (i = 0; i < IP6Q_HASHSZ; i++) {
-		struct frag_queue *q;
-		struct hlist_node *p, *n;
-
-		hlist_for_each_entry_safe(q, p, n, &ip6_frag_hash[i], list) {
-			unsigned int hval = ip6qhashfn(q->id,
-						       &q->saddr,
-						       &q->daddr);
-
-			if (hval != i) {
-				hlist_del(&q->list);
-
-				/* Relink to new hash chain. */
-				hlist_add_head(&q->list,
-					       &ip6_frag_hash[hval]);
-
-			}
-		}
-	}
-	write_unlock(&ip6_frag_lock);
+	struct frag_queue *fq;
 
-	mod_timer(&ip6_frag_secret_timer, now + sysctl_ip6frag_secret_interval);
+	fq = container_of(q, struct frag_queue, q);
+	return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr);
 }
 
-atomic_t ip6_frag_mem = ATOMIC_INIT(0);
-
 /* Memory Tracking Functions. */
 static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
 {
 	if (work)
 		*work -= skb->truesize;
-	atomic_sub(skb->truesize, &ip6_frag_mem);
+	atomic_sub(skb->truesize, &ip6_frags.mem);
 	kfree_skb(skb);
 }
 
-static inline void frag_free_queue(struct frag_queue *fq, int *work)
+static void ip6_frag_free(struct inet_frag_queue *fq)
 {
-	if (work)
-		*work -= sizeof(struct frag_queue);
-	atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem);
-	kfree(fq);
+	kfree(container_of(fq, struct frag_queue, q));
 }
 
 static inline struct frag_queue *frag_alloc_queue(void)
@@ -211,36 +163,15 @@ static inline struct frag_queue *frag_alloc_queue(void)
 
 	if(!fq)
 		return NULL;
-	atomic_add(sizeof(struct frag_queue), &ip6_frag_mem);
+	atomic_add(sizeof(struct frag_queue), &ip6_frags.mem);
 	return fq;
 }
 
 /* Destruction primitives. */
 
-/* Complete destruction of fq. */
-static void ip6_frag_destroy(struct frag_queue *fq, int *work)
-{
-	struct sk_buff *fp;
-
-	BUG_TRAP(fq->last_in&COMPLETE);
-	BUG_TRAP(del_timer(&fq->timer) == 0);
-
-	/* Release all fragment data. */
-	fp = fq->fragments;
-	while (fp) {
-		struct sk_buff *xp = fp->next;
-
-		frag_kfree_skb(fp, work);
-		fp = xp;
-	}
-
-	frag_free_queue(fq, work);
-}
-
-static __inline__ void fq_put(struct frag_queue *fq, int *work)
+static __inline__ void fq_put(struct frag_queue *fq)
 {
-	if (atomic_dec_and_test(&fq->refcnt))
-		ip6_frag_destroy(fq, work);
+	inet_frag_put(&fq->q, &ip6_frags);
 }
 
 /* Kill fq entry. It is not destroyed immediately,
@@ -248,45 +179,16 @@ static __inline__ void fq_put(struct frag_queue *fq, int *work)
  */
 static __inline__ void fq_kill(struct frag_queue *fq)
 {
-	if (del_timer(&fq->timer))
-		atomic_dec(&fq->refcnt);
-
-	if (!(fq->last_in & COMPLETE)) {
-		fq_unlink(fq);
-		atomic_dec(&fq->refcnt);
-		fq->last_in |= COMPLETE;
-	}
+	inet_frag_kill(&fq->q, &ip6_frags);
 }
 
 static void ip6_evictor(struct inet6_dev *idev)
 {
-	struct frag_queue *fq;
-	struct list_head *tmp;
-	int work;
-
-	work = atomic_read(&ip6_frag_mem) - sysctl_ip6frag_low_thresh;
-	if (work <= 0)
-		return;
-
-	while(work > 0) {
-		read_lock(&ip6_frag_lock);
-		if (list_empty(&ip6_frag_lru_list)) {
-			read_unlock(&ip6_frag_lock);
-			return;
-		}
-		tmp = ip6_frag_lru_list.next;
-		fq = list_entry(tmp, struct frag_queue, lru_list);
-		atomic_inc(&fq->refcnt);
-		read_unlock(&ip6_frag_lock);
-
-		spin_lock(&fq->lock);
-		if (!(fq->last_in&COMPLETE))
-			fq_kill(fq);
-		spin_unlock(&fq->lock);
-
-		fq_put(fq, &work);
-		IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS);
-	}
+	int evicted;
+
+	evicted = inet_frag_evictor(&ip6_frags);
+	if (evicted)
+		IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted);
 }
 
 static void ip6_frag_expire(unsigned long data)
@@ -294,9 +196,9 @@ static void ip6_frag_expire(unsigned long data)
 	struct frag_queue *fq = (struct frag_queue *) data;
 	struct net_device *dev = NULL;
 
-	spin_lock(&fq->lock);
+	spin_lock(&fq->q.lock);
 
-	if (fq->last_in & COMPLETE)
+	if (fq->q.last_in & COMPLETE)
 		goto out;
 
 	fq_kill(fq);
@@ -311,7 +213,7 @@ static void ip6_frag_expire(unsigned long data)
 	rcu_read_unlock();
 
 	/* Don't send error if the first segment did not arrive. */
-	if (!(fq->last_in&FIRST_IN) || !fq->fragments)
+	if (!(fq->q.last_in&FIRST_IN) || !fq->q.fragments)
 		goto out;
 
 	/*
@@ -319,13 +221,13 @@ static void ip6_frag_expire(unsigned long data)
 	   segment was received. And do not use fq->dev
 	   pointer directly, device might already disappeared.
 	 */
-	fq->fragments->dev = dev;
-	icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev);
+	fq->q.fragments->dev = dev;
+	icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev);
 out:
 	if (dev)
 		dev_put(dev);
-	spin_unlock(&fq->lock);
-	fq_put(fq, NULL);
+	spin_unlock(&fq->q.lock);
+	fq_put(fq);
 }
 
 /* Creation primitives. */
@@ -339,32 +241,32 @@ static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in)
 	struct hlist_node *n;
 #endif
 
-	write_lock(&ip6_frag_lock);
+	write_lock(&ip6_frags.lock);
 	hash = ip6qhashfn(fq_in->id, &fq_in->saddr, &fq_in->daddr);
 #ifdef CONFIG_SMP
-	hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) {
+	hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) {
 		if (fq->id == fq_in->id &&
 		    ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
 		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
-			atomic_inc(&fq->refcnt);
-			write_unlock(&ip6_frag_lock);
-			fq_in->last_in |= COMPLETE;
-			fq_put(fq_in, NULL);
+			atomic_inc(&fq->q.refcnt);
+			write_unlock(&ip6_frags.lock);
+			fq_in->q.last_in |= COMPLETE;
+			fq_put(fq_in);
 			return fq;
 		}
 	}
 #endif
 	fq = fq_in;
 
-	if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time))
-		atomic_inc(&fq->refcnt);
+	if (!mod_timer(&fq->q.timer, jiffies + ip6_frags_ctl.timeout))
+		atomic_inc(&fq->q.refcnt);
 
-	atomic_inc(&fq->refcnt);
-	hlist_add_head(&fq->list, &ip6_frag_hash[hash]);
-	INIT_LIST_HEAD(&fq->lru_list);
-	list_add_tail(&fq->lru_list, &ip6_frag_lru_list);
-	ip6_frag_nqueues++;
-	write_unlock(&ip6_frag_lock);
+	atomic_inc(&fq->q.refcnt);
+	hlist_add_head(&fq->q.list, &ip6_frags.hash[hash]);
+	INIT_LIST_HEAD(&fq->q.lru_list);
+	list_add_tail(&fq->q.lru_list, &ip6_frags.lru_list);
+	ip6_frags.nqueues++;
+	write_unlock(&ip6_frags.lock);
 	return fq;
 }
 
@@ -382,11 +284,11 @@ ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst,
 	ipv6_addr_copy(&fq->saddr, src);
 	ipv6_addr_copy(&fq->daddr, dst);
 
-	init_timer(&fq->timer);
-	fq->timer.function = ip6_frag_expire;
-	fq->timer.data = (long) fq;
-	spin_lock_init(&fq->lock);
-	atomic_set(&fq->refcnt, 1);
+	init_timer(&fq->q.timer);
+	fq->q.timer.function = ip6_frag_expire;
+	fq->q.timer.data = (long) fq;
+	spin_lock_init(&fq->q.lock);
+	atomic_set(&fq->q.refcnt, 1);
 
 	return ip6_frag_intern(fq);
 
@@ -403,30 +305,31 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst,
 	struct hlist_node *n;
 	unsigned int hash;
 
-	read_lock(&ip6_frag_lock);
+	read_lock(&ip6_frags.lock);
 	hash = ip6qhashfn(id, src, dst);
-	hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) {
+	hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) {
 		if (fq->id == id &&
 		    ipv6_addr_equal(src, &fq->saddr) &&
 		    ipv6_addr_equal(dst, &fq->daddr)) {
-			atomic_inc(&fq->refcnt);
-			read_unlock(&ip6_frag_lock);
+			atomic_inc(&fq->q.refcnt);
+			read_unlock(&ip6_frags.lock);
 			return fq;
 		}
 	}
-	read_unlock(&ip6_frag_lock);
+	read_unlock(&ip6_frags.lock);
 
 	return ip6_frag_create(id, src, dst, idev);
 }
 
 
-static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
+static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			   struct frag_hdr *fhdr, int nhoff)
 {
 	struct sk_buff *prev, *next;
+	struct net_device *dev;
 	int offset, end;
 
-	if (fq->last_in & COMPLETE)
+	if (fq->q.last_in & COMPLETE)
 		goto err;
 
 	offset = ntohs(fhdr->frag_off) & ~0x7;
@@ -439,7 +342,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 				  ((u8 *)&fhdr->frag_off -
 				   skb_network_header(skb)));
-		return;
+		return -1;
 	}
 
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
@@ -454,11 +357,11 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 		/* If we already have some bits beyond end
 		 * or have different end, the segment is corrupted.
 		 */
-		if (end < fq->len ||
-		    ((fq->last_in & LAST_IN) && end != fq->len))
+		if (end < fq->q.len ||
+		    ((fq->q.last_in & LAST_IN) && end != fq->q.len))
 			goto err;
-		fq->last_in |= LAST_IN;
-		fq->len = end;
+		fq->q.last_in |= LAST_IN;
+		fq->q.len = end;
 	} else {
 		/* Check if the fragment is rounded to 8 bytes.
 		 * Required by the RFC.
@@ -471,13 +374,13 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 					 IPSTATS_MIB_INHDRERRORS);
 			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
 					  offsetof(struct ipv6hdr, payload_len));
-			return;
+			return -1;
 		}
-		if (end > fq->len) {
+		if (end > fq->q.len) {
 			/* Some bits beyond end -> corruption. */
-			if (fq->last_in & LAST_IN)
+			if (fq->q.last_in & LAST_IN)
 				goto err;
-			fq->len = end;
+			fq->q.len = end;
 		}
 	}
 
@@ -496,7 +399,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	 * this fragment, right?
 	 */
 	prev = NULL;
-	for(next = fq->fragments; next != NULL; next = next->next) {
+	for(next = fq->q.fragments; next != NULL; next = next->next) {
 		if (FRAG6_CB(next)->offset >= offset)
 			break;	/* bingo! */
 		prev = next;
@@ -533,7 +436,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			if (!pskb_pull(next, i))
 				goto err;
 			FRAG6_CB(next)->offset += i;	/* next fragment */
-			fq->meat -= i;
+			fq->q.meat -= i;
 			if (next->ip_summed != CHECKSUM_UNNECESSARY)
 				next->ip_summed = CHECKSUM_NONE;
 			break;
@@ -548,9 +451,9 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			if (prev)
 				prev->next = next;
 			else
-				fq->fragments = next;
+				fq->q.fragments = next;
 
-			fq->meat -= free_it->len;
+			fq->q.meat -= free_it->len;
 			frag_kfree_skb(free_it, NULL);
 		}
 	}
@@ -562,30 +465,37 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	if (prev)
 		prev->next = skb;
 	else
-		fq->fragments = skb;
+		fq->q.fragments = skb;
 
-	if (skb->dev)
-		fq->iif = skb->dev->ifindex;
-	skb->dev = NULL;
-	fq->stamp = skb->tstamp;
-	fq->meat += skb->len;
-	atomic_add(skb->truesize, &ip6_frag_mem);
+	dev = skb->dev;
+	if (dev) {
+		fq->iif = dev->ifindex;
+		skb->dev = NULL;
+	}
+	fq->q.stamp = skb->tstamp;
+	fq->q.meat += skb->len;
+	atomic_add(skb->truesize, &ip6_frags.mem);
 
 	/* The first fragment.
 	 * nhoffset is obtained from the first fragment, of course.
 	 */
 	if (offset == 0) {
 		fq->nhoffset = nhoff;
-		fq->last_in |= FIRST_IN;
+		fq->q.last_in |= FIRST_IN;
 	}
-	write_lock(&ip6_frag_lock);
-	list_move_tail(&fq->lru_list, &ip6_frag_lru_list);
-	write_unlock(&ip6_frag_lock);
-	return;
+
+	if (fq->q.last_in == (FIRST_IN | LAST_IN) && fq->q.meat == fq->q.len)
+		return ip6_frag_reasm(fq, prev, dev);
+
+	write_lock(&ip6_frags.lock);
+	list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list);
+	write_unlock(&ip6_frags.lock);
+	return -1;
 
 err:
 	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);
+	return -1;
 }
 
 /*
@@ -597,21 +507,39 @@ err:
  *	queue is eligible for reassembly i.e. it is not COMPLETE,
  *	the last and the first frames arrived and all the bits are here.
  */
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 			  struct net_device *dev)
 {
-	struct sk_buff *fp, *head = fq->fragments;
+	struct sk_buff *fp, *head = fq->q.fragments;
 	int    payload_len;
 	unsigned int nhoff;
 
 	fq_kill(fq);
 
+	/* Make the one we just received the head. */
+	if (prev) {
+		head = prev->next;
+		fp = skb_clone(head, GFP_ATOMIC);
+
+		if (!fp)
+			goto out_oom;
+
+		fp->next = head->next;
+		prev->next = fp;
+
+		skb_morph(head, fq->q.fragments);
+		head->next = fq->q.fragments->next;
+
+		kfree_skb(fq->q.fragments);
+		fq->q.fragments = head;
+	}
+
 	BUG_TRAP(head != NULL);
 	BUG_TRAP(FRAG6_CB(head)->offset == 0);
 
 	/* Unfragmented part is taken from the first segment. */
 	payload_len = ((head->data - skb_network_header(head)) -
-		       sizeof(struct ipv6hdr) + fq->len -
+		       sizeof(struct ipv6hdr) + fq->q.len -
 		       sizeof(struct frag_hdr));
 	if (payload_len > IPV6_MAXPLEN)
 		goto out_oversize;
@@ -640,7 +568,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 		head->len -= clone->len;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
-		atomic_add(clone->truesize, &ip6_frag_mem);
+		atomic_add(clone->truesize, &ip6_frags.mem);
 	}
 
 	/* We have to remove fragment header from datagram and to relocate
@@ -655,7 +583,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 	skb_shinfo(head)->frag_list = head->next;
 	skb_reset_transport_header(head);
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &ip6_frag_mem);
+	atomic_sub(head->truesize, &ip6_frags.mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -665,17 +593,15 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &ip6_frag_mem);
+		atomic_sub(fp->truesize, &ip6_frags.mem);
 	}
 
 	head->next = NULL;
 	head->dev = dev;
-	head->tstamp = fq->stamp;
+	head->tstamp = fq->q.stamp;
 	ipv6_hdr(head)->payload_len = htons(payload_len);
 	IP6CB(head)->nhoff = nhoff;
 
-	*skb_in = head;
-
 	/* Yes, and fold redundant checksum back. 8) */
 	if (head->ip_summed == CHECKSUM_COMPLETE)
 		head->csum = csum_partial(skb_network_header(head),
@@ -685,7 +611,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 	rcu_read_lock();
 	IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
 	rcu_read_unlock();
-	fq->fragments = NULL;
+	fq->q.fragments = NULL;
 	return 1;
 
 out_oversize:
@@ -702,10 +628,8 @@ out_fail:
 	return -1;
 }
 
-static int ipv6_frag_rcv(struct sk_buff **skbp)
+static int ipv6_frag_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *skbp;
-	struct net_device *dev = skb->dev;
 	struct frag_hdr *fhdr;
 	struct frag_queue *fq;
 	struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -739,23 +663,19 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
 		return 1;
 	}
 
-	if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh)
+	if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh)
 		ip6_evictor(ip6_dst_idev(skb->dst));
 
 	if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr,
 			  ip6_dst_idev(skb->dst))) != NULL) {
-		int ret = -1;
+		int ret;
 
-		spin_lock(&fq->lock);
+		spin_lock(&fq->q.lock);
 
-		ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
+		ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
 
-		if (fq->last_in == (FIRST_IN|LAST_IN) &&
-		    fq->meat == fq->len)
-			ret = ip6_frag_reasm(fq, skbp, dev);
-
-		spin_unlock(&fq->lock);
-		fq_put(fq, NULL);
+		spin_unlock(&fq->q.lock);
+		fq_put(fq);
 		return ret;
 	}
 
@@ -775,11 +695,10 @@ void __init ipv6_frag_init(void)
 	if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0)
 		printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n");
 
-	ip6_frag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
-				   (jiffies ^ (jiffies >> 6)));
-
-	init_timer(&ip6_frag_secret_timer);
-	ip6_frag_secret_timer.function = ip6_frag_secret_rebuild;
-	ip6_frag_secret_timer.expires = jiffies + sysctl_ip6frag_secret_interval;
-	add_timer(&ip6_frag_secret_timer);
+	ip6_frags.ctl = &ip6_frags_ctl;
+	ip6_frags.hashfn = ip6_hashfn;
+	ip6_frags.destructor = ip6_frag_free;
+	ip6_frags.skb_free = NULL;
+	ip6_frags.qsize = sizeof(struct frag_queue);
+	inet_frags_init(&ip6_frags);
 }
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6ff19f9eb9e..cce9941c11c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -663,7 +663,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
 	return rt;
 }
 
-static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
+static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
 					    struct flowi *fl, int flags)
 {
 	struct fib6_node *fn;
@@ -682,7 +682,7 @@ restart_2:
 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
 restart:
-	rt = rt6_select(fn, fl->iif, strict | reachable);
+	rt = rt6_select(fn, oif, strict | reachable);
 	BACKTRACK(&fl->fl6_src);
 	if (rt == &ip6_null_entry ||
 	    rt->rt6i_flags & RTF_CACHE)
@@ -735,6 +735,12 @@ out2:
 	return rt;
 }
 
+static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
+					    struct flowi *fl, int flags)
+{
+	return ip6_pol_route(table, fl->iif, fl, flags);
+}
+
 void ip6_route_input(struct sk_buff *skb)
 {
 	struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -761,72 +767,7 @@ void ip6_route_input(struct sk_buff *skb)
 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
 					     struct flowi *fl, int flags)
 {
-	struct fib6_node *fn;
-	struct rt6_info *rt, *nrt;
-	int strict = 0;
-	int attempts = 3;
-	int err;
-	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
-
-	strict |= flags & RT6_LOOKUP_F_IFACE;
-
-relookup:
-	read_lock_bh(&table->tb6_lock);
-
-restart_2:
-	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
-
-restart:
-	rt = rt6_select(fn, fl->oif, strict | reachable);
-	BACKTRACK(&fl->fl6_src);
-	if (rt == &ip6_null_entry ||
-	    rt->rt6i_flags & RTF_CACHE)
-		goto out;
-
-	dst_hold(&rt->u.dst);
-	read_unlock_bh(&table->tb6_lock);
-
-	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
-		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
-	else {
-#if CLONE_OFFLINK_ROUTE
-		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
-#else
-		goto out2;
-#endif
-	}
-
-	dst_release(&rt->u.dst);
-	rt = nrt ? : &ip6_null_entry;
-
-	dst_hold(&rt->u.dst);
-	if (nrt) {
-		err = ip6_ins_rt(nrt);
-		if (!err)
-			goto out2;
-	}
-
-	if (--attempts <= 0)
-		goto out2;
-
-	/*
-	 * Race condition! In the gap, when table->tb6_lock was
-	 * released someone could insert this route.  Relookup.
-	 */
-	dst_release(&rt->u.dst);
-	goto relookup;
-
-out:
-	if (reachable) {
-		reachable = 0;
-		goto restart_2;
-	}
-	dst_hold(&rt->u.dst);
-	read_unlock_bh(&table->tb6_lock);
-out2:
-	rt->u.dst.lastuse = jiffies;
-	rt->u.dst.__use++;
-	return rt;
+	return ip6_pol_route(table, fl->oif, fl, flags);
 }
 
 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 3fb44277207..68bb2548e46 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -12,6 +12,7 @@
 #include <net/ndisc.h>
 #include <net/ipv6.h>
 #include <net/addrconf.h>
+#include <net/inet_frag.h>
 
 #ifdef CONFIG_SYSCTL
 
@@ -41,7 +42,7 @@ static ctl_table ipv6_table[] = {
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_HIGH_THRESH,
 		.procname	= "ip6frag_high_thresh",
-		.data		= &sysctl_ip6frag_high_thresh,
+		.data		= &ip6_frags_ctl.high_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -49,7 +50,7 @@ static ctl_table ipv6_table[] = {
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_LOW_THRESH,
 		.procname	= "ip6frag_low_thresh",
-		.data		= &sysctl_ip6frag_low_thresh,
+		.data		= &ip6_frags_ctl.low_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec
@@ -57,7 +58,7 @@ static ctl_table ipv6_table[] = {
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_TIME,
 		.procname	= "ip6frag_time",
-		.data		= &sysctl_ip6frag_time,
+		.data		= &ip6_frags_ctl.timeout,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
@@ -66,7 +67,7 @@ static ctl_table ipv6_table[] = {
 	{
 		.ctl_name	= NET_IPV6_IP6FRAG_SECRET_INTERVAL,
 		.procname	= "ip6frag_secret_interval",
-		.data		= &sysctl_ip6frag_secret_interval,
+		.data		= &ip6_frags_ctl.secret_interval,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_jiffies,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index a07b59c528f..737b755342b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1668,9 +1668,8 @@ ipv6_pktoptions:
 	return 0;
 }
 
-static int tcp_v6_rcv(struct sk_buff **pskb)
+static int tcp_v6_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *pskb;
 	struct tcphdr *th;
 	struct sock *sk;
 	int ret;
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 23e2809878a..6323921b40b 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -87,9 +87,8 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
 
 EXPORT_SYMBOL(xfrm6_tunnel_deregister);
 
-static int tunnel6_rcv(struct sk_buff **pskb)
+static int tunnel6_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *pskb;
 	struct xfrm6_tunnel *handler;
 
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
@@ -106,9 +105,8 @@ drop:
 	return 0;
 }
 
-static int tunnel46_rcv(struct sk_buff **pskb)
+static int tunnel46_rcv(struct sk_buff *skb)
 {
-	struct sk_buff *skb = *pskb;
 	struct xfrm6_tunnel *handler;
 
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 82ff26dd447..caebad6ee51 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -405,10 +405,9 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
 	return 0;
 }
 
-int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
+int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
 		   int proto)
 {
-	struct sk_buff *skb = *pskb;
 	struct sock *sk;
 	struct udphdr *uh;
 	struct net_device *dev = skb->dev;
@@ -494,9 +493,9 @@ discard:
 	return 0;
 }
 
-static __inline__ int udpv6_rcv(struct sk_buff **pskb)
+static __inline__ int udpv6_rcv(struct sk_buff *skb)
 {
-	return __udp6_lib_rcv(pskb, udp_hash, IPPROTO_UDP);
+	return __udp6_lib_rcv(skb, udp_hash, IPPROTO_UDP);
 }
 
 /*
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 6e252f318f7..2d3fda60123 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -6,7 +6,7 @@
 #include <net/addrconf.h>
 #include <net/inet_common.h>
 
-extern int  	__udp6_lib_rcv(struct sk_buff **, struct hlist_head [], int );
+extern int  	__udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int );
 extern void 	__udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
 			       int , int , int , __be32 , struct hlist_head []);
 
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index f54016a5500..766566f7de4 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -17,9 +17,9 @@
 
 DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly;
 
-static int udplitev6_rcv(struct sk_buff **pskb)
+static int udplitev6_rcv(struct sk_buff *skb)
 {
-	return __udp6_lib_rcv(pskb, udplite_hash, IPPROTO_UDPLITE);
+	return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
 }
 
 static void udplitev6_err(struct sk_buff *skb,
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index c858537cec4..02f69e544f6 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -133,9 +133,9 @@ drop:
 
 EXPORT_SYMBOL(xfrm6_rcv_spi);
 
-int xfrm6_rcv(struct sk_buff **pskb)
+int xfrm6_rcv(struct sk_buff *skb)
 {
-	return xfrm6_rcv_spi(*pskb, 0);
+	return xfrm6_rcv_spi(skb, 0);
 }
 
 EXPORT_SYMBOL(xfrm6_rcv);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 4618c18e611..a5a32c17249 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -80,7 +80,7 @@ static int xfrm6_output_finish2(struct sk_buff *skb)
 	while (likely((err = xfrm6_output_one(skb)) == 0)) {
 		nf_reset(skb);
 
-		err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL,
+		err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
 			      skb->dst->dev, dst_output);
 		if (unlikely(err != 1))
 			break;
@@ -88,7 +88,7 @@ static int xfrm6_output_finish2(struct sk_buff *skb)
 		if (!skb->dst->xfrm)
 			return dst_output(skb);
 
-		err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL,
+		err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL,
 			      skb->dst->dev, xfrm6_output_finish2);
 		if (unlikely(err != 1))
 			break;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a523fa4136e..bed9ba01e8e 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -117,7 +117,7 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
 EXPORT_SYMBOL(nf_unregister_hooks);
 
 unsigned int nf_iterate(struct list_head *head,
-			struct sk_buff **skb,
+			struct sk_buff *skb,
 			int hook,
 			const struct net_device *indev,
 			const struct net_device *outdev,
@@ -160,7 +160,7 @@ unsigned int nf_iterate(struct list_head *head,
 
 /* Returns 1 if okfn() needs to be executed by the caller,
  * -EPERM for NF_DROP, 0 otherwise. */
-int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
+int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
 		 struct net_device *indev,
 		 struct net_device *outdev,
 		 int (*okfn)(struct sk_buff *),
@@ -175,17 +175,17 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
 
 	elem = &nf_hooks[pf][hook];
 next_hook:
-	verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
+	verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
 			     outdev, &elem, okfn, hook_thresh);
 	if (verdict == NF_ACCEPT || verdict == NF_STOP) {
 		ret = 1;
 		goto unlock;
 	} else if (verdict == NF_DROP) {
-		kfree_skb(*pskb);
+		kfree_skb(skb);
 		ret = -EPERM;
 	} else if ((verdict & NF_VERDICT_MASK)  == NF_QUEUE) {
 		NFDEBUG("nf_hook: Verdict = QUEUE.\n");
-		if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn,
+		if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
 			      verdict >> NF_VERDICT_BITS))
 			goto next_hook;
 	}
@@ -196,34 +196,24 @@ unlock:
 EXPORT_SYMBOL(nf_hook_slow);
 
 
-int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len)
+int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
 {
-	struct sk_buff *nskb;
-
-	if (writable_len > (*pskb)->len)
+	if (writable_len > skb->len)
 		return 0;
 
 	/* Not exclusive use of packet?  Must copy. */
-	if (skb_cloned(*pskb) && !skb_clone_writable(*pskb, writable_len))
-		goto copy_skb;
-	if (skb_shared(*pskb))
-		goto copy_skb;
-
-	return pskb_may_pull(*pskb, writable_len);
-
-copy_skb:
-	nskb = skb_copy(*pskb, GFP_ATOMIC);
-	if (!nskb)
-		return 0;
-	BUG_ON(skb_is_nonlinear(nskb));
-
-	/* Rest of kernel will get very unhappy if we pass it a
-	   suddenly-orphaned skbuff */
-	if ((*pskb)->sk)
-		skb_set_owner_w(nskb, (*pskb)->sk);
-	kfree_skb(*pskb);
-	*pskb = nskb;
-	return 1;
+	if (!skb_cloned(skb)) {
+		if (writable_len <= skb_headlen(skb))
+			return 1;
+	} else if (skb_clone_writable(skb, writable_len))
+		return 1;
+
+	if (writable_len <= skb_headlen(skb))
+		writable_len = 0;
+	else
+		writable_len -= skb_headlen(skb);
+
+	return !!__pskb_pull_tail(skb, writable_len);
 }
 EXPORT_SYMBOL(skb_make_writable);
 
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index e42ab230ad8..7b8239c0cd5 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -36,7 +36,7 @@ MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
 module_param(ts_algo, charp, 0400);
 MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)");
 
-unsigned int (*nf_nat_amanda_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb,
 				   enum ip_conntrack_info ctinfo,
 				   unsigned int matchoff,
 				   unsigned int matchlen,
@@ -79,7 +79,7 @@ static struct {
 	},
 };
 
-static int amanda_help(struct sk_buff **pskb,
+static int amanda_help(struct sk_buff *skb,
 		       unsigned int protoff,
 		       struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo)
@@ -101,25 +101,25 @@ static int amanda_help(struct sk_buff **pskb,
 
 	/* increase the UDP timeout of the master connection as replies from
 	 * Amanda clients to the server can be quite delayed */
-	nf_ct_refresh(ct, *pskb, master_timeout * HZ);
+	nf_ct_refresh(ct, skb, master_timeout * HZ);
 
 	/* No data? */
 	dataoff = protoff + sizeof(struct udphdr);
-	if (dataoff >= (*pskb)->len) {
+	if (dataoff >= skb->len) {
 		if (net_ratelimit())
-			printk("amanda_help: skblen = %u\n", (*pskb)->len);
+			printk("amanda_help: skblen = %u\n", skb->len);
 		return NF_ACCEPT;
 	}
 
 	memset(&ts, 0, sizeof(ts));
-	start = skb_find_text(*pskb, dataoff, (*pskb)->len,
+	start = skb_find_text(skb, dataoff, skb->len,
 			      search[SEARCH_CONNECT].ts, &ts);
 	if (start == UINT_MAX)
 		goto out;
 	start += dataoff + search[SEARCH_CONNECT].len;
 
 	memset(&ts, 0, sizeof(ts));
-	stop = skb_find_text(*pskb, start, (*pskb)->len,
+	stop = skb_find_text(skb, start, skb->len,
 			     search[SEARCH_NEWLINE].ts, &ts);
 	if (stop == UINT_MAX)
 		goto out;
@@ -127,13 +127,13 @@ static int amanda_help(struct sk_buff **pskb,
 
 	for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
 		memset(&ts, 0, sizeof(ts));
-		off = skb_find_text(*pskb, start, stop, search[i].ts, &ts);
+		off = skb_find_text(skb, start, stop, search[i].ts, &ts);
 		if (off == UINT_MAX)
 			continue;
 		off += start + search[i].len;
 
 		len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off);
-		if (skb_copy_bits(*pskb, off, pbuf, len))
+		if (skb_copy_bits(skb, off, pbuf, len))
 			break;
 		pbuf[len] = '\0';
 
@@ -153,7 +153,7 @@ static int amanda_help(struct sk_buff **pskb,
 
 		nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook);
 		if (nf_nat_amanda && ct->status & IPS_NAT_MASK)
-			ret = nf_nat_amanda(pskb, ctinfo, off - dataoff,
+			ret = nf_nat_amanda(skb, ctinfo, off - dataoff,
 					    len, exp);
 		else if (nf_ct_expect_related(exp) != 0)
 			ret = NF_DROP;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 83c30b45d17..4d6171bc082 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -307,7 +307,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert);
 
 /* Confirm a connection given skb; places it in hash table */
 int
-__nf_conntrack_confirm(struct sk_buff **pskb)
+__nf_conntrack_confirm(struct sk_buff *skb)
 {
 	unsigned int hash, repl_hash;
 	struct nf_conntrack_tuple_hash *h;
@@ -316,7 +316,7 @@ __nf_conntrack_confirm(struct sk_buff **pskb)
 	struct hlist_node *n;
 	enum ip_conntrack_info ctinfo;
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 
 	/* ipt_REJECT uses nf_conntrack_attach to attach related
 	   ICMP/TCP RST packets in other direction.  Actual packet
@@ -367,14 +367,14 @@ __nf_conntrack_confirm(struct sk_buff **pskb)
 	write_unlock_bh(&nf_conntrack_lock);
 	help = nfct_help(ct);
 	if (help && help->helper)
-		nf_conntrack_event_cache(IPCT_HELPER, *pskb);
+		nf_conntrack_event_cache(IPCT_HELPER, skb);
 #ifdef CONFIG_NF_NAT_NEEDED
 	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
 	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
-		nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
+		nf_conntrack_event_cache(IPCT_NATINFO, skb);
 #endif
 	nf_conntrack_event_cache(master_ct(ct) ?
-				 IPCT_RELATED : IPCT_NEW, *pskb);
+				 IPCT_RELATED : IPCT_NEW, skb);
 	return NF_ACCEPT;
 
 out:
@@ -632,7 +632,7 @@ resolve_normal_ct(struct sk_buff *skb,
 }
 
 unsigned int
-nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
+nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -644,14 +644,14 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
 	int ret;
 
 	/* Previously seen (loopback or untracked)?  Ignore. */
-	if ((*pskb)->nfct) {
+	if (skb->nfct) {
 		NF_CT_STAT_INC_ATOMIC(ignore);
 		return NF_ACCEPT;
 	}
 
 	/* rcu_read_lock()ed by nf_hook_slow */
 	l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
-	ret = l3proto->get_l4proto(*pskb, skb_network_offset(*pskb),
+	ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
 				   &dataoff, &protonum);
 	if (ret <= 0) {
 		pr_debug("not prepared to track yet or error occured\n");
@@ -666,13 +666,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
 	 * inverse of the return code tells to the netfilter
 	 * core what to do with the packet. */
 	if (l4proto->error != NULL &&
-	    (ret = l4proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
+	    (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
 		NF_CT_STAT_INC_ATOMIC(error);
 		NF_CT_STAT_INC_ATOMIC(invalid);
 		return -ret;
 	}
 
-	ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, l4proto,
+	ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto,
 			       &set_reply, &ctinfo);
 	if (!ct) {
 		/* Not valid part of a connection */
@@ -686,21 +686,21 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
 		return NF_DROP;
 	}
 
-	NF_CT_ASSERT((*pskb)->nfct);
+	NF_CT_ASSERT(skb->nfct);
 
-	ret = l4proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
+	ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
 	if (ret < 0) {
 		/* Invalid: inverse of the return code tells
 		 * the netfilter core what to do */
 		pr_debug("nf_conntrack_in: Can't track with proto module\n");
-		nf_conntrack_put((*pskb)->nfct);
-		(*pskb)->nfct = NULL;
+		nf_conntrack_put(skb->nfct);
+		skb->nfct = NULL;
 		NF_CT_STAT_INC_ATOMIC(invalid);
 		return -ret;
 	}
 
 	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
-		nf_conntrack_event_cache(IPCT_STATUS, *pskb);
+		nf_conntrack_event_cache(IPCT_STATUS, skb);
 
 	return ret;
 }
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index c763ee74ea0..6df259067f7 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -43,7 +43,7 @@ module_param_array(ports, ushort, &ports_c, 0400);
 static int loose;
 module_param(loose, bool, 0600);
 
-unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
 				enum ip_conntrack_info ctinfo,
 				enum nf_ct_ftp_type type,
 				unsigned int matchoff,
@@ -344,7 +344,7 @@ static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir,
 	}
 }
 
-static int help(struct sk_buff **pskb,
+static int help(struct sk_buff *skb,
 		unsigned int protoff,
 		struct nf_conn *ct,
 		enum ip_conntrack_info ctinfo)
@@ -371,21 +371,21 @@ static int help(struct sk_buff **pskb,
 		return NF_ACCEPT;
 	}
 
-	th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return NF_ACCEPT;
 
 	dataoff = protoff + th->doff * 4;
 	/* No data? */
-	if (dataoff >= (*pskb)->len) {
+	if (dataoff >= skb->len) {
 		pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff,
-			 (*pskb)->len);
+			 skb->len);
 		return NF_ACCEPT;
 	}
-	datalen = (*pskb)->len - dataoff;
+	datalen = skb->len - dataoff;
 
 	spin_lock_bh(&nf_ftp_lock);
-	fb_ptr = skb_header_pointer(*pskb, dataoff, datalen, ftp_buffer);
+	fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer);
 	BUG_ON(fb_ptr == NULL);
 
 	ends_in_nl = (fb_ptr[datalen - 1] == '\n');
@@ -491,7 +491,7 @@ static int help(struct sk_buff **pskb,
 	 * (possibly changed) expectation itself. */
 	nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook);
 	if (nf_nat_ftp && ct->status & IPS_NAT_MASK)
-		ret = nf_nat_ftp(pskb, ctinfo, search[dir][i].ftptype,
+		ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype,
 				 matchoff, matchlen, exp);
 	else {
 		/* Can't expect this?  Best to drop packet now. */
@@ -508,7 +508,7 @@ out_update_nl:
 	/* Now if this ends in \n, update ftp info.  Seq may have been
 	 * adjusted by NAT code. */
 	if (ends_in_nl)
-		update_nl_seq(seq, ct_ftp_info, dir, *pskb);
+		update_nl_seq(seq, ct_ftp_info, dir, skb);
  out:
 	spin_unlock_bh(&nf_ftp_lock);
 	return ret;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index a8a9dfbe7a6..f23fd9598e1 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -47,27 +47,27 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
 				     "(determined by routing information)");
 
 /* Hooks for NAT */
-int (*set_h245_addr_hook) (struct sk_buff **pskb,
+int (*set_h245_addr_hook) (struct sk_buff *skb,
 			   unsigned char **data, int dataoff,
 			   H245_TransportAddress *taddr,
 			   union nf_conntrack_address *addr, __be16 port)
 			   __read_mostly;
-int (*set_h225_addr_hook) (struct sk_buff **pskb,
+int (*set_h225_addr_hook) (struct sk_buff *skb,
 			   unsigned char **data, int dataoff,
 			   TransportAddress *taddr,
 			   union nf_conntrack_address *addr, __be16 port)
 			   __read_mostly;
-int (*set_sig_addr_hook) (struct sk_buff **pskb,
+int (*set_sig_addr_hook) (struct sk_buff *skb,
 			  struct nf_conn *ct,
 			  enum ip_conntrack_info ctinfo,
 			  unsigned char **data,
 			  TransportAddress *taddr, int count) __read_mostly;
-int (*set_ras_addr_hook) (struct sk_buff **pskb,
+int (*set_ras_addr_hook) (struct sk_buff *skb,
 			  struct nf_conn *ct,
 			  enum ip_conntrack_info ctinfo,
 			  unsigned char **data,
 			  TransportAddress *taddr, int count) __read_mostly;
-int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb,
+int (*nat_rtp_rtcp_hook) (struct sk_buff *skb,
 			  struct nf_conn *ct,
 			  enum ip_conntrack_info ctinfo,
 			  unsigned char **data, int dataoff,
@@ -75,25 +75,25 @@ int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb,
 			  __be16 port, __be16 rtp_port,
 			  struct nf_conntrack_expect *rtp_exp,
 			  struct nf_conntrack_expect *rtcp_exp) __read_mostly;
-int (*nat_t120_hook) (struct sk_buff **pskb,
+int (*nat_t120_hook) (struct sk_buff *skb,
 		      struct nf_conn *ct,
 		      enum ip_conntrack_info ctinfo,
 		      unsigned char **data, int dataoff,
 		      H245_TransportAddress *taddr, __be16 port,
 		      struct nf_conntrack_expect *exp) __read_mostly;
-int (*nat_h245_hook) (struct sk_buff **pskb,
+int (*nat_h245_hook) (struct sk_buff *skb,
 		      struct nf_conn *ct,
 		      enum ip_conntrack_info ctinfo,
 		      unsigned char **data, int dataoff,
 		      TransportAddress *taddr, __be16 port,
 		      struct nf_conntrack_expect *exp) __read_mostly;
-int (*nat_callforwarding_hook) (struct sk_buff **pskb,
+int (*nat_callforwarding_hook) (struct sk_buff *skb,
 				struct nf_conn *ct,
 				enum ip_conntrack_info ctinfo,
 				unsigned char **data, int dataoff,
 				TransportAddress *taddr, __be16 port,
 				struct nf_conntrack_expect *exp) __read_mostly;
-int (*nat_q931_hook) (struct sk_buff **pskb,
+int (*nat_q931_hook) (struct sk_buff *skb,
 		      struct nf_conn *ct,
 		      enum ip_conntrack_info ctinfo,
 		      unsigned char **data, TransportAddress *taddr, int idx,
@@ -108,7 +108,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[];
 static struct nf_conntrack_helper nf_conntrack_helper_ras[];
 
 /****************************************************************************/
-static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff,
+static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
 			 struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 			 unsigned char **data, int *datalen, int *dataoff)
 {
@@ -122,7 +122,7 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff,
 	int tpktoff;
 
 	/* Get TCP header */
-	th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return 0;
 
@@ -130,13 +130,13 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff,
 	tcpdataoff = protoff + th->doff * 4;
 
 	/* Get TCP data length */
-	tcpdatalen = (*pskb)->len - tcpdataoff;
+	tcpdatalen = skb->len - tcpdataoff;
 	if (tcpdatalen <= 0)	/* No TCP data */
 		goto clear_out;
 
 	if (*data == NULL) {	/* first TPKT */
 		/* Get first TPKT pointer */
-		tpkt = skb_header_pointer(*pskb, tcpdataoff, tcpdatalen,
+		tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen,
 					  h323_buffer);
 		BUG_ON(tpkt == NULL);
 
@@ -248,7 +248,7 @@ static int get_h245_addr(struct nf_conn *ct, unsigned char *data,
 }
 
 /****************************************************************************/
-static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
+static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 			   enum ip_conntrack_info ctinfo,
 			   unsigned char **data, int dataoff,
 			   H245_TransportAddress *taddr)
@@ -297,7 +297,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
 		   (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook)) &&
 		   ct->status & IPS_NAT_MASK) {
 		/* NAT needed */
-		ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+		ret = nat_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
 				   taddr, port, rtp_port, rtp_exp, rtcp_exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(rtp_exp) == 0) {
@@ -321,7 +321,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int expect_t120(struct sk_buff **pskb,
+static int expect_t120(struct sk_buff *skb,
 		       struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, int dataoff,
@@ -355,7 +355,7 @@ static int expect_t120(struct sk_buff **pskb,
 	    (nat_t120 = rcu_dereference(nat_t120_hook)) &&
 	    ct->status & IPS_NAT_MASK) {
 		/* NAT needed */
-		ret = nat_t120(pskb, ct, ctinfo, data, dataoff, taddr,
+		ret = nat_t120(skb, ct, ctinfo, data, dataoff, taddr,
 			       port, exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(exp) == 0) {
@@ -371,7 +371,7 @@ static int expect_t120(struct sk_buff **pskb,
 }
 
 /****************************************************************************/
-static int process_h245_channel(struct sk_buff **pskb,
+static int process_h245_channel(struct sk_buff *skb,
 				struct nf_conn *ct,
 				enum ip_conntrack_info ctinfo,
 				unsigned char **data, int dataoff,
@@ -381,7 +381,7 @@ static int process_h245_channel(struct sk_buff **pskb,
 
 	if (channel->options & eH2250LogicalChannelParameters_mediaChannel) {
 		/* RTP */
-		ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
 				      &channel->mediaChannel);
 		if (ret < 0)
 			return -1;
@@ -390,7 +390,7 @@ static int process_h245_channel(struct sk_buff **pskb,
 	if (channel->
 	    options & eH2250LogicalChannelParameters_mediaControlChannel) {
 		/* RTCP */
-		ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
 				      &channel->mediaControlChannel);
 		if (ret < 0)
 			return -1;
@@ -400,7 +400,7 @@ static int process_h245_channel(struct sk_buff **pskb,
 }
 
 /****************************************************************************/
-static int process_olc(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, int dataoff,
 		       OpenLogicalChannel *olc)
@@ -412,7 +412,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct,
 	if (olc->forwardLogicalChannelParameters.multiplexParameters.choice ==
 	    eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)
 	{
-		ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
+		ret = process_h245_channel(skb, ct, ctinfo, data, dataoff,
 					   &olc->
 					   forwardLogicalChannelParameters.
 					   multiplexParameters.
@@ -430,7 +430,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct,
 		eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
 	{
 		ret =
-		    process_h245_channel(pskb, ct, ctinfo, data, dataoff,
+		    process_h245_channel(skb, ct, ctinfo, data, dataoff,
 					 &olc->
 					 reverseLogicalChannelParameters.
 					 multiplexParameters.
@@ -448,7 +448,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct,
 	    t120.choice == eDataProtocolCapability_separateLANStack &&
 	    olc->separateStack.networkAddress.choice ==
 	    eNetworkAccessParameters_networkAddress_localAreaAddress) {
-		ret = expect_t120(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_t120(skb, ct, ctinfo, data, dataoff,
 				  &olc->separateStack.networkAddress.
 				  localAreaAddress);
 		if (ret < 0)
@@ -459,7 +459,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int process_olca(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
 			unsigned char **data, int dataoff,
 			OpenLogicalChannelAck *olca)
@@ -477,7 +477,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct,
 		choice ==
 		eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
 	{
-		ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
+		ret = process_h245_channel(skb, ct, ctinfo, data, dataoff,
 					   &olca->
 					   reverseLogicalChannelParameters.
 					   multiplexParameters.
@@ -496,7 +496,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct,
 		if (ack->options &
 		    eH2250LogicalChannelAckParameters_mediaChannel) {
 			/* RTP */
-			ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+			ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
 					      &ack->mediaChannel);
 			if (ret < 0)
 				return -1;
@@ -505,7 +505,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct,
 		if (ack->options &
 		    eH2250LogicalChannelAckParameters_mediaControlChannel) {
 			/* RTCP */
-			ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+			ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
 					      &ack->mediaControlChannel);
 			if (ret < 0)
 				return -1;
@@ -515,7 +515,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct,
 	if ((olca->options & eOpenLogicalChannelAck_separateStack) &&
 		olca->separateStack.networkAddress.choice ==
 		eNetworkAccessParameters_networkAddress_localAreaAddress) {
-		ret = expect_t120(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_t120(skb, ct, ctinfo, data, dataoff,
 				  &olca->separateStack.networkAddress.
 				  localAreaAddress);
 		if (ret < 0)
@@ -526,7 +526,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int process_h245(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_h245(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
 			unsigned char **data, int dataoff,
 			MultimediaSystemControlMessage *mscm)
@@ -535,7 +535,7 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct,
 	case eMultimediaSystemControlMessage_request:
 		if (mscm->request.choice ==
 		    eRequestMessage_openLogicalChannel) {
-			return process_olc(pskb, ct, ctinfo, data, dataoff,
+			return process_olc(skb, ct, ctinfo, data, dataoff,
 					   &mscm->request.openLogicalChannel);
 		}
 		pr_debug("nf_ct_h323: H.245 Request %d\n",
@@ -544,7 +544,7 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct,
 	case eMultimediaSystemControlMessage_response:
 		if (mscm->response.choice ==
 		    eResponseMessage_openLogicalChannelAck) {
-			return process_olca(pskb, ct, ctinfo, data, dataoff,
+			return process_olca(skb, ct, ctinfo, data, dataoff,
 					    &mscm->response.
 					    openLogicalChannelAck);
 		}
@@ -560,7 +560,7 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int h245_help(struct sk_buff **pskb, unsigned int protoff,
+static int h245_help(struct sk_buff *skb, unsigned int protoff,
 		     struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
 	static MultimediaSystemControlMessage mscm;
@@ -574,12 +574,12 @@ static int h245_help(struct sk_buff **pskb, unsigned int protoff,
 	    ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
 		return NF_ACCEPT;
 	}
-	pr_debug("nf_ct_h245: skblen = %u\n", (*pskb)->len);
+	pr_debug("nf_ct_h245: skblen = %u\n", skb->len);
 
 	spin_lock_bh(&nf_h323_lock);
 
 	/* Process each TPKT */
-	while (get_tpkt_data(pskb, protoff, ct, ctinfo,
+	while (get_tpkt_data(skb, protoff, ct, ctinfo,
 			     &data, &datalen, &dataoff)) {
 		pr_debug("nf_ct_h245: TPKT len=%d ", datalen);
 		NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
@@ -596,7 +596,7 @@ static int h245_help(struct sk_buff **pskb, unsigned int protoff,
 		}
 
 		/* Process H.245 signal */
-		if (process_h245(pskb, ct, ctinfo, &data, dataoff, &mscm) < 0)
+		if (process_h245(skb, ct, ctinfo, &data, dataoff, &mscm) < 0)
 			goto drop;
 	}
 
@@ -654,7 +654,7 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data,
 }
 
 /****************************************************************************/
-static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct,
+static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, int dataoff,
 		       TransportAddress *taddr)
@@ -687,7 +687,7 @@ static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct,
 	    (nat_h245 = rcu_dereference(nat_h245_hook)) &&
 	    ct->status & IPS_NAT_MASK) {
 		/* NAT needed */
-		ret = nat_h245(pskb, ct, ctinfo, data, dataoff, taddr,
+		ret = nat_h245(skb, ct, ctinfo, data, dataoff, taddr,
 			       port, exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(exp) == 0) {
@@ -758,7 +758,7 @@ static int callforward_do_filter(union nf_conntrack_address *src,
 }
 
 /****************************************************************************/
-static int expect_callforwarding(struct sk_buff **pskb,
+static int expect_callforwarding(struct sk_buff *skb,
 				 struct nf_conn *ct,
 				 enum ip_conntrack_info ctinfo,
 				 unsigned char **data, int dataoff,
@@ -798,7 +798,7 @@ static int expect_callforwarding(struct sk_buff **pskb,
 	    (nat_callforwarding = rcu_dereference(nat_callforwarding_hook)) &&
 	    ct->status & IPS_NAT_MASK) {
 		/* Need NAT */
-		ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff,
+		ret = nat_callforwarding(skb, ct, ctinfo, data, dataoff,
 					 taddr, port, exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(exp) == 0) {
@@ -814,7 +814,7 @@ static int expect_callforwarding(struct sk_buff **pskb,
 }
 
 /****************************************************************************/
-static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
 			 enum ip_conntrack_info ctinfo,
 			 unsigned char **data, int dataoff,
 			 Setup_UUIE *setup)
@@ -829,7 +829,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
 	pr_debug("nf_ct_q931: Setup\n");
 
 	if (setup->options & eSetup_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
 				  &setup->h245Address);
 		if (ret < 0)
 			return -1;
@@ -846,7 +846,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
 			 NIP6(*(struct in6_addr *)&addr), ntohs(port),
 			 NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3),
 			 ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
-		ret = set_h225_addr(pskb, data, dataoff,
+		ret = set_h225_addr(skb, data, dataoff,
 				    &setup->destCallSignalAddress,
 				    &ct->tuplehash[!dir].tuple.src.u3,
 				    ct->tuplehash[!dir].tuple.src.u.tcp.port);
@@ -864,7 +864,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
 			 NIP6(*(struct in6_addr *)&addr), ntohs(port),
 			 NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3),
 			 ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
-		ret = set_h225_addr(pskb, data, dataoff,
+		ret = set_h225_addr(skb, data, dataoff,
 				    &setup->sourceCallSignalAddress,
 				    &ct->tuplehash[!dir].tuple.dst.u3,
 				    ct->tuplehash[!dir].tuple.dst.u.tcp.port);
@@ -874,7 +874,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
 
 	if (setup->options & eSetup_UUIE_fastStart) {
 		for (i = 0; i < setup->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo, data, dataoff,
 					  &setup->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -885,7 +885,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int process_callproceeding(struct sk_buff **pskb,
+static int process_callproceeding(struct sk_buff *skb,
 				  struct nf_conn *ct,
 				  enum ip_conntrack_info ctinfo,
 				  unsigned char **data, int dataoff,
@@ -897,7 +897,7 @@ static int process_callproceeding(struct sk_buff **pskb,
 	pr_debug("nf_ct_q931: CallProceeding\n");
 
 	if (callproc->options & eCallProceeding_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
 				  &callproc->h245Address);
 		if (ret < 0)
 			return -1;
@@ -905,7 +905,7 @@ static int process_callproceeding(struct sk_buff **pskb,
 
 	if (callproc->options & eCallProceeding_UUIE_fastStart) {
 		for (i = 0; i < callproc->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo, data, dataoff,
 					  &callproc->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -916,7 +916,7 @@ static int process_callproceeding(struct sk_buff **pskb,
 }
 
 /****************************************************************************/
-static int process_connect(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
 			   enum ip_conntrack_info ctinfo,
 			   unsigned char **data, int dataoff,
 			   Connect_UUIE *connect)
@@ -927,7 +927,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct,
 	pr_debug("nf_ct_q931: Connect\n");
 
 	if (connect->options & eConnect_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
 				  &connect->h245Address);
 		if (ret < 0)
 			return -1;
@@ -935,7 +935,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct,
 
 	if (connect->options & eConnect_UUIE_fastStart) {
 		for (i = 0; i < connect->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo, data, dataoff,
 					  &connect->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -946,7 +946,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
 			    unsigned char **data, int dataoff,
 			    Alerting_UUIE *alert)
@@ -957,7 +957,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct,
 	pr_debug("nf_ct_q931: Alerting\n");
 
 	if (alert->options & eAlerting_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
 				  &alert->h245Address);
 		if (ret < 0)
 			return -1;
@@ -965,7 +965,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct,
 
 	if (alert->options & eAlerting_UUIE_fastStart) {
 		for (i = 0; i < alert->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo, data, dataoff,
 					  &alert->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -976,7 +976,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int process_facility(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
 			    unsigned char **data, int dataoff,
 			    Facility_UUIE *facility)
@@ -988,7 +988,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct,
 
 	if (facility->reason.choice == eFacilityReason_callForwarded) {
 		if (facility->options & eFacility_UUIE_alternativeAddress)
-			return expect_callforwarding(pskb, ct, ctinfo, data,
+			return expect_callforwarding(skb, ct, ctinfo, data,
 						     dataoff,
 						     &facility->
 						     alternativeAddress);
@@ -996,7 +996,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct,
 	}
 
 	if (facility->options & eFacility_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
 				  &facility->h245Address);
 		if (ret < 0)
 			return -1;
@@ -1004,7 +1004,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct,
 
 	if (facility->options & eFacility_UUIE_fastStart) {
 		for (i = 0; i < facility->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo, data, dataoff,
 					  &facility->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -1015,7 +1015,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int process_progress(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
 			    unsigned char **data, int dataoff,
 			    Progress_UUIE *progress)
@@ -1026,7 +1026,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct,
 	pr_debug("nf_ct_q931: Progress\n");
 
 	if (progress->options & eProgress_UUIE_h245Address) {
-		ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+		ret = expect_h245(skb, ct, ctinfo, data, dataoff,
 				  &progress->h245Address);
 		if (ret < 0)
 			return -1;
@@ -1034,7 +1034,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct,
 
 	if (progress->options & eProgress_UUIE_fastStart) {
 		for (i = 0; i < progress->fastStart.count; i++) {
-			ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+			ret = process_olc(skb, ct, ctinfo, data, dataoff,
 					  &progress->fastStart.item[i]);
 			if (ret < 0)
 				return -1;
@@ -1045,7 +1045,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int process_q931(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
 			enum ip_conntrack_info ctinfo,
 			unsigned char **data, int dataoff, Q931 *q931)
 {
@@ -1055,28 +1055,28 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct,
 
 	switch (pdu->h323_message_body.choice) {
 	case eH323_UU_PDU_h323_message_body_setup:
-		ret = process_setup(pskb, ct, ctinfo, data, dataoff,
+		ret = process_setup(skb, ct, ctinfo, data, dataoff,
 				    &pdu->h323_message_body.setup);
 		break;
 	case eH323_UU_PDU_h323_message_body_callProceeding:
-		ret = process_callproceeding(pskb, ct, ctinfo, data, dataoff,
+		ret = process_callproceeding(skb, ct, ctinfo, data, dataoff,
 					     &pdu->h323_message_body.
 					     callProceeding);
 		break;
 	case eH323_UU_PDU_h323_message_body_connect:
-		ret = process_connect(pskb, ct, ctinfo, data, dataoff,
+		ret = process_connect(skb, ct, ctinfo, data, dataoff,
 				      &pdu->h323_message_body.connect);
 		break;
 	case eH323_UU_PDU_h323_message_body_alerting:
-		ret = process_alerting(pskb, ct, ctinfo, data, dataoff,
+		ret = process_alerting(skb, ct, ctinfo, data, dataoff,
 				       &pdu->h323_message_body.alerting);
 		break;
 	case eH323_UU_PDU_h323_message_body_facility:
-		ret = process_facility(pskb, ct, ctinfo, data, dataoff,
+		ret = process_facility(skb, ct, ctinfo, data, dataoff,
 				       &pdu->h323_message_body.facility);
 		break;
 	case eH323_UU_PDU_h323_message_body_progress:
-		ret = process_progress(pskb, ct, ctinfo, data, dataoff,
+		ret = process_progress(skb, ct, ctinfo, data, dataoff,
 				       &pdu->h323_message_body.progress);
 		break;
 	default:
@@ -1090,7 +1090,7 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct,
 
 	if (pdu->options & eH323_UU_PDU_h245Control) {
 		for (i = 0; i < pdu->h245Control.count; i++) {
-			ret = process_h245(pskb, ct, ctinfo, data, dataoff,
+			ret = process_h245(skb, ct, ctinfo, data, dataoff,
 					   &pdu->h245Control.item[i]);
 			if (ret < 0)
 				return -1;
@@ -1101,7 +1101,7 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int q931_help(struct sk_buff **pskb, unsigned int protoff,
+static int q931_help(struct sk_buff *skb, unsigned int protoff,
 		     struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
 	static Q931 q931;
@@ -1115,12 +1115,12 @@ static int q931_help(struct sk_buff **pskb, unsigned int protoff,
 	    ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
 		return NF_ACCEPT;
 	}
-	pr_debug("nf_ct_q931: skblen = %u\n", (*pskb)->len);
+	pr_debug("nf_ct_q931: skblen = %u\n", skb->len);
 
 	spin_lock_bh(&nf_h323_lock);
 
 	/* Process each TPKT */
-	while (get_tpkt_data(pskb, protoff, ct, ctinfo,
+	while (get_tpkt_data(skb, protoff, ct, ctinfo,
 			     &data, &datalen, &dataoff)) {
 		pr_debug("nf_ct_q931: TPKT len=%d ", datalen);
 		NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
@@ -1136,7 +1136,7 @@ static int q931_help(struct sk_buff **pskb, unsigned int protoff,
 		}
 
 		/* Process Q.931 signal */
-		if (process_q931(pskb, ct, ctinfo, &data, dataoff, &q931) < 0)
+		if (process_q931(skb, ct, ctinfo, &data, dataoff, &q931) < 0)
 			goto drop;
 	}
 
@@ -1177,20 +1177,20 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
 };
 
 /****************************************************************************/
-static unsigned char *get_udp_data(struct sk_buff **pskb, unsigned int protoff,
+static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
 				   int *datalen)
 {
 	struct udphdr _uh, *uh;
 	int dataoff;
 
-	uh = skb_header_pointer(*pskb, protoff, sizeof(_uh), &_uh);
+	uh = skb_header_pointer(skb, protoff, sizeof(_uh), &_uh);
 	if (uh == NULL)
 		return NULL;
 	dataoff = protoff + sizeof(_uh);
-	if (dataoff >= (*pskb)->len)
+	if (dataoff >= skb->len)
 		return NULL;
-	*datalen = (*pskb)->len - dataoff;
-	return skb_header_pointer(*pskb, dataoff, *datalen, h323_buffer);
+	*datalen = skb->len - dataoff;
+	return skb_header_pointer(skb, dataoff, *datalen, h323_buffer);
 }
 
 /****************************************************************************/
@@ -1227,7 +1227,7 @@ static int set_expect_timeout(struct nf_conntrack_expect *exp,
 }
 
 /****************************************************************************/
-static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct,
+static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data,
 		       TransportAddress *taddr, int count)
@@ -1265,7 +1265,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct,
 
 	nat_q931 = rcu_dereference(nat_q931_hook);
 	if (nat_q931 && ct->status & IPS_NAT_MASK) {	/* Need NAT */
-		ret = nat_q931(pskb, ct, ctinfo, data, taddr, i, port, exp);
+		ret = nat_q931(skb, ct, ctinfo, data, taddr, i, port, exp);
 	} else {		/* Conntrack only */
 		if (nf_ct_expect_related(exp) == 0) {
 			pr_debug("nf_ct_ras: expect Q.931 ");
@@ -1283,7 +1283,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int process_grq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, GatekeeperRequest *grq)
 {
@@ -1293,13 +1293,13 @@ static int process_grq(struct sk_buff **pskb, struct nf_conn *ct,
 
 	set_ras_addr = rcu_dereference(set_ras_addr_hook);
 	if (set_ras_addr && ct->status & IPS_NAT_MASK)	/* NATed */
-		return set_ras_addr(pskb, ct, ctinfo, data,
+		return set_ras_addr(skb, ct, ctinfo, data,
 				    &grq->rasAddress, 1);
 	return 0;
 }
 
 /****************************************************************************/
-static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, GatekeeperConfirm *gcf)
 {
@@ -1343,7 +1343,7 @@ static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, RegistrationRequest *rrq)
 {
@@ -1353,7 +1353,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct,
 
 	pr_debug("nf_ct_ras: RRQ\n");
 
-	ret = expect_q931(pskb, ct, ctinfo, data,
+	ret = expect_q931(skb, ct, ctinfo, data,
 			  rrq->callSignalAddress.item,
 			  rrq->callSignalAddress.count);
 	if (ret < 0)
@@ -1361,7 +1361,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct,
 
 	set_ras_addr = rcu_dereference(set_ras_addr_hook);
 	if (set_ras_addr && ct->status & IPS_NAT_MASK) {
-		ret = set_ras_addr(pskb, ct, ctinfo, data,
+		ret = set_ras_addr(skb, ct, ctinfo, data,
 				   rrq->rasAddress.item,
 				   rrq->rasAddress.count);
 		if (ret < 0)
@@ -1378,7 +1378,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, RegistrationConfirm *rcf)
 {
@@ -1392,7 +1392,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct,
 
 	set_sig_addr = rcu_dereference(set_sig_addr_hook);
 	if (set_sig_addr && ct->status & IPS_NAT_MASK) {
-		ret = set_sig_addr(pskb, ct, ctinfo, data,
+		ret = set_sig_addr(skb, ct, ctinfo, data,
 					rcf->callSignalAddress.item,
 					rcf->callSignalAddress.count);
 		if (ret < 0)
@@ -1407,7 +1407,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct,
 	if (info->timeout > 0) {
 		pr_debug("nf_ct_ras: set RAS connection timeout to "
 			 "%u seconds\n", info->timeout);
-		nf_ct_refresh(ct, *pskb, info->timeout * HZ);
+		nf_ct_refresh(ct, skb, info->timeout * HZ);
 
 		/* Set expect timeout */
 		read_lock_bh(&nf_conntrack_lock);
@@ -1427,7 +1427,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int process_urq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, UnregistrationRequest *urq)
 {
@@ -1440,7 +1440,7 @@ static int process_urq(struct sk_buff **pskb, struct nf_conn *ct,
 
 	set_sig_addr = rcu_dereference(set_sig_addr_hook);
 	if (set_sig_addr && ct->status & IPS_NAT_MASK) {
-		ret = set_sig_addr(pskb, ct, ctinfo, data,
+		ret = set_sig_addr(skb, ct, ctinfo, data,
 				   urq->callSignalAddress.item,
 				   urq->callSignalAddress.count);
 		if (ret < 0)
@@ -1453,13 +1453,13 @@ static int process_urq(struct sk_buff **pskb, struct nf_conn *ct,
 	info->sig_port[!dir] = 0;
 
 	/* Give it 30 seconds for UCF or URJ */
-	nf_ct_refresh(ct, *pskb, 30 * HZ);
+	nf_ct_refresh(ct, skb, 30 * HZ);
 
 	return 0;
 }
 
 /****************************************************************************/
-static int process_arq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, AdmissionRequest *arq)
 {
@@ -1479,7 +1479,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct,
 	    port == info->sig_port[dir] &&
 	    set_h225_addr && ct->status & IPS_NAT_MASK) {
 		/* Answering ARQ */
-		return set_h225_addr(pskb, data, 0,
+		return set_h225_addr(skb, data, 0,
 				     &arq->destCallSignalAddress,
 				     &ct->tuplehash[!dir].tuple.dst.u3,
 				     info->sig_port[!dir]);
@@ -1491,7 +1491,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct,
 	    !memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) &&
 	    set_h225_addr && ct->status & IPS_NAT_MASK) {
 		/* Calling ARQ */
-		return set_h225_addr(pskb, data, 0,
+		return set_h225_addr(skb, data, 0,
 				     &arq->srcCallSignalAddress,
 				     &ct->tuplehash[!dir].tuple.dst.u3,
 				     port);
@@ -1501,7 +1501,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int process_acf(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, AdmissionConfirm *acf)
 {
@@ -1522,7 +1522,7 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct,
 		/* Answering ACF */
 		set_sig_addr = rcu_dereference(set_sig_addr_hook);
 		if (set_sig_addr && ct->status & IPS_NAT_MASK)
-			return set_sig_addr(pskb, ct, ctinfo, data,
+			return set_sig_addr(skb, ct, ctinfo, data,
 					    &acf->destCallSignalAddress, 1);
 		return 0;
 	}
@@ -1548,7 +1548,7 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, LocationRequest *lrq)
 {
@@ -1558,13 +1558,13 @@ static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct,
 
 	set_ras_addr = rcu_dereference(set_ras_addr_hook);
 	if (set_ras_addr && ct->status & IPS_NAT_MASK)
-		return set_ras_addr(pskb, ct, ctinfo, data,
+		return set_ras_addr(skb, ct, ctinfo, data,
 				    &lrq->replyAddress, 1);
 	return 0;
 }
 
 /****************************************************************************/
-static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, LocationConfirm *lcf)
 {
@@ -1603,7 +1603,7 @@ static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int process_irr(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, InfoRequestResponse *irr)
 {
@@ -1615,7 +1615,7 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct,
 
 	set_ras_addr = rcu_dereference(set_ras_addr_hook);
 	if (set_ras_addr && ct->status & IPS_NAT_MASK) {
-		ret = set_ras_addr(pskb, ct, ctinfo, data,
+		ret = set_ras_addr(skb, ct, ctinfo, data,
 				   &irr->rasAddress, 1);
 		if (ret < 0)
 			return -1;
@@ -1623,7 +1623,7 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct,
 
 	set_sig_addr = rcu_dereference(set_sig_addr_hook);
 	if (set_sig_addr && ct->status & IPS_NAT_MASK) {
-		ret = set_sig_addr(pskb, ct, ctinfo, data,
+		ret = set_sig_addr(skb, ct, ctinfo, data,
 					irr->callSignalAddress.item,
 					irr->callSignalAddress.count);
 		if (ret < 0)
@@ -1634,40 +1634,40 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int process_ras(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_ras(struct sk_buff *skb, struct nf_conn *ct,
 		       enum ip_conntrack_info ctinfo,
 		       unsigned char **data, RasMessage *ras)
 {
 	switch (ras->choice) {
 	case eRasMessage_gatekeeperRequest:
-		return process_grq(pskb, ct, ctinfo, data,
+		return process_grq(skb, ct, ctinfo, data,
 				   &ras->gatekeeperRequest);
 	case eRasMessage_gatekeeperConfirm:
-		return process_gcf(pskb, ct, ctinfo, data,
+		return process_gcf(skb, ct, ctinfo, data,
 				   &ras->gatekeeperConfirm);
 	case eRasMessage_registrationRequest:
-		return process_rrq(pskb, ct, ctinfo, data,
+		return process_rrq(skb, ct, ctinfo, data,
 				   &ras->registrationRequest);
 	case eRasMessage_registrationConfirm:
-		return process_rcf(pskb, ct, ctinfo, data,
+		return process_rcf(skb, ct, ctinfo, data,
 				   &ras->registrationConfirm);
 	case eRasMessage_unregistrationRequest:
-		return process_urq(pskb, ct, ctinfo, data,
+		return process_urq(skb, ct, ctinfo, data,
 				   &ras->unregistrationRequest);
 	case eRasMessage_admissionRequest:
-		return process_arq(pskb, ct, ctinfo, data,
+		return process_arq(skb, ct, ctinfo, data,
 				   &ras->admissionRequest);
 	case eRasMessage_admissionConfirm:
-		return process_acf(pskb, ct, ctinfo, data,
+		return process_acf(skb, ct, ctinfo, data,
 				   &ras->admissionConfirm);
 	case eRasMessage_locationRequest:
-		return process_lrq(pskb, ct, ctinfo, data,
+		return process_lrq(skb, ct, ctinfo, data,
 				   &ras->locationRequest);
 	case eRasMessage_locationConfirm:
-		return process_lcf(pskb, ct, ctinfo, data,
+		return process_lcf(skb, ct, ctinfo, data,
 				   &ras->locationConfirm);
 	case eRasMessage_infoRequestResponse:
-		return process_irr(pskb, ct, ctinfo, data,
+		return process_irr(skb, ct, ctinfo, data,
 				   &ras->infoRequestResponse);
 	default:
 		pr_debug("nf_ct_ras: RAS message %d\n", ras->choice);
@@ -1678,7 +1678,7 @@ static int process_ras(struct sk_buff **pskb, struct nf_conn *ct,
 }
 
 /****************************************************************************/
-static int ras_help(struct sk_buff **pskb, unsigned int protoff,
+static int ras_help(struct sk_buff *skb, unsigned int protoff,
 		    struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
 	static RasMessage ras;
@@ -1686,12 +1686,12 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff,
 	int datalen = 0;
 	int ret;
 
-	pr_debug("nf_ct_ras: skblen = %u\n", (*pskb)->len);
+	pr_debug("nf_ct_ras: skblen = %u\n", skb->len);
 
 	spin_lock_bh(&nf_h323_lock);
 
 	/* Get UDP data */
-	data = get_udp_data(pskb, protoff, &datalen);
+	data = get_udp_data(skb, protoff, &datalen);
 	if (data == NULL)
 		goto accept;
 	pr_debug("nf_ct_ras: RAS message len=%d ", datalen);
@@ -1707,7 +1707,7 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff,
 	}
 
 	/* Process RAS message */
-	if (process_ras(pskb, ct, ctinfo, &data, &ras) < 0)
+	if (process_ras(skb, ct, ctinfo, &data, &ras) < 0)
 		goto drop;
 
       accept:
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 1562ca97a34..dfaed4ba83c 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -30,7 +30,7 @@ static unsigned int dcc_timeout __read_mostly = 300;
 static char *irc_buffer;
 static DEFINE_SPINLOCK(irc_buffer_lock);
 
-unsigned int (*nf_nat_irc_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb,
 				enum ip_conntrack_info ctinfo,
 				unsigned int matchoff,
 				unsigned int matchlen,
@@ -89,7 +89,7 @@ static int parse_dcc(char *data, char *data_end, u_int32_t *ip,
 	return 0;
 }
 
-static int help(struct sk_buff **pskb, unsigned int protoff,
+static int help(struct sk_buff *skb, unsigned int protoff,
 		struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
 	unsigned int dataoff;
@@ -116,22 +116,22 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 		return NF_ACCEPT;
 
 	/* Not a full tcp header? */
-	th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return NF_ACCEPT;
 
 	/* No data? */
 	dataoff = protoff + th->doff*4;
-	if (dataoff >= (*pskb)->len)
+	if (dataoff >= skb->len)
 		return NF_ACCEPT;
 
 	spin_lock_bh(&irc_buffer_lock);
-	ib_ptr = skb_header_pointer(*pskb, dataoff, (*pskb)->len - dataoff,
+	ib_ptr = skb_header_pointer(skb, dataoff, skb->len - dataoff,
 				    irc_buffer);
 	BUG_ON(ib_ptr == NULL);
 
 	data = ib_ptr;
-	data_limit = ib_ptr + (*pskb)->len - dataoff;
+	data_limit = ib_ptr + skb->len - dataoff;
 
 	/* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
 	 * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
@@ -143,7 +143,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 		data += 5;
 		/* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
 
-		iph = ip_hdr(*pskb);
+		iph = ip_hdr(skb);
 		pr_debug("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u\n",
 			 NIPQUAD(iph->saddr), ntohs(th->source),
 			 NIPQUAD(iph->daddr), ntohs(th->dest));
@@ -193,7 +193,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 
 			nf_nat_irc = rcu_dereference(nf_nat_irc_hook);
 			if (nf_nat_irc && ct->status & IPS_NAT_MASK)
-				ret = nf_nat_irc(pskb, ctinfo,
+				ret = nf_nat_irc(skb, ctinfo,
 						 addr_beg_p - ib_ptr,
 						 addr_end_p - addr_beg_p,
 						 exp);
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 1d59fabeb5f..9810d81e2a0 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -42,17 +42,17 @@ static unsigned int timeout __read_mostly = 3;
 module_param(timeout, uint, 0400);
 MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
 
-static int help(struct sk_buff **pskb, unsigned int protoff,
+static int help(struct sk_buff *skb, unsigned int protoff,
 		struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 {
 	struct nf_conntrack_expect *exp;
-	struct iphdr *iph = ip_hdr(*pskb);
-	struct rtable *rt = (struct rtable *)(*pskb)->dst;
+	struct iphdr *iph = ip_hdr(skb);
+	struct rtable *rt = (struct rtable *)skb->dst;
 	struct in_device *in_dev;
 	__be32 mask = 0;
 
 	/* we're only interested in locally generated packets */
-	if ((*pskb)->sk == NULL)
+	if (skb->sk == NULL)
 		goto out;
 	if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
 		goto out;
@@ -91,7 +91,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
 	nf_ct_expect_related(exp);
 	nf_ct_expect_put(exp);
 
-	nf_ct_refresh(ct, *pskb, timeout * HZ);
+	nf_ct_refresh(ct, skb, timeout * HZ);
 out:
 	return NF_ACCEPT;
 }
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index b0804199ab5..099b6df3e2b 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -41,14 +41,14 @@ MODULE_ALIAS("ip_conntrack_pptp");
 static DEFINE_SPINLOCK(nf_pptp_lock);
 
 int
-(*nf_nat_pptp_hook_outbound)(struct sk_buff **pskb,
+(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb,
 			     struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 			     struct PptpControlHeader *ctlh,
 			     union pptp_ctrl_union *pptpReq) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound);
 
 int
-(*nf_nat_pptp_hook_inbound)(struct sk_buff **pskb,
+(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb,
 			    struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 			    struct PptpControlHeader *ctlh,
 			    union pptp_ctrl_union *pptpReq) __read_mostly;
@@ -254,7 +254,7 @@ out_unexpect_orig:
 }
 
 static inline int
-pptp_inbound_pkt(struct sk_buff **pskb,
+pptp_inbound_pkt(struct sk_buff *skb,
 		 struct PptpControlHeader *ctlh,
 		 union pptp_ctrl_union *pptpReq,
 		 unsigned int reqlen,
@@ -367,7 +367,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
 
 	nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound);
 	if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK)
-		return nf_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq);
+		return nf_nat_pptp_inbound(skb, ct, ctinfo, ctlh, pptpReq);
 	return NF_ACCEPT;
 
 invalid:
@@ -380,7 +380,7 @@ invalid:
 }
 
 static inline int
-pptp_outbound_pkt(struct sk_buff **pskb,
+pptp_outbound_pkt(struct sk_buff *skb,
 		  struct PptpControlHeader *ctlh,
 		  union pptp_ctrl_union *pptpReq,
 		  unsigned int reqlen,
@@ -462,7 +462,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
 
 	nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound);
 	if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK)
-		return nf_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq);
+		return nf_nat_pptp_outbound(skb, ct, ctinfo, ctlh, pptpReq);
 	return NF_ACCEPT;
 
 invalid:
@@ -492,7 +492,7 @@ static const unsigned int pptp_msg_size[] = {
 
 /* track caller id inside control connection, call expect_related */
 static int
-conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
+conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff,
 		    struct nf_conn *ct, enum ip_conntrack_info ctinfo)
 
 {
@@ -502,7 +502,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
 	struct pptp_pkt_hdr _pptph, *pptph;
 	struct PptpControlHeader _ctlh, *ctlh;
 	union pptp_ctrl_union _pptpReq, *pptpReq;
-	unsigned int tcplen = (*pskb)->len - protoff;
+	unsigned int tcplen = skb->len - protoff;
 	unsigned int datalen, reqlen, nexthdr_off;
 	int oldsstate, oldcstate;
 	int ret;
@@ -514,12 +514,12 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
 		return NF_ACCEPT;
 
 	nexthdr_off = protoff;
-	tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
+	tcph = skb_header_pointer(skb, nexthdr_off, sizeof(_tcph), &_tcph);
 	BUG_ON(!tcph);
 	nexthdr_off += tcph->doff * 4;
 	datalen = tcplen - tcph->doff * 4;
 
-	pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
+	pptph = skb_header_pointer(skb, nexthdr_off, sizeof(_pptph), &_pptph);
 	if (!pptph) {
 		pr_debug("no full PPTP header, can't track\n");
 		return NF_ACCEPT;
@@ -534,7 +534,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
 		return NF_ACCEPT;
 	}
 
-	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+	ctlh = skb_header_pointer(skb, nexthdr_off, sizeof(_ctlh), &_ctlh);
 	if (!ctlh)
 		return NF_ACCEPT;
 	nexthdr_off += sizeof(_ctlh);
@@ -547,7 +547,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
 	if (reqlen > sizeof(*pptpReq))
 		reqlen = sizeof(*pptpReq);
 
-	pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
+	pptpReq = skb_header_pointer(skb, nexthdr_off, reqlen, &_pptpReq);
 	if (!pptpReq)
 		return NF_ACCEPT;
 
@@ -560,11 +560,11 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
 	 * established from PNS->PAC.  However, RFC makes no guarantee */
 	if (dir == IP_CT_DIR_ORIGINAL)
 		/* client -> server (PNS -> PAC) */
-		ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
+		ret = pptp_outbound_pkt(skb, ctlh, pptpReq, reqlen, ct,
 					ctinfo);
 	else
 		/* server -> client (PAC -> PNS) */
-		ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
+		ret = pptp_inbound_pkt(skb, ctlh, pptpReq, reqlen, ct,
 				       ctinfo);
 	pr_debug("sstate: %d->%d, cstate: %d->%d\n",
 		 oldsstate, info->sstate, oldcstate, info->cstate);
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index 355d371bac9..b5a16c6e21c 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -56,7 +56,7 @@ struct sane_reply_net_start {
 	/* other fields aren't interesting for conntrack */
 };
 
-static int help(struct sk_buff **pskb,
+static int help(struct sk_buff *skb,
 		unsigned int protoff,
 		struct nf_conn *ct,
 		enum ip_conntrack_info ctinfo)
@@ -80,19 +80,19 @@ static int help(struct sk_buff **pskb,
 		return NF_ACCEPT;
 
 	/* Not a full tcp header? */
-	th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
 	if (th == NULL)
 		return NF_ACCEPT;
 
 	/* No data? */
 	dataoff = protoff + th->doff * 4;
-	if (dataoff >= (*pskb)->len)
+	if (dataoff >= skb->len)
 		return NF_ACCEPT;
 
-	datalen = (*pskb)->len - dataoff;
+	datalen = skb->len - dataoff;
 
 	spin_lock_bh(&nf_sane_lock);
-	sb_ptr = skb_header_pointer(*pskb, dataoff, datalen, sane_buffer);
+	sb_ptr = skb_header_pointer(skb, dataoff, datalen, sane_buffer);
 	BUG_ON(sb_ptr == NULL);
 
 	if (dir == IP_CT_DIR_ORIGINAL) {
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index d449fa47491..8f8b5a48df3 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -36,13 +36,13 @@ static unsigned int sip_timeout __read_mostly = SIP_TIMEOUT;
 module_param(sip_timeout, uint, 0600);
 MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session");
 
-unsigned int (*nf_nat_sip_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb,
 				enum ip_conntrack_info ctinfo,
 				struct nf_conn *ct,
 				const char **dptr) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
 
-unsigned int (*nf_nat_sdp_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb,
 				enum ip_conntrack_info ctinfo,
 				struct nf_conntrack_expect *exp,
 				const char *dptr) __read_mostly;
@@ -363,7 +363,7 @@ int ct_sip_get_info(struct nf_conn *ct,
 }
 EXPORT_SYMBOL_GPL(ct_sip_get_info);
 
-static int set_expected_rtp(struct sk_buff **pskb,
+static int set_expected_rtp(struct sk_buff *skb,
 			    struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
 			    union nf_conntrack_address *addr,
@@ -385,7 +385,7 @@ static int set_expected_rtp(struct sk_buff **pskb,
 
 	nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook);
 	if (nf_nat_sdp && ct->status & IPS_NAT_MASK)
-		ret = nf_nat_sdp(pskb, ctinfo, exp, dptr);
+		ret = nf_nat_sdp(skb, ctinfo, exp, dptr);
 	else {
 		if (nf_ct_expect_related(exp) != 0)
 			ret = NF_DROP;
@@ -397,7 +397,7 @@ static int set_expected_rtp(struct sk_buff **pskb,
 	return ret;
 }
 
-static int sip_help(struct sk_buff **pskb,
+static int sip_help(struct sk_buff *skb,
 		    unsigned int protoff,
 		    struct nf_conn *ct,
 		    enum ip_conntrack_info ctinfo)
@@ -414,13 +414,13 @@ static int sip_help(struct sk_buff **pskb,
 
 	/* No Data ? */
 	dataoff = protoff + sizeof(struct udphdr);
-	if (dataoff >= (*pskb)->len)
+	if (dataoff >= skb->len)
 		return NF_ACCEPT;
 
-	nf_ct_refresh(ct, *pskb, sip_timeout * HZ);
+	nf_ct_refresh(ct, skb, sip_timeout * HZ);
 
-	if (!skb_is_nonlinear(*pskb))
-		dptr = (*pskb)->data + dataoff;
+	if (!skb_is_nonlinear(skb))
+		dptr = skb->data + dataoff;
 	else {
 		pr_debug("Copy of skbuff not supported yet.\n");
 		goto out;
@@ -428,13 +428,13 @@ static int sip_help(struct sk_buff **pskb,
 
 	nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
 	if (nf_nat_sip && ct->status & IPS_NAT_MASK) {
-		if (!nf_nat_sip(pskb, ctinfo, ct, &dptr)) {
+		if (!nf_nat_sip(skb, ctinfo, ct, &dptr)) {
 			ret = NF_DROP;
 			goto out;
 		}
 	}
 
-	datalen = (*pskb)->len - dataoff;
+	datalen = skb->len - dataoff;
 	if (datalen < sizeof("SIP/2.0 200") - 1)
 		goto out;
 
@@ -464,7 +464,7 @@ static int sip_help(struct sk_buff **pskb,
 				ret = NF_DROP;
 				goto out;
 			}
-			ret = set_expected_rtp(pskb, ct, ctinfo, &addr,
+			ret = set_expected_rtp(skb, ct, ctinfo, &addr,
 					       htons(port), dptr);
 		}
 	}
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index cc19506cf2f..e894aa1ff3a 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -29,12 +29,12 @@ static int ports_c;
 module_param_array(ports, ushort, &ports_c, 0400);
 MODULE_PARM_DESC(ports, "Port numbers of TFTP servers");
 
-unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb,
 				 enum ip_conntrack_info ctinfo,
 				 struct nf_conntrack_expect *exp) __read_mostly;
 EXPORT_SYMBOL_GPL(nf_nat_tftp_hook);
 
-static int tftp_help(struct sk_buff **pskb,
+static int tftp_help(struct sk_buff *skb,
 		     unsigned int protoff,
 		     struct nf_conn *ct,
 		     enum ip_conntrack_info ctinfo)
@@ -46,7 +46,7 @@ static int tftp_help(struct sk_buff **pskb,
 	int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
 	typeof(nf_nat_tftp_hook) nf_nat_tftp;
 
-	tfh = skb_header_pointer(*pskb, protoff + sizeof(struct udphdr),
+	tfh = skb_header_pointer(skb, protoff + sizeof(struct udphdr),
 				 sizeof(_tftph), &_tftph);
 	if (tfh == NULL)
 		return NF_ACCEPT;
@@ -70,7 +70,7 @@ static int tftp_help(struct sk_buff **pskb,
 
 		nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook);
 		if (nf_nat_tftp && ct->status & IPS_NAT_MASK)
-			ret = nf_nat_tftp(pskb, ctinfo, exp);
+			ret = nf_nat_tftp(skb, ctinfo, exp);
 		else if (nf_ct_expect_related(exp) != 0)
 			ret = NF_DROP;
 		nf_ct_expect_put(exp);
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 0df7fff196a..196269c1e58 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -14,7 +14,7 @@
 
 /* core.c */
 extern unsigned int nf_iterate(struct list_head *head,
-				struct sk_buff **skb,
+				struct sk_buff *skb,
 				int hook,
 				const struct net_device *indev,
 				const struct net_device *outdev,
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index a481a349f7b..0cef1433d66 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -256,14 +256,14 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
 
 	if (verdict == NF_ACCEPT) {
 		afinfo = nf_get_afinfo(info->pf);
-		if (!afinfo || afinfo->reroute(&skb, info) < 0)
+		if (!afinfo || afinfo->reroute(skb, info) < 0)
 			verdict = NF_DROP;
 	}
 
 	if (verdict == NF_ACCEPT) {
 	next_hook:
 		verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
-				     &skb, info->hook,
+				     skb, info->hook,
 				     info->indev, info->outdev, &elem,
 				     info->okfn, INT_MIN);
 	}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 49f0480afe0..3ceeffcf6f9 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -617,6 +617,7 @@ static int
 nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
 {
 	int diff;
+	int err;
 
 	diff = data_len - e->skb->len;
 	if (diff < 0) {
@@ -626,25 +627,18 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
 		if (data_len > 0xFFFF)
 			return -EINVAL;
 		if (diff > skb_tailroom(e->skb)) {
-			struct sk_buff *newskb;
-
-			newskb = skb_copy_expand(e->skb,
-						 skb_headroom(e->skb),
-						 diff,
-						 GFP_ATOMIC);
-			if (newskb == NULL) {
+			err = pskb_expand_head(e->skb, 0,
+					       diff - skb_tailroom(e->skb),
+					       GFP_ATOMIC);
+			if (err) {
 				printk(KERN_WARNING "nf_queue: OOM "
 				      "in mangle, dropping packet\n");
-				return -ENOMEM;
+				return err;
 			}
-			if (e->skb->sk)
-				skb_set_owner_w(newskb, e->skb->sk);
-			kfree_skb(e->skb);
-			e->skb = newskb;
 		}
 		skb_put(e->skb, diff);
 	}
-	if (!skb_make_writable(&e->skb, data_len))
+	if (!skb_make_writable(e->skb, data_len))
 		return -ENOMEM;
 	skb_copy_to_linear_data(e->skb, data, data_len);
 	e->skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 07a1b966500..77eeae658d4 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -27,7 +27,7 @@ MODULE_ALIAS("ipt_CLASSIFY");
 MODULE_ALIAS("ip6t_CLASSIFY");
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
@@ -36,7 +36,7 @@ target(struct sk_buff **pskb,
 {
 	const struct xt_classify_target_info *clinfo = targinfo;
 
-	(*pskb)->priority = clinfo->priority;
+	skb->priority = clinfo->priority;
 	return XT_CONTINUE;
 }
 
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 7043c2757e0..8cc324b159e 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -34,7 +34,7 @@ MODULE_ALIAS("ip6t_CONNMARK");
 #include <net/netfilter/nf_conntrack_ecache.h>
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
@@ -48,28 +48,28 @@ target(struct sk_buff **pskb,
 	u_int32_t mark;
 	u_int32_t newmark;
 
-	ct = nf_ct_get(*pskb, &ctinfo);
+	ct = nf_ct_get(skb, &ctinfo);
 	if (ct) {
 		switch(markinfo->mode) {
 		case XT_CONNMARK_SET:
 			newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
 			if (newmark != ct->mark) {
 				ct->mark = newmark;
-				nf_conntrack_event_cache(IPCT_MARK, *pskb);
+				nf_conntrack_event_cache(IPCT_MARK, skb);
 			}
 			break;
 		case XT_CONNMARK_SAVE:
 			newmark = (ct->mark & ~markinfo->mask) |
-				  ((*pskb)->mark & markinfo->mask);
+				  (skb->mark & markinfo->mask);
 			if (ct->mark != newmark) {
 				ct->mark = newmark;
-				nf_conntrack_event_cache(IPCT_MARK, *pskb);
+				nf_conntrack_event_cache(IPCT_MARK, skb);
 			}
 			break;
 		case XT_CONNMARK_RESTORE:
-			mark = (*pskb)->mark;
+			mark = skb->mark;
 			diff = (ct->mark ^ mark) & markinfo->mask;
-			(*pskb)->mark = mark ^ diff;
+			skb->mark = mark ^ diff;
 			break;
 		}
 	}
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 63d73138c1b..021b5c8d20e 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -61,12 +61,11 @@ static void secmark_restore(struct sk_buff *skb)
 	}
 }
 
-static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
+static unsigned int target(struct sk_buff *skb, const struct net_device *in,
 			   const struct net_device *out, unsigned int hooknum,
 			   const struct xt_target *target,
 			   const void *targinfo)
 {
-	struct sk_buff *skb = *pskb;
 	const struct xt_connsecmark_target_info *info = targinfo;
 
 	switch (info->mode) {
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 798ab731009..6322a933ab7 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -25,7 +25,7 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_DSCP");
 MODULE_ALIAS("ip6t_DSCP");
 
-static unsigned int target(struct sk_buff **pskb,
+static unsigned int target(struct sk_buff *skb,
 			   const struct net_device *in,
 			   const struct net_device *out,
 			   unsigned int hooknum,
@@ -33,20 +33,20 @@ static unsigned int target(struct sk_buff **pskb,
 			   const void *targinfo)
 {
 	const struct xt_DSCP_info *dinfo = targinfo;
-	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(*pskb)) >> XT_DSCP_SHIFT;
+	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	if (dscp != dinfo->dscp) {
-		if (!skb_make_writable(pskb, sizeof(struct iphdr)))
+		if (!skb_make_writable(skb, sizeof(struct iphdr)))
 			return NF_DROP;
 
-		ipv4_change_dsfield(ip_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
+		ipv4_change_dsfield(ip_hdr(skb), (__u8)(~XT_DSCP_MASK),
 				    dinfo->dscp << XT_DSCP_SHIFT);
 
 	}
 	return XT_CONTINUE;
 }
 
-static unsigned int target6(struct sk_buff **pskb,
+static unsigned int target6(struct sk_buff *skb,
 			    const struct net_device *in,
 			    const struct net_device *out,
 			    unsigned int hooknum,
@@ -54,13 +54,13 @@ static unsigned int target6(struct sk_buff **pskb,
 			    const void *targinfo)
 {
 	const struct xt_DSCP_info *dinfo = targinfo;
-	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(*pskb)) >> XT_DSCP_SHIFT;
+	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
 
 	if (dscp != dinfo->dscp) {
-		if (!skb_make_writable(pskb, sizeof(struct ipv6hdr)))
+		if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
 			return NF_DROP;
 
-		ipv6_change_dsfield(ipv6_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
+		ipv6_change_dsfield(ipv6_hdr(skb), (__u8)(~XT_DSCP_MASK),
 				    dinfo->dscp << XT_DSCP_SHIFT);
 	}
 	return XT_CONTINUE;
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index f30fe0baf7d..bc6503d77d7 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -22,7 +22,7 @@ MODULE_ALIAS("ipt_MARK");
 MODULE_ALIAS("ip6t_MARK");
 
 static unsigned int
-target_v0(struct sk_buff **pskb,
+target_v0(struct sk_buff *skb,
 	  const struct net_device *in,
 	  const struct net_device *out,
 	  unsigned int hooknum,
@@ -31,12 +31,12 @@ target_v0(struct sk_buff **pskb,
 {
 	const struct xt_mark_target_info *markinfo = targinfo;
 
-	(*pskb)->mark = markinfo->mark;
+	skb->mark = markinfo->mark;
 	return XT_CONTINUE;
 }
 
 static unsigned int
-target_v1(struct sk_buff **pskb,
+target_v1(struct sk_buff *skb,
 	  const struct net_device *in,
 	  const struct net_device *out,
 	  unsigned int hooknum,
@@ -52,15 +52,15 @@ target_v1(struct sk_buff **pskb,
 		break;
 
 	case XT_MARK_AND:
-		mark = (*pskb)->mark & markinfo->mark;
+		mark = skb->mark & markinfo->mark;
 		break;
 
 	case XT_MARK_OR:
-		mark = (*pskb)->mark | markinfo->mark;
+		mark = skb->mark | markinfo->mark;
 		break;
 	}
 
-	(*pskb)->mark = mark;
+	skb->mark = mark;
 	return XT_CONTINUE;
 }
 
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index d3594c7ccb2..9fb449ffbf8 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -20,7 +20,7 @@ MODULE_ALIAS("ipt_NFLOG");
 MODULE_ALIAS("ip6t_NFLOG");
 
 static unsigned int
-nflog_target(struct sk_buff **pskb,
+nflog_target(struct sk_buff *skb,
 	     const struct net_device *in, const struct net_device *out,
 	     unsigned int hooknum, const struct xt_target *target,
 	     const void *targinfo)
@@ -33,7 +33,7 @@ nflog_target(struct sk_buff **pskb,
 	li.u.ulog.group	     = info->group;
 	li.u.ulog.qthreshold = info->threshold;
 
-	nf_log_packet(target->family, hooknum, *pskb, in, out, &li,
+	nf_log_packet(target->family, hooknum, skb, in, out, &li,
 		      "%s", info->prefix);
 	return XT_CONTINUE;
 }
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 13f59f3e8c3..c3984e9f766 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -24,7 +24,7 @@ MODULE_ALIAS("ip6t_NFQUEUE");
 MODULE_ALIAS("arpt_NFQUEUE");
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index fec1aefb1c3..4976ce18661 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -12,7 +12,7 @@ MODULE_ALIAS("ipt_NOTRACK");
 MODULE_ALIAS("ip6t_NOTRACK");
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
@@ -20,16 +20,16 @@ target(struct sk_buff **pskb,
        const void *targinfo)
 {
 	/* Previously seen (loopback)? Ignore. */
-	if ((*pskb)->nfct != NULL)
+	if (skb->nfct != NULL)
 		return XT_CONTINUE;
 
 	/* Attach fake conntrack entry.
 	   If there is a real ct entry correspondig to this packet,
 	   it'll hang aroun till timing out. We don't deal with it
 	   for performance reasons. JK */
-	(*pskb)->nfct = &nf_conntrack_untracked.ct_general;
-	(*pskb)->nfctinfo = IP_CT_NEW;
-	nf_conntrack_get((*pskb)->nfct);
+	skb->nfct = &nf_conntrack_untracked.ct_general;
+	skb->nfctinfo = IP_CT_NEW;
+	nf_conntrack_get(skb->nfct);
 
 	return XT_CONTINUE;
 }
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index c83779a941a..235806eb6ec 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -28,7 +28,7 @@ MODULE_ALIAS("ip6t_SECMARK");
 
 static u8 mode;
 
-static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
+static unsigned int target(struct sk_buff *skb, const struct net_device *in,
 			   const struct net_device *out, unsigned int hooknum,
 			   const struct xt_target *target,
 			   const void *targinfo)
@@ -47,7 +47,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
 		BUG();
 	}
 
-	(*pskb)->secmark = secmark;
+	skb->secmark = secmark;
 	return XT_CONTINUE;
 }
 
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index d40f7e4b128..07435a602b1 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -39,7 +39,7 @@ optlen(const u_int8_t *opt, unsigned int offset)
 }
 
 static int
-tcpmss_mangle_packet(struct sk_buff **pskb,
+tcpmss_mangle_packet(struct sk_buff *skb,
 		     const struct xt_tcpmss_info *info,
 		     unsigned int tcphoff,
 		     unsigned int minlen)
@@ -50,11 +50,11 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
 	u16 newmss;
 	u8 *opt;
 
-	if (!skb_make_writable(pskb, (*pskb)->len))
+	if (!skb_make_writable(skb, skb->len))
 		return -1;
 
-	tcplen = (*pskb)->len - tcphoff;
-	tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
+	tcplen = skb->len - tcphoff;
+	tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
 
 	/* Since it passed flags test in tcp match, we know it is is
 	   not a fragment, and has data >= tcp header length.  SYN
@@ -64,19 +64,19 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
 	if (tcplen != tcph->doff*4) {
 		if (net_ratelimit())
 			printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n",
-			       (*pskb)->len);
+			       skb->len);
 		return -1;
 	}
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
-		if (dst_mtu((*pskb)->dst) <= minlen) {
+		if (dst_mtu(skb->dst) <= minlen) {
 			if (net_ratelimit())
 				printk(KERN_ERR "xt_TCPMSS: "
 				       "unknown or invalid path-MTU (%u)\n",
-				       dst_mtu((*pskb)->dst));
+				       dst_mtu(skb->dst));
 			return -1;
 		}
-		newmss = dst_mtu((*pskb)->dst) - minlen;
+		newmss = dst_mtu(skb->dst) - minlen;
 	} else
 		newmss = info->mss;
 
@@ -95,7 +95,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
 			opt[i+2] = (newmss & 0xff00) >> 8;
 			opt[i+3] = newmss & 0x00ff;
 
-			nf_proto_csum_replace2(&tcph->check, *pskb,
+			nf_proto_csum_replace2(&tcph->check, skb,
 					       htons(oldmss), htons(newmss), 0);
 			return 0;
 		}
@@ -104,57 +104,53 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
 	/*
 	 * MSS Option not found ?! add it..
 	 */
-	if (skb_tailroom((*pskb)) < TCPOLEN_MSS) {
-		struct sk_buff *newskb;
-
-		newskb = skb_copy_expand(*pskb, skb_headroom(*pskb),
-					 TCPOLEN_MSS, GFP_ATOMIC);
-		if (!newskb)
+	if (skb_tailroom(skb) < TCPOLEN_MSS) {
+		if (pskb_expand_head(skb, 0,
+				     TCPOLEN_MSS - skb_tailroom(skb),
+				     GFP_ATOMIC))
 			return -1;
-		kfree_skb(*pskb);
-		*pskb = newskb;
-		tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
+		tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
 	}
 
-	skb_put((*pskb), TCPOLEN_MSS);
+	skb_put(skb, TCPOLEN_MSS);
 
 	opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
 	memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
 
-	nf_proto_csum_replace2(&tcph->check, *pskb,
+	nf_proto_csum_replace2(&tcph->check, skb,
 			       htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
 	opt[0] = TCPOPT_MSS;
 	opt[1] = TCPOLEN_MSS;
 	opt[2] = (newmss & 0xff00) >> 8;
 	opt[3] = newmss & 0x00ff;
 
-	nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0);
+	nf_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);
 
 	oldval = ((__be16 *)tcph)[6];
 	tcph->doff += TCPOLEN_MSS/4;
-	nf_proto_csum_replace2(&tcph->check, *pskb,
+	nf_proto_csum_replace2(&tcph->check, skb,
 				oldval, ((__be16 *)tcph)[6], 0);
 	return TCPOLEN_MSS;
 }
 
 static unsigned int
-xt_tcpmss_target4(struct sk_buff **pskb,
+xt_tcpmss_target4(struct sk_buff *skb,
 		  const struct net_device *in,
 		  const struct net_device *out,
 		  unsigned int hooknum,
 		  const struct xt_target *target,
 		  const void *targinfo)
 {
-	struct iphdr *iph = ip_hdr(*pskb);
+	struct iphdr *iph = ip_hdr(skb);
 	__be16 newlen;
 	int ret;
 
-	ret = tcpmss_mangle_packet(pskb, targinfo, iph->ihl * 4,
+	ret = tcpmss_mangle_packet(skb, targinfo, iph->ihl * 4,
 				   sizeof(*iph) + sizeof(struct tcphdr));
 	if (ret < 0)
 		return NF_DROP;
 	if (ret > 0) {
-		iph = ip_hdr(*pskb);
+		iph = ip_hdr(skb);
 		newlen = htons(ntohs(iph->tot_len) + ret);
 		nf_csum_replace2(&iph->check, iph->tot_len, newlen);
 		iph->tot_len = newlen;
@@ -164,30 +160,30 @@ xt_tcpmss_target4(struct sk_buff **pskb,
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 static unsigned int
-xt_tcpmss_target6(struct sk_buff **pskb,
+xt_tcpmss_target6(struct sk_buff *skb,
 		  const struct net_device *in,
 		  const struct net_device *out,
 		  unsigned int hooknum,
 		  const struct xt_target *target,
 		  const void *targinfo)
 {
-	struct ipv6hdr *ipv6h = ipv6_hdr(*pskb);
+	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	u8 nexthdr;
 	int tcphoff;
 	int ret;
 
 	nexthdr = ipv6h->nexthdr;
-	tcphoff = ipv6_skip_exthdr(*pskb, sizeof(*ipv6h), &nexthdr);
+	tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr);
 	if (tcphoff < 0) {
 		WARN_ON(1);
 		return NF_DROP;
 	}
-	ret = tcpmss_mangle_packet(pskb, targinfo, tcphoff,
+	ret = tcpmss_mangle_packet(skb, targinfo, tcphoff,
 				   sizeof(*ipv6h) + sizeof(struct tcphdr));
 	if (ret < 0)
 		return NF_DROP;
 	if (ret > 0) {
-		ipv6h = ipv6_hdr(*pskb);
+		ipv6h = ipv6_hdr(skb);
 		ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
 	}
 	return XT_CONTINUE;
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index 4df2dedcc0b..26c5d08ab2c 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -10,14 +10,14 @@ MODULE_ALIAS("ipt_TRACE");
 MODULE_ALIAS("ip6t_TRACE");
 
 static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
        const struct net_device *in,
        const struct net_device *out,
        unsigned int hooknum,
        const struct xt_target *target,
        const void *targinfo)
 {
-	(*pskb)->nf_trace = 1;
+	skb->nf_trace = 1;
 	return XT_CONTINUE;
 }
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c776bcd9f82..98e313e5e59 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1378,6 +1378,8 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
 		nl_table[unit].cb_mutex = cb_mutex;
 		nl_table[unit].module = module;
 		nl_table[unit].registered = 1;
+	} else {
+		kfree(listeners);
 	}
 	netlink_table_ungrab();
 
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 6b407ece953..fa006e06ce3 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -202,11 +202,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
 	/* yes, we have to worry about both in and out dev
 	 worry later - danger - this API seems to have changed
 	 from earlier kernels */
-
-	/* iptables targets take a double skb pointer in case the skb
-	 * needs to be replaced. We don't own the skb, so this must not
-	 * happen. The pskb_expand_head above should make sure of this */
-	ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL,
+	ret = ipt->tcfi_t->u.kernel.target->target(skb, skb->dev, NULL,
 						   ipt->tcfi_hook,
 						   ipt->tcfi_t->u.kernel.target,
 						   ipt->tcfi_t->data);
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 2d32fd27496..3f8335e6ea2 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -205,20 +205,19 @@ static unsigned int ingress_drop(struct Qdisc *sch)
 #ifndef CONFIG_NET_CLS_ACT
 #ifdef CONFIG_NETFILTER
 static unsigned int
-ing_hook(unsigned int hook, struct sk_buff **pskb,
+ing_hook(unsigned int hook, struct sk_buff *skb,
 			     const struct net_device *indev,
 			     const struct net_device *outdev,
 			     int (*okfn)(struct sk_buff *))
 {
 
 	struct Qdisc *q;
-	struct sk_buff *skb = *pskb;
 	struct net_device *dev = skb->dev;
 	int fwres=NF_ACCEPT;
 
 	DPRINTK("ing_hook: skb %s dev=%s len=%u\n",
 		skb->sk ? "(owned)" : "(unowned)",
-		skb->dev ? (*pskb)->dev->name : "(no dev)",
+		skb->dev ? skb->dev->name : "(no dev)",
 		skb->len);
 
 	if (dev->qdisc_ingress) {
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 9de3ddaa276..eb4deaf5891 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -954,9 +954,9 @@ static struct inet_protosw sctpv6_stream_protosw = {
 	.flags         = SCTP_PROTOSW_FLAG,
 };
 
-static int sctp6_rcv(struct sk_buff **pskb)
+static int sctp6_rcv(struct sk_buff *skb)
 {
-	return sctp_rcv(*pskb) ? -1 : 0;
+	return sctp_rcv(skb) ? -1 : 0;
 }
 
 static struct inet6_protocol sctpv6_protocol = {
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9c6a4b5f626..bd6f42a15a4 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5058,6 +5058,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
 static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
 				    char __user *optval, int __user *optlen)
 {
+	struct sctp_authchunks __user *p = (void __user *)optval;
 	struct sctp_authchunks val;
 	struct sctp_association *asoc;
 	struct sctp_chunks_param *ch;
@@ -5066,10 +5067,10 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
 	if (len <= sizeof(struct sctp_authchunks))
 		return -EINVAL;
 
-	if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+	if (copy_from_user(&val, p, sizeof(struct sctp_authchunks)))
 		return -EFAULT;
 
-	to = val.gauth_chunks;
+	to = p->gauth_chunks;
 	asoc = sctp_id2assoc(sk, val.gauth_assoc_id);
 	if (!asoc)
 		return -EINVAL;
@@ -5092,6 +5093,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
 static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
 				    char __user *optval, int __user *optlen)
 {
+	struct sctp_authchunks __user *p = (void __user *)optval;
 	struct sctp_authchunks val;
 	struct sctp_association *asoc;
 	struct sctp_chunks_param *ch;
@@ -5100,10 +5102,10 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
 	if (len <= sizeof(struct sctp_authchunks))
 		return -EINVAL;
 
-	if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+	if (copy_from_user(&val, p, sizeof(struct sctp_authchunks)))
 		return -EFAULT;
 
-	to = val.gauth_chunks;
+	to = p->gauth_chunks;
 	asoc = sctp_id2assoc(sk, val.gauth_assoc_id);
 	if (!asoc && val.gauth_assoc_id && sctp_style(sk, UDP))
 		return -EINVAL;
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 8ebfc4db7f5..5c69a725e53 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -5,6 +5,7 @@
 
 obj-$(CONFIG_SUNRPC) += sunrpc.o
 obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
+obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
 
 sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
 	    auth.o auth_null.o auth_unix.o \
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 42b3220bed3..8bd074df27d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -42,7 +42,7 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
 {
 	u8 *ptr;
 	u8 pad;
-	int len = buf->len;
+	size_t len = buf->len;
 
 	if (len <= buf->head[0].iov_len) {
 		pad = *(u8 *)(buf->head[0].iov_base + len - 1);
@@ -53,9 +53,9 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
 	} else
 		len -= buf->head[0].iov_len;
 	if (len <= buf->page_len) {
-		int last = (buf->page_base + len - 1)
+		unsigned int last = (buf->page_base + len - 1)
 					>>PAGE_CACHE_SHIFT;
-		int offset = (buf->page_base + len - 1)
+		unsigned int offset = (buf->page_base + len - 1)
 					& (PAGE_CACHE_SIZE - 1);
 		ptr = kmap_atomic(buf->pages[last], KM_USER0);
 		pad = *(ptr + offset);
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 7da7050f06c..73940df6c46 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -631,7 +631,8 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o)
 	return 0;
 }
 
-/* Verify the checksum on the header and return SVC_OK on success.
+/*
+ * Verify the checksum on the header and return SVC_OK on success.
  * Otherwise, return SVC_DROP (in the case of a bad sequence number)
  * or return SVC_DENIED and indicate error in authp.
  */
@@ -961,6 +962,78 @@ gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip)
 }
 
 /*
+ * Having read the cred already and found we're in the context
+ * initiation case, read the verifier and initiate (or check the results
+ * of) upcalls to userspace for help with context initiation.  If
+ * the upcall results are available, write the verifier and result.
+ * Otherwise, drop the request pending an answer to the upcall.
+ */
+static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
+			struct rpc_gss_wire_cred *gc, __be32 *authp)
+{
+	struct kvec *argv = &rqstp->rq_arg.head[0];
+	struct kvec *resv = &rqstp->rq_res.head[0];
+	struct xdr_netobj tmpobj;
+	struct rsi *rsip, rsikey;
+
+	/* Read the verifier; should be NULL: */
+	*authp = rpc_autherr_badverf;
+	if (argv->iov_len < 2 * 4)
+		return SVC_DENIED;
+	if (svc_getnl(argv) != RPC_AUTH_NULL)
+		return SVC_DENIED;
+	if (svc_getnl(argv) != 0)
+		return SVC_DENIED;
+
+	/* Marshal context handle and token for upcall: */
+	*authp = rpc_autherr_badcred;
+	if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0)
+		return SVC_DENIED;
+	memset(&rsikey, 0, sizeof(rsikey));
+	if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
+		return SVC_DROP;
+	*authp = rpc_autherr_badverf;
+	if (svc_safe_getnetobj(argv, &tmpobj)) {
+		kfree(rsikey.in_handle.data);
+		return SVC_DENIED;
+	}
+	if (dup_netobj(&rsikey.in_token, &tmpobj)) {
+		kfree(rsikey.in_handle.data);
+		return SVC_DROP;
+	}
+
+	/* Perform upcall, or find upcall result: */
+	rsip = rsi_lookup(&rsikey);
+	rsi_free(&rsikey);
+	if (!rsip)
+		return SVC_DROP;
+	switch (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) {
+	case -EAGAIN:
+	case -ETIMEDOUT:
+	case -ENOENT:
+		/* No upcall result: */
+		return SVC_DROP;
+	case 0:
+		/* Got an answer to the upcall; use it: */
+		if (gss_write_init_verf(rqstp, rsip))
+			return SVC_DROP;
+		if (resv->iov_len + 4 > PAGE_SIZE)
+			return SVC_DROP;
+		svc_putnl(resv, RPC_SUCCESS);
+		if (svc_safe_putnetobj(resv, &rsip->out_handle))
+			return SVC_DROP;
+		if (resv->iov_len + 3 * 4 > PAGE_SIZE)
+			return SVC_DROP;
+		svc_putnl(resv, rsip->major_status);
+		svc_putnl(resv, rsip->minor_status);
+		svc_putnl(resv, GSS_SEQ_WIN);
+		if (svc_safe_putnetobj(resv, &rsip->out_token))
+			return SVC_DROP;
+	}
+	return SVC_COMPLETE;
+}
+
+/*
  * Accept an rpcsec packet.
  * If context establishment, punt to user space
  * If data exchange, verify/decrypt
@@ -974,11 +1047,9 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 	struct kvec	*argv = &rqstp->rq_arg.head[0];
 	struct kvec	*resv = &rqstp->rq_res.head[0];
 	u32		crlen;
-	struct xdr_netobj tmpobj;
 	struct gss_svc_data *svcdata = rqstp->rq_auth_data;
 	struct rpc_gss_wire_cred *gc;
 	struct rsc	*rsci = NULL;
-	struct rsi	*rsip, rsikey;
 	__be32		*rpcstart;
 	__be32		*reject_stat = resv->iov_base + resv->iov_len;
 	int		ret;
@@ -1023,30 +1094,14 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 	if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0))
 		goto auth_err;
 
-	/*
-	 * We've successfully parsed the credential. Let's check out the
-	 * verifier.  An AUTH_NULL verifier is allowed (and required) for
-	 * INIT and CONTINUE_INIT requests. AUTH_RPCSEC_GSS is required for
-	 * PROC_DATA and PROC_DESTROY.
-	 *
-	 * AUTH_NULL verifier is 0 (AUTH_NULL), 0 (length).
-	 * AUTH_RPCSEC_GSS verifier is:
-	 *   6 (AUTH_RPCSEC_GSS), length, checksum.
-	 * checksum is calculated over rpcheader from xid up to here.
-	 */
 	*authp = rpc_autherr_badverf;
 	switch (gc->gc_proc) {
 	case RPC_GSS_PROC_INIT:
 	case RPC_GSS_PROC_CONTINUE_INIT:
-		if (argv->iov_len < 2 * 4)
-			goto auth_err;
-		if (svc_getnl(argv) != RPC_AUTH_NULL)
-			goto auth_err;
-		if (svc_getnl(argv) != 0)
-			goto auth_err;
-		break;
+		return svcauth_gss_handle_init(rqstp, gc, authp);
 	case RPC_GSS_PROC_DATA:
 	case RPC_GSS_PROC_DESTROY:
+		/* Look up the context, and check the verifier: */
 		*authp = rpcsec_gsserr_credproblem;
 		rsci = gss_svc_searchbyctx(&gc->gc_ctx);
 		if (!rsci)
@@ -1067,51 +1122,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 
 	/* now act upon the command: */
 	switch (gc->gc_proc) {
-	case RPC_GSS_PROC_INIT:
-	case RPC_GSS_PROC_CONTINUE_INIT:
-		*authp = rpc_autherr_badcred;
-		if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0)
-			goto auth_err;
-		memset(&rsikey, 0, sizeof(rsikey));
-		if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
-			goto drop;
-		*authp = rpc_autherr_badverf;
-		if (svc_safe_getnetobj(argv, &tmpobj)) {
-			kfree(rsikey.in_handle.data);
-			goto auth_err;
-		}
-		if (dup_netobj(&rsikey.in_token, &tmpobj)) {
-			kfree(rsikey.in_handle.data);
-			goto drop;
-		}
-
-		rsip = rsi_lookup(&rsikey);
-		rsi_free(&rsikey);
-		if (!rsip) {
-			goto drop;
-		}
-		switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) {
-		case -EAGAIN:
-		case -ETIMEDOUT:
-		case -ENOENT:
-			goto drop;
-		case 0:
-			if (gss_write_init_verf(rqstp, rsip))
-				goto drop;
-			if (resv->iov_len + 4 > PAGE_SIZE)
-				goto drop;
-			svc_putnl(resv, RPC_SUCCESS);
-			if (svc_safe_putnetobj(resv, &rsip->out_handle))
-				goto drop;
-			if (resv->iov_len + 3 * 4 > PAGE_SIZE)
-				goto drop;
-			svc_putnl(resv, rsip->major_status);
-			svc_putnl(resv, rsip->minor_status);
-			svc_putnl(resv, GSS_SEQ_WIN);
-			if (svc_safe_putnetobj(resv, &rsip->out_token))
-				goto drop;
-		}
-		goto complete;
 	case RPC_GSS_PROC_DESTROY:
 		if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
 			goto auth_err;
@@ -1158,7 +1168,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 		goto out;
 	}
 auth_err:
-	/* Restore write pointer to original value: */
+	/* Restore write pointer to its original value: */
 	xdr_ressize_check(rqstp, reject_stat);
 	ret = SVC_DENIED;
 	goto out;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 52429b1ffcc..76be83ee4b0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -127,7 +127,14 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
 	struct rpc_clnt		*clnt = NULL;
 	struct rpc_auth		*auth;
 	int err;
-	int len;
+	size_t len;
+
+	/* sanity check the name before trying to print it */
+	err = -EINVAL;
+	len = strlen(servname);
+	if (len > RPC_MAXNETNAMELEN)
+		goto out_no_rpciod;
+	len++;
 
 	dprintk("RPC:       creating %s client for %s (xprt %p)\n",
 			program->name, servname, xprt);
@@ -148,7 +155,6 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
 	clnt->cl_parent = clnt;
 
 	clnt->cl_server = clnt->cl_inline_name;
-	len = strlen(servname) + 1;
 	if (len > sizeof(clnt->cl_inline_name)) {
 		char *buf = kmalloc(len, GFP_KERNEL);
 		if (buf != 0)
@@ -234,8 +240,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 {
 	struct rpc_xprt *xprt;
 	struct rpc_clnt *clnt;
-	struct rpc_xprtsock_create xprtargs = {
-		.proto = args->protocol,
+	struct xprt_create xprtargs = {
+		.ident = args->protocol,
 		.srcaddr = args->saddress,
 		.dstaddr = args->address,
 		.addrlen = args->addrsize,
@@ -253,7 +259,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 	 */
 	if (args->servername == NULL) {
 		struct sockaddr_in *addr =
-					(struct sockaddr_in *) &args->address;
+					(struct sockaddr_in *) args->address;
 		snprintf(servername, sizeof(servername), NIPQUAD_FMT,
 			NIPQUAD(addr->sin_addr.s_addr));
 		args->servername = servername;
@@ -269,9 +275,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 	if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
 		xprt->resvport = 0;
 
-	dprintk("RPC:       creating %s client for %s (xprt %p)\n",
-			args->program->name, args->servername, xprt);
-
 	clnt = rpc_new_client(xprt, args->servername, args->program,
 				args->version, args->authflavor);
 	if (IS_ERR(clnt))
@@ -439,7 +442,7 @@ rpc_release_client(struct rpc_clnt *clnt)
  */
 struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
 				      struct rpc_program *program,
-				      int vers)
+				      u32 vers)
 {
 	struct rpc_clnt *clnt;
 	struct rpc_version *version;
@@ -843,8 +846,7 @@ call_allocate(struct rpc_task *task)
 	dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
 
 	if (RPC_IS_ASYNC(task) || !signalled()) {
-		xprt_release(task);
-		task->tk_action = call_reserve;
+		task->tk_action = call_allocate;
 		rpc_delay(task, HZ>>4);
 		return;
 	}
@@ -871,6 +873,7 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
 	buf->head[0].iov_len = len;
 	buf->tail[0].iov_len = 0;
 	buf->page_len = 0;
+	buf->flags = 0;
 	buf->len = 0;
 	buf->buflen = len;
 }
@@ -937,7 +940,7 @@ call_bind(struct rpc_task *task)
 static void
 call_bind_status(struct rpc_task *task)
 {
-	int status = -EACCES;
+	int status = -EIO;
 
 	if (task->tk_status >= 0) {
 		dprint_status(task);
@@ -947,9 +950,20 @@ call_bind_status(struct rpc_task *task)
 	}
 
 	switch (task->tk_status) {
+	case -EAGAIN:
+		dprintk("RPC: %5u rpcbind waiting for another request "
+				"to finish\n", task->tk_pid);
+		/* avoid busy-waiting here -- could be a network outage. */
+		rpc_delay(task, 5*HZ);
+		goto retry_timeout;
 	case -EACCES:
 		dprintk("RPC: %5u remote rpcbind: RPC program/version "
 				"unavailable\n", task->tk_pid);
+		/* fail immediately if this is an RPC ping */
+		if (task->tk_msg.rpc_proc->p_proc == 0) {
+			status = -EOPNOTSUPP;
+			break;
+		}
 		rpc_delay(task, 3*HZ);
 		goto retry_timeout;
 	case -ETIMEDOUT:
@@ -957,6 +971,7 @@ call_bind_status(struct rpc_task *task)
 				task->tk_pid);
 		goto retry_timeout;
 	case -EPFNOSUPPORT:
+		/* server doesn't support any rpcbind version we know of */
 		dprintk("RPC: %5u remote rpcbind service unavailable\n",
 				task->tk_pid);
 		break;
@@ -969,7 +984,6 @@ call_bind_status(struct rpc_task *task)
 	default:
 		dprintk("RPC: %5u unrecognized rpcbind error (%d)\n",
 				task->tk_pid, -task->tk_status);
-		status = -EIO;
 	}
 
 	rpc_exit(task, status);
@@ -1257,7 +1271,6 @@ call_refresh(struct rpc_task *task)
 {
 	dprint_status(task);
 
-	xprt_release(task);	/* Must do to obtain new XID */
 	task->tk_action = call_refreshresult;
 	task->tk_status = 0;
 	task->tk_client->cl_stats->rpcauthrefresh++;
@@ -1375,6 +1388,8 @@ call_verify(struct rpc_task *task)
 			dprintk("RPC: %5u %s: retry stale creds\n",
 					task->tk_pid, __FUNCTION__);
 			rpcauth_invalcred(task);
+			/* Ensure we obtain a new XID! */
+			xprt_release(task);
 			task->tk_action = call_refresh;
 			goto out_retry;
 		case RPC_AUTH_BADCRED:
@@ -1523,13 +1538,18 @@ void rpc_show_tasks(void)
 		spin_lock(&clnt->cl_lock);
 		list_for_each_entry(t, &clnt->cl_tasks, tk_task) {
 			const char *rpc_waitq = "none";
+			int proc;
+
+			if (t->tk_msg.rpc_proc)
+				proc = t->tk_msg.rpc_proc->p_proc;
+			else
+				proc = -1;
 
 			if (RPC_IS_QUEUED(t))
 				rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
 
 			printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
-				t->tk_pid,
-				(t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
+				t->tk_pid, proc,
 				t->tk_flags, t->tk_status,
 				t->tk_client,
 				(t->tk_client ? t->tk_client->cl_prog : 0),
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 669e12a4ed1..c8433e8865a 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -14,7 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
-#include <linux/dnotify.h>
+#include <linux/fsnotify.h>
 #include <linux/kernel.h>
 
 #include <asm/ioctls.h>
@@ -329,6 +329,7 @@ rpc_show_info(struct seq_file *m, void *v)
 			clnt->cl_prog, clnt->cl_vers);
 	seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
 	seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
+	seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT));
 	return 0;
 }
 
@@ -585,6 +586,7 @@ rpc_populate(struct dentry *parent,
 		if (S_ISDIR(mode))
 			inc_nlink(dir);
 		d_add(dentry, inode);
+		fsnotify_create(dir, dentry);
 	}
 	mutex_unlock(&dir->i_mutex);
 	return 0;
@@ -606,7 +608,7 @@ __rpc_mkdir(struct inode *dir, struct dentry *dentry)
 	inode->i_ino = iunique(dir->i_sb, 100);
 	d_instantiate(dentry, inode);
 	inc_nlink(dir);
-	inode_dir_notify(dir, DN_CREATE);
+	fsnotify_mkdir(dir, dentry);
 	return 0;
 out_err:
 	printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
@@ -748,7 +750,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi
 	rpci->flags = flags;
 	rpci->ops = ops;
 	rpci->nkern_readwriters = 1;
-	inode_dir_notify(dir, DN_CREATE);
+	fsnotify_create(dir, dentry);
 	dget(dentry);
 out:
 	mutex_unlock(&dir->i_mutex);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index d1740dbab99..a05493aedb6 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -16,11 +16,14 @@
 
 #include <linux/types.h>
 #include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/in6.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/xprtsock.h>
 
 #ifdef RPC_DEBUG
 # define RPCDBG_FACILITY	RPCDBG_BIND
@@ -91,26 +94,6 @@ enum {
 #define RPCB_MAXADDRLEN		(128u)
 
 /*
- * r_netid
- *
- * Quoting RFC 3530, section 2.2:
- *
- * For TCP over IPv4 the value of r_netid is the string "tcp".  For UDP
- * over IPv4 the value of r_netid is the string "udp".
- *
- * ...
- *
- * For TCP over IPv6 the value of r_netid is the string "tcp6".  For UDP
- * over IPv6 the value of r_netid is the string "udp6".
- */
-#define RPCB_NETID_UDP	"\165\144\160"		/* "udp" */
-#define RPCB_NETID_TCP	"\164\143\160"		/* "tcp" */
-#define RPCB_NETID_UDP6	"\165\144\160\066"	/* "udp6" */
-#define RPCB_NETID_TCP6	"\164\143\160\066"	/* "tcp6" */
-
-#define RPCB_MAXNETIDLEN	(4u)
-
-/*
  * r_owner
  *
  * The "owner" is allowed to unset a service in the rpcbind database.
@@ -120,7 +103,7 @@ enum {
 #define RPCB_MAXOWNERLEN	sizeof(RPCB_OWNER_STRING)
 
 static void			rpcb_getport_done(struct rpc_task *, void *);
-extern struct rpc_program	rpcb_program;
+static struct rpc_program	rpcb_program;
 
 struct rpcbind_args {
 	struct rpc_xprt *	r_xprt;
@@ -137,10 +120,13 @@ struct rpcbind_args {
 static struct rpc_procinfo rpcb_procedures2[];
 static struct rpc_procinfo rpcb_procedures3[];
 
-static struct rpcb_info {
+struct rpcb_info {
 	int			rpc_vers;
 	struct rpc_procinfo *	rpc_proc;
-} rpcb_next_version[];
+};
+
+static struct rpcb_info rpcb_next_version[];
+static struct rpcb_info rpcb_next_version6[];
 
 static void rpcb_getport_prepare(struct rpc_task *task, void *calldata)
 {
@@ -190,7 +176,17 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
 				   RPC_CLNT_CREATE_INTR),
 	};
 
-	((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
+	switch (srvaddr->sa_family) {
+	case AF_INET:
+		((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
+		break;
+	case AF_INET6:
+		((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT);
+		break;
+	default:
+		return NULL;
+	}
+
 	if (!privileged)
 		args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
 	return rpc_create(&args);
@@ -234,7 +230,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 			prog, vers, prot, port);
 
 	rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
-					IPPROTO_UDP, 2, 1);
+					XPRT_TRANSPORT_UDP, 2, 1);
 	if (IS_ERR(rpcb_clnt))
 		return PTR_ERR(rpcb_clnt);
 
@@ -316,6 +312,7 @@ void rpcb_getport_async(struct rpc_task *task)
 	struct rpc_task	*child;
 	struct sockaddr addr;
 	int status;
+	struct rpcb_info *info;
 
 	dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
 		task->tk_pid, __FUNCTION__,
@@ -325,7 +322,7 @@ void rpcb_getport_async(struct rpc_task *task)
 	BUG_ON(clnt->cl_parent != clnt);
 
 	if (xprt_test_and_set_binding(xprt)) {
-		status = -EACCES;		/* tell caller to check again */
+		status = -EAGAIN;	/* tell caller to check again */
 		dprintk("RPC: %5u %s: waiting for another binder\n",
 			task->tk_pid, __FUNCTION__);
 		goto bailout_nowake;
@@ -343,18 +340,43 @@ void rpcb_getport_async(struct rpc_task *task)
 		goto bailout_nofree;
 	}
 
-	if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) {
+	rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
+
+	/* Don't ever use rpcbind v2 for AF_INET6 requests */
+	switch (addr.sa_family) {
+	case AF_INET:
+		info = rpcb_next_version;
+		break;
+	case AF_INET6:
+		info = rpcb_next_version6;
+		break;
+	default:
+		status = -EAFNOSUPPORT;
+		dprintk("RPC: %5u %s: bad address family\n",
+				task->tk_pid, __FUNCTION__);
+		goto bailout_nofree;
+	}
+	if (info[xprt->bind_index].rpc_proc == NULL) {
 		xprt->bind_index = 0;
-		status = -EACCES;	/* tell caller to try again later */
+		status = -EPFNOSUPPORT;
 		dprintk("RPC: %5u %s: no more getport versions available\n",
 			task->tk_pid, __FUNCTION__);
 		goto bailout_nofree;
 	}
-	bind_version = rpcb_next_version[xprt->bind_index].rpc_vers;
+	bind_version = info[xprt->bind_index].rpc_vers;
 
 	dprintk("RPC: %5u %s: trying rpcbind version %u\n",
 		task->tk_pid, __FUNCTION__, bind_version);
 
+	rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot,
+				bind_version, 0);
+	if (IS_ERR(rpcb_clnt)) {
+		status = PTR_ERR(rpcb_clnt);
+		dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
+			task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt));
+		goto bailout_nofree;
+	}
+
 	map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC);
 	if (!map) {
 		status = -ENOMEM;
@@ -367,28 +389,19 @@ void rpcb_getport_async(struct rpc_task *task)
 	map->r_prot = xprt->prot;
 	map->r_port = 0;
 	map->r_xprt = xprt_get(xprt);
-	map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP :
-						   RPCB_NETID_UDP;
-	memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR),
-			sizeof(map->r_addr));
+	map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
+	memcpy(map->r_addr,
+	       rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR),
+	       sizeof(map->r_addr));
 	map->r_owner = RPCB_OWNER_STRING;	/* ignored for GETADDR */
 
-	rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
-	rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0);
-	if (IS_ERR(rpcb_clnt)) {
-		status = PTR_ERR(rpcb_clnt);
-		dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
-			task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt));
-		goto bailout;
-	}
-
 	child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map);
 	rpc_release_client(rpcb_clnt);
 	if (IS_ERR(child)) {
 		status = -EIO;
 		dprintk("RPC: %5u %s: rpc_run_task failed\n",
 			task->tk_pid, __FUNCTION__);
-		goto bailout_nofree;
+		goto bailout;
 	}
 	rpc_put_task(child);
 
@@ -403,6 +416,7 @@ bailout_nofree:
 bailout_nowake:
 	task->tk_status = status;
 }
+EXPORT_SYMBOL_GPL(rpcb_getport_async);
 
 /*
  * Rpcbind child task calls this callback via tk_exit.
@@ -413,6 +427,10 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
 	struct rpc_xprt *xprt = map->r_xprt;
 	int status = child->tk_status;
 
+	/* Garbage reply: retry with a lesser rpcbind version */
+	if (status == -EIO)
+		status = -EPROTONOSUPPORT;
+
 	/* rpcbind server doesn't support this rpcbind protocol version */
 	if (status == -EPROTONOSUPPORT)
 		xprt->bind_index++;
@@ -490,16 +508,24 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
 			       unsigned short *portp)
 {
 	char *addr;
-	int addr_len, c, i, f, first, val;
+	u32 addr_len;
+	int c, i, f, first, val;
 
 	*portp = 0;
-	addr_len = (unsigned int) ntohl(*p++);
-	if (addr_len > RPCB_MAXADDRLEN)			/* sanity */
-		return -EINVAL;
-
-	dprintk("RPC:       rpcb_decode_getaddr returned string: '%s'\n",
-			(char *) p);
-
+	addr_len = ntohl(*p++);
+
+	/*
+	 * Simple sanity check.  The smallest possible universal
+	 * address is an IPv4 address string containing 11 bytes.
+	 */
+	if (addr_len < 11 || addr_len > RPCB_MAXADDRLEN)
+		goto out_err;
+
+	/*
+	 * Start at the end and walk backwards until the first dot
+	 * is encountered.  When the second dot is found, we have
+	 * both parts of the port number.
+	 */
 	addr = (char *)p;
 	val = 0;
 	first = 1;
@@ -521,8 +547,19 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
 		}
 	}
 
+	/*
+	 * Simple sanity check.  If we never saw a dot in the reply,
+	 * then this was probably just garbage.
+	 */
+	if (first)
+		goto out_err;
+
 	dprintk("RPC:       rpcb_decode_getaddr port=%u\n", *portp);
 	return 0;
+
+out_err:
+	dprintk("RPC:       rpcbind server returned malformed reply\n");
+	return -EIO;
 }
 
 #define RPCB_program_sz		(1u)
@@ -531,7 +568,7 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
 #define RPCB_port_sz		(1u)
 #define RPCB_boolean_sz		(1u)
 
-#define RPCB_netid_sz		(1+XDR_QUADLEN(RPCB_MAXNETIDLEN))
+#define RPCB_netid_sz		(1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN))
 #define RPCB_addr_sz		(1+XDR_QUADLEN(RPCB_MAXADDRLEN))
 #define RPCB_ownerstring_sz	(1+XDR_QUADLEN(RPCB_MAXOWNERLEN))
 
@@ -593,6 +630,14 @@ static struct rpcb_info rpcb_next_version[] = {
 	{ 0, NULL },
 };
 
+static struct rpcb_info rpcb_next_version6[] = {
+#ifdef CONFIG_SUNRPC_BIND34
+	{ 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] },
+	{ 3, &rpcb_procedures3[RPCBPROC_GETADDR] },
+#endif
+	{ 0, NULL },
+};
+
 static struct rpc_version rpcb_version2 = {
 	.number		= 2,
 	.nrprocs	= RPCB_HIGHPROC_2,
@@ -621,7 +666,7 @@ static struct rpc_version *rpcb_version[] = {
 
 static struct rpc_stat rpcb_stats;
 
-struct rpc_program rpcb_program = {
+static struct rpc_program rpcb_program = {
 	.name		= "rpcbind",
 	.number		= RPCBIND_PROGRAM,
 	.nrvers		= ARRAY_SIZE(rpcb_version),
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 954d7ec86c7..3c773c53e12 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -777,6 +777,7 @@ void *rpc_malloc(struct rpc_task *task, size_t size)
 			task->tk_pid, size, buf);
 	return &buf->data;
 }
+EXPORT_SYMBOL_GPL(rpc_malloc);
 
 /**
  * rpc_free - free buffer allocated via rpc_malloc
@@ -802,6 +803,7 @@ void rpc_free(void *buffer)
 	else
 		kfree(buf);
 }
+EXPORT_SYMBOL_GPL(rpc_free);
 
 /*
  * Creation and deletion of RPC task structures
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 1d377d1ab7f..97ac45f034d 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -34,6 +34,7 @@ size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
 	desc->offset += len;
 	return len;
 }
+EXPORT_SYMBOL_GPL(xdr_skb_read_bits);
 
 /**
  * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
@@ -137,6 +138,7 @@ copy_tail:
 out:
 	return copied;
 }
+EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb);
 
 /**
  * csum_partial_copy_to_xdr - checksum and copy data
@@ -179,3 +181,4 @@ no_checksum:
 		return -1;
 	return 0;
 }
+EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr);
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 384c4ad5ab8..33d89e842c8 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -20,7 +20,7 @@
 #include <linux/sunrpc/auth.h>
 #include <linux/workqueue.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
-
+#include <linux/sunrpc/xprtsock.h>
 
 /* RPC scheduler */
 EXPORT_SYMBOL(rpc_execute);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 55ea6df069d..a4a6bf7deaa 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -777,6 +777,30 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
 }
 
 /*
+ * Printk the given error with the address of the client that caused it.
+ */
+static int
+__attribute__ ((format (printf, 2, 3)))
+svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
+{
+	va_list args;
+	int 	r;
+	char 	buf[RPC_MAX_ADDRBUFLEN];
+
+	if (!net_ratelimit())
+		return 0;
+
+	printk(KERN_WARNING "svc: %s: ",
+		svc_print_addr(rqstp, buf, sizeof(buf)));
+
+	va_start(args, fmt);
+	r = vprintk(fmt, args);
+	va_end(args);
+
+	return r;
+}
+
+/*
  * Process the RPC request.
  */
 int
@@ -963,14 +987,13 @@ svc_process(struct svc_rqst *rqstp)
 	return 0;
 
 err_short_len:
-	if (net_ratelimit())
-		printk("svc: short len %Zd, dropping request\n", argv->iov_len);
+	svc_printk(rqstp, "short len %Zd, dropping request\n",
+			argv->iov_len);
 
 	goto dropit;			/* drop request */
 
 err_bad_dir:
-	if (net_ratelimit())
-		printk("svc: bad direction %d, dropping request\n", dir);
+	svc_printk(rqstp, "bad direction %d, dropping request\n", dir);
 
 	serv->sv_stats->rpcbadfmt++;
 	goto dropit;			/* drop request */
@@ -1000,8 +1023,7 @@ err_bad_prog:
 	goto sendit;
 
 err_bad_vers:
-	if (net_ratelimit())
-		printk("svc: unknown version (%d for prog %d, %s)\n",
+	svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n",
 		       vers, prog, progp->pg_name);
 
 	serv->sv_stats->rpcbadfmt++;
@@ -1011,16 +1033,14 @@ err_bad_vers:
 	goto sendit;
 
 err_bad_proc:
-	if (net_ratelimit())
-		printk("svc: unknown procedure (%d)\n", proc);
+	svc_printk(rqstp, "unknown procedure (%d)\n", proc);
 
 	serv->sv_stats->rpcbadfmt++;
 	svc_putnl(resv, RPC_PROC_UNAVAIL);
 	goto sendit;
 
 err_garbage:
-	if (net_ratelimit())
-		printk("svc: failed to decode args\n");
+	svc_printk(rqstp, "failed to decode args\n");
 
 	rpc_stat = rpc_garbage_args;
 err_bad:
diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c
index 8142fdb8a93..31becbf0926 100644
--- a/net/sunrpc/timer.c
+++ b/net/sunrpc/timer.c
@@ -17,6 +17,7 @@
 
 #include <linux/types.h>
 #include <linux/unistd.h>
+#include <linux/module.h>
 
 #include <linux/sunrpc/clnt.h>
 
@@ -40,6 +41,7 @@ rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo)
 		rt->ntimeouts[i] = 0;
 	}
 }
+EXPORT_SYMBOL_GPL(rpc_init_rtt);
 
 /*
  * NB: When computing the smoothed RTT and standard deviation,
@@ -75,6 +77,7 @@ rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m)
 	if (*sdrtt < RPC_RTO_MIN)
 		*sdrtt = RPC_RTO_MIN;
 }
+EXPORT_SYMBOL_GPL(rpc_update_rtt);
 
 /*
  * Estimate rto for an nfs rpc sent via. an unreliable datagram.
@@ -103,3 +106,4 @@ rpc_calc_rto(struct rpc_rtt *rt, unsigned timer)
 
 	return res;
 }
+EXPORT_SYMBOL_GPL(rpc_calc_rto);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index c8c2edccad7..282a9a2ec90 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -62,6 +62,9 @@ static inline void	do_xprt_reserve(struct rpc_task *);
 static void	xprt_connect_status(struct rpc_task *task);
 static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
 
+static spinlock_t xprt_list_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(xprt_list);
+
 /*
  * The transport code maintains an estimate on the maximum number of out-
  * standing RPC requests, using a smoothed version of the congestion
@@ -81,6 +84,78 @@ static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
 #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
 
 /**
+ * xprt_register_transport - register a transport implementation
+ * @transport: transport to register
+ *
+ * If a transport implementation is loaded as a kernel module, it can
+ * call this interface to make itself known to the RPC client.
+ *
+ * Returns:
+ * 0:		transport successfully registered
+ * -EEXIST:	transport already registered
+ * -EINVAL:	transport module being unloaded
+ */
+int xprt_register_transport(struct xprt_class *transport)
+{
+	struct xprt_class *t;
+	int result;
+
+	result = -EEXIST;
+	spin_lock(&xprt_list_lock);
+	list_for_each_entry(t, &xprt_list, list) {
+		/* don't register the same transport class twice */
+		if (t->ident == transport->ident)
+			goto out;
+	}
+
+	result = -EINVAL;
+	if (try_module_get(THIS_MODULE)) {
+		list_add_tail(&transport->list, &xprt_list);
+		printk(KERN_INFO "RPC: Registered %s transport module.\n",
+			transport->name);
+		result = 0;
+	}
+
+out:
+	spin_unlock(&xprt_list_lock);
+	return result;
+}
+EXPORT_SYMBOL_GPL(xprt_register_transport);
+
+/**
+ * xprt_unregister_transport - unregister a transport implementation
+ * @transport: transport to unregister
+ *
+ * Returns:
+ * 0:		transport successfully unregistered
+ * -ENOENT:	transport never registered
+ */
+int xprt_unregister_transport(struct xprt_class *transport)
+{
+	struct xprt_class *t;
+	int result;
+
+	result = 0;
+	spin_lock(&xprt_list_lock);
+	list_for_each_entry(t, &xprt_list, list) {
+		if (t == transport) {
+			printk(KERN_INFO
+				"RPC: Unregistered %s transport module.\n",
+				transport->name);
+			list_del_init(&transport->list);
+			module_put(THIS_MODULE);
+			goto out;
+		}
+	}
+	result = -ENOENT;
+
+out:
+	spin_unlock(&xprt_list_lock);
+	return result;
+}
+EXPORT_SYMBOL_GPL(xprt_unregister_transport);
+
+/**
  * xprt_reserve_xprt - serialize write access to transports
  * @task: task that is requesting access to the transport
  *
@@ -118,6 +193,7 @@ out_sleep:
 		rpc_sleep_on(&xprt->sending, task, NULL, NULL);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
 
 static void xprt_clear_locked(struct rpc_xprt *xprt)
 {
@@ -167,6 +243,7 @@ out_sleep:
 		rpc_sleep_on(&xprt->sending, task, NULL, NULL);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
 
 static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
 {
@@ -246,6 +323,7 @@ void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 		__xprt_lock_write_next(xprt);
 	}
 }
+EXPORT_SYMBOL_GPL(xprt_release_xprt);
 
 /**
  * xprt_release_xprt_cong - allow other requests to use a transport
@@ -262,6 +340,7 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
 		__xprt_lock_write_next_cong(xprt);
 	}
 }
+EXPORT_SYMBOL_GPL(xprt_release_xprt_cong);
 
 static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
 {
@@ -314,6 +393,7 @@ void xprt_release_rqst_cong(struct rpc_task *task)
 {
 	__xprt_put_cong(task->tk_xprt, task->tk_rqstp);
 }
+EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
 
 /**
  * xprt_adjust_cwnd - adjust transport congestion window
@@ -345,6 +425,7 @@ void xprt_adjust_cwnd(struct rpc_task *task, int result)
 	xprt->cwnd = cwnd;
 	__xprt_put_cong(xprt, req);
 }
+EXPORT_SYMBOL_GPL(xprt_adjust_cwnd);
 
 /**
  * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue
@@ -359,6 +440,7 @@ void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status)
 	else
 		rpc_wake_up(&xprt->pending);
 }
+EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks);
 
 /**
  * xprt_wait_for_buffer_space - wait for transport output buffer to clear
@@ -373,6 +455,7 @@ void xprt_wait_for_buffer_space(struct rpc_task *task)
 	task->tk_timeout = req->rq_timeout;
 	rpc_sleep_on(&xprt->pending, task, NULL, NULL);
 }
+EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space);
 
 /**
  * xprt_write_space - wake the task waiting for transport output buffer space
@@ -393,6 +476,7 @@ void xprt_write_space(struct rpc_xprt *xprt)
 	}
 	spin_unlock_bh(&xprt->transport_lock);
 }
+EXPORT_SYMBOL_GPL(xprt_write_space);
 
 /**
  * xprt_set_retrans_timeout_def - set a request's retransmit timeout
@@ -406,6 +490,7 @@ void xprt_set_retrans_timeout_def(struct rpc_task *task)
 {
 	task->tk_timeout = task->tk_rqstp->rq_timeout;
 }
+EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def);
 
 /*
  * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout
@@ -425,6 +510,7 @@ void xprt_set_retrans_timeout_rtt(struct rpc_task *task)
 	if (task->tk_timeout > max_timeout || task->tk_timeout == 0)
 		task->tk_timeout = max_timeout;
 }
+EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt);
 
 static void xprt_reset_majortimeo(struct rpc_rqst *req)
 {
@@ -500,6 +586,7 @@ void xprt_disconnect(struct rpc_xprt *xprt)
 	xprt_wake_pending_tasks(xprt, -ENOTCONN);
 	spin_unlock_bh(&xprt->transport_lock);
 }
+EXPORT_SYMBOL_GPL(xprt_disconnect);
 
 static void
 xprt_init_autodisconnect(unsigned long data)
@@ -610,6 +697,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
 	xprt->stat.bad_xids++;
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
 
 /**
  * xprt_update_rtt - update an RPC client's RTT state after receiving a reply
@@ -629,6 +717,7 @@ void xprt_update_rtt(struct rpc_task *task)
 		rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
 	}
 }
+EXPORT_SYMBOL_GPL(xprt_update_rtt);
 
 /**
  * xprt_complete_rqst - called when reply processing is complete
@@ -653,6 +742,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
 	req->rq_received = req->rq_private_buf.len = copied;
 	rpc_wake_up_task(task);
 }
+EXPORT_SYMBOL_GPL(xprt_complete_rqst);
 
 static void xprt_timer(struct rpc_task *task)
 {
@@ -889,23 +979,25 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i
  * @args: rpc transport creation arguments
  *
  */
-struct rpc_xprt *xprt_create_transport(struct rpc_xprtsock_create *args)
+struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
 {
 	struct rpc_xprt	*xprt;
 	struct rpc_rqst	*req;
+	struct xprt_class *t;
 
-	switch (args->proto) {
-	case IPPROTO_UDP:
-		xprt = xs_setup_udp(args);
-		break;
-	case IPPROTO_TCP:
-		xprt = xs_setup_tcp(args);
-		break;
-	default:
-		printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n",
-				args->proto);
-		return ERR_PTR(-EIO);
+	spin_lock(&xprt_list_lock);
+	list_for_each_entry(t, &xprt_list, list) {
+		if (t->ident == args->ident) {
+			spin_unlock(&xprt_list_lock);
+			goto found;
+		}
 	}
+	spin_unlock(&xprt_list_lock);
+	printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident);
+	return ERR_PTR(-EIO);
+
+found:
+	xprt = t->setup(args);
 	if (IS_ERR(xprt)) {
 		dprintk("RPC:       xprt_create_transport: failed, %ld\n",
 				-PTR_ERR(xprt));
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
new file mode 100644
index 00000000000..264f0feeb51
--- /dev/null
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o
+
+xprtrdma-y := transport.o rpc_rdma.o verbs.o
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
new file mode 100644
index 00000000000..12db6358042
--- /dev/null
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -0,0 +1,868 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *      Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *      Neither the name of the Network Appliance, Inc. nor the names of
+ *      its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written
+ *      permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * rpc_rdma.c
+ *
+ * This file contains the guts of the RPC RDMA protocol, and
+ * does marshaling/unmarshaling, etc. It is also where interfacing
+ * to the Linux RPC framework lives.
+ */
+
+#include "xprt_rdma.h"
+
+#include <linux/highmem.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_TRANS
+#endif
+
+enum rpcrdma_chunktype {	/* values index transfertypes[] (debug) */
+	rpcrdma_noch = 0,	/* no chunks: pure inline */
+	rpcrdma_readch,		/* some argument via rdma read */
+	rpcrdma_areadch,	/* entire request via rdma read */
+	rpcrdma_writech,	/* some result via rdma write */
+	rpcrdma_replych		/* entire reply via rdma write */
+};
+
+#ifdef RPC_DEBUG
+static const char transfertypes[][12] = {	/* dprintk names by chunk type */
+	[rpcrdma_noch]    = "pure inline",	/* nothing is chunked */
+	[rpcrdma_readch]  = " read chunk",	/* part of the args by rdma read */
+	[rpcrdma_areadch] = "*read chunk",	/* whole request by rdma read */
+	[rpcrdma_writech] = "write chunk",	/* part of result by rdma write */
+	[rpcrdma_replych] = "reply chunk"	/* whole reply by rdma write */
+};
+#endif
+
+/*
+ * Chunk assembly from upper layer xdr_buf.
+ *
+ * Fill @seg with up to @nsegs elements describing @xdrbuf: the head iovec
+ * (only if @pos is 0), each page, and the tail iovec (only while pos is
+ * short of xdrbuf->len). Returns the element count, which is 0 when @nsegs
+ * is too small; @type is unused. Segments are then coalesced when
+ * registered, if possible within the selected memreg mode. Never called if
+ * the connection's memory registration strategy is 0 (bounce buffers).
+ */
+
+static int
+rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, int pos,
+	enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs)
+{
+	int len, n = 0, p;
+
+	if (pos == 0 && xdrbuf->head[0].iov_len) {
+		seg[n].mr_page = NULL;	/* no page: mr_offset holds the address */
+		seg[n].mr_offset = xdrbuf->head[0].iov_base;
+		seg[n].mr_len = xdrbuf->head[0].iov_len;
+		pos += xdrbuf->head[0].iov_len;
+		++n;
+	}
+
+	if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) {
+		if (n == nsegs)
+			return 0;	/* out of segments */
+		seg[n].mr_page = xdrbuf->pages[0];
+		seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base;
+		seg[n].mr_len = min_t(u32,
+			PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len);
+		len = xdrbuf->page_len - seg[n].mr_len;
+		pos += len;	/* NOTE(review): first page's mr_len not counted */
+		++n;
+		p = 1;
+		while (len > 0) {	/* later pages are used from offset 0 */
+			if (n == nsegs)
+				return 0;	/* out of segments */
+			seg[n].mr_page = xdrbuf->pages[p];
+			seg[n].mr_offset = NULL;
+			seg[n].mr_len = min_t(u32, PAGE_SIZE, len);
+			len -= seg[n].mr_len;
+			++n;
+			++p;
+		}
+	}
+
+	if (pos < xdrbuf->len && xdrbuf->tail[0].iov_len) {
+		if (n == nsegs)
+			return 0;	/* out of segments */
+		seg[n].mr_page = NULL;	/* no page: mr_offset holds the address */
+		seg[n].mr_offset = xdrbuf->tail[0].iov_base;
+		seg[n].mr_len = xdrbuf->tail[0].iov_len;
+		pos += xdrbuf->tail[0].iov_len;
+		++n;
+	}
+
+	if (pos < xdrbuf->len)	/* debug report only; n is still returned */
+		dprintk("RPC:       %s: marshaled only %d of %d\n",
+				__func__, pos, xdrbuf->len);
+
+	return n;
+}
+
+/*
+ * Create read/write chunk lists, and reply chunks, for RDMA
+ *
+ *   Assume check against THRESHOLD has been done, and chunks are required.
+ *   Assume only encoding one list entry for read|write chunks. The NFSv3
+ *     protocol is simple enough to allow this as it only has a single "bulk
+ *     result" in each procedure - complicated NFSv4 COMPOUNDs are not. (The
+ *     RDMA/Sessions NFSv4 proposal addresses this for future v4 revs.)
+ *
+ * When used for a single reply chunk (which is a special write
+ * chunk used for the entire reply, rather than just the data), it
+ * is used primarily for READDIR and READLINK which would otherwise
+ * be severely size-limited by a small rdma inline read max. The server
+ * response will come back as an RDMA Write, followed by a message
+ * of type RDMA_NOMSG carrying the xid and length. As a result, reply
+ * chunks do not provide data alignment, however they do not require
+ * "fixup" (moving the response to the upper layer buffer) either.
+ *
+ * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
+ *
+ *  Read chunklist (a linked list):
+ *   N elements, position P (same P for all chunks of same arg!):
+ *    1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
+ *
+ *  Write chunklist (a list of (one) counted array):
+ *   N elements:
+ *    1 - N - HLOO - HLOO - ... - HLOO - 0
+ *
+ *  Reply chunk (a counted array):
+ *   N elements:
+ *    1 - N - HLOO - HLOO - ... - HLOO
+ */
+
+static unsigned int
+rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
+		struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type)
+{
+	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_task->tk_xprt);
+	int nsegs, nchunks = 0;
+	int pos;
+	struct rpcrdma_mr_seg *seg = req->rl_segments;
+	struct rpcrdma_read_chunk *cur_rchunk = NULL;
+	struct rpcrdma_write_array *warray = NULL;
+	struct rpcrdma_write_chunk *cur_wchunk = NULL;
+	u32 *iptr = headerp->rm_body.rm_chunks;
+
+	if (type == rpcrdma_readch || type == rpcrdma_areadch) {
+		/* a read chunk - server will RDMA Read our memory */
+		cur_rchunk = (struct rpcrdma_read_chunk *) iptr;
+	} else {
+		/* a write or reply chunk - server will RDMA Write our memory */
+		*iptr++ = xdr_zero;	/* encode a NULL read chunk list */
+		if (type == rpcrdma_replych)
+			*iptr++ = xdr_zero;	/* a NULL write chunk list */
+		warray = (struct rpcrdma_write_array *) iptr;
+		/* elements start right behind the array header */
+		cur_wchunk = (struct rpcrdma_write_chunk *) (warray + 1);
+	}
+
+	if (type == rpcrdma_replych || type == rpcrdma_areadch)
+		pos = 0;	/* head iovec is converted too */
+	else
+		pos = target->head[0].iov_len;	/* head iovec is skipped */
+
+	nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS);
+	if (nsegs == 0)
+		return 0;	/* nothing converted: report failure */
+
+	do {
+		/* bind/register the memory, then build chunk from result. */
+		int n = rpcrdma_register_external(seg, nsegs,
+						cur_wchunk != NULL, r_xprt);
+		if (n <= 0)	/* treated as failure: unwind at out: */
+			goto out;
+		if (cur_rchunk) {	/* read */
+			cur_rchunk->rc_discrim = xdr_one;
+			/* all read chunks have the same "position" */
+			cur_rchunk->rc_position = htonl(pos);
+			cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey);
+			cur_rchunk->rc_target.rs_length = htonl(seg->mr_len);
+			xdr_encode_hyper(
+					(u32 *)&cur_rchunk->rc_target.rs_offset,
+					seg->mr_base);
+			dprintk("RPC:       %s: read chunk "
+				"elem %d@0x%llx:0x%x pos %d (%s)\n", __func__,
+				seg->mr_len, seg->mr_base, seg->mr_rkey, pos,
+				n < nsegs ? "more" : "last");
+			cur_rchunk++;
+			r_xprt->rx_stats.read_chunk_count++;
+		} else {		/* write/reply */
+			cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey);
+			cur_wchunk->wc_target.rs_length = htonl(seg->mr_len);
+			xdr_encode_hyper(
+					(u32 *)&cur_wchunk->wc_target.rs_offset,
+					seg->mr_base);
+			dprintk("RPC:       %s: %s chunk "
+				"elem %d@0x%llx:0x%x (%s)\n", __func__,
+				(type == rpcrdma_replych) ? "reply" : "write",
+				seg->mr_len, seg->mr_base, seg->mr_rkey,
+				n < nsegs ? "more" : "last");
+			cur_wchunk++;
+			if (type == rpcrdma_replych)
+				r_xprt->rx_stats.reply_chunk_count++;
+			else
+				r_xprt->rx_stats.write_chunk_count++;
+			r_xprt->rx_stats.total_rdma_request += seg->mr_len;
+		}
+		nchunks++;
+		seg   += n;	/* step over the n segments just registered */
+		nsegs -= n;
+	} while (nsegs);
+
+	/* success. all failures jump to out: below, or returned above */
+	req->rl_nchunks = nchunks;
+
+	BUG_ON(nchunks == 0);
+
+	/*
+	 * finish off header. If write, marshal discrim and nchunks.
+	 */
+	if (cur_rchunk) {
+		iptr = (u32 *) cur_rchunk;
+		*iptr++ = xdr_zero;	/* finish the read chunk list */
+		*iptr++ = xdr_zero;	/* encode a NULL write chunk list */
+		*iptr++ = xdr_zero;	/* encode a NULL reply chunk */
+	} else {
+		warray->wc_discrim = xdr_one;
+		warray->wc_nchunks = htonl(nchunks);
+		iptr = (u32 *) cur_wchunk;
+		if (type == rpcrdma_writech) {
+			*iptr++ = xdr_zero; /* finish the write chunk list */
+			*iptr++ = xdr_zero; /* encode a NULL reply chunk */
+		}
+	}
+
+	/*
+	 * Return header size: bytes from headerp up to the last word encoded.
+	 */
+	return (unsigned char *)iptr - (unsigned char *)headerp;
+
+out:	/* undo each chunk registered so far; pos is reused as seg index */
+	for (pos = 0; nchunks--;)
+		pos += rpcrdma_deregister_external(
+				&req->rl_segments[pos], r_xprt, NULL);
+	return 0;	/* 0 header length signals failure to the caller */
+}
+
+/*
+ * Copy write data inline.
+ * Used for "small" requests: send-buffer page data and tail are copied
+ * directly behind rq_svec[0] in the pre-registered memory buffer for this
+ * request, growing its iov_len. For small amounts of data, this is
+ * efficient. The cutoff value is tunable. Returns the pad to use (0: none).
+ */
+static int
+rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
+{
+	int i, npages, curlen;
+	int copy_len;
+	unsigned char *srcp, *destp;
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+
+	destp = rqst->rq_svec[0].iov_base;
+	curlen = rqst->rq_svec[0].iov_len;
+	destp += curlen;	/* copies land right after the existing data */
+	/*
+	 * Do optional padding where it makes sense. Alignment of write
+	 * payload can help the server, if our setting is accurate.
+	 */
+	pad -= (curlen + 36/*sizeof(struct rpcrdma_msg_padded)*/);
+	if (pad < 0 || rqst->rq_slen - curlen < RPCRDMA_INLINE_PAD_THRESH)
+		pad = 0;	/* don't pad this request */
+
+	dprintk("RPC:       %s: pad %d destp 0x%p len %d hdrlen %d\n",
+		__func__, pad, destp, rqst->rq_slen, curlen);
+
+	copy_len = rqst->rq_snd_buf.page_len;
+	r_xprt->rx_stats.pullup_copy_count += copy_len;
+	npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT;
+	for (i = 0; copy_len && i < npages; i++) {
+		if (i == 0)	/* first page is read from page_base onwards */
+			curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base;
+		else
+			curlen = PAGE_SIZE;
+		if (curlen > copy_len)
+			curlen = copy_len;
+		dprintk("RPC:       %s: page %d destp 0x%p len %d curlen %d\n",
+			__func__, i, destp, copy_len, curlen);
+		srcp = kmap_atomic(rqst->rq_snd_buf.pages[i],
+					KM_SKB_SUNRPC_DATA);
+		if (i == 0)
+			memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen);
+		else
+			memcpy(destp, srcp, curlen);
+		kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA);
+		rqst->rq_svec[0].iov_len += curlen;
+		destp += curlen;
+		copy_len -= curlen;
+	}
+	if (rqst->rq_snd_buf.tail[0].iov_len) {
+		curlen = rqst->rq_snd_buf.tail[0].iov_len;
+		if (destp != rqst->rq_snd_buf.tail[0].iov_base) {
+			memcpy(destp,	/* tail is not already in place */
+				rqst->rq_snd_buf.tail[0].iov_base, curlen);
+			r_xprt->rx_stats.pullup_copy_count += curlen;
+		}
+		dprintk("RPC:       %s: tail destp 0x%p len %d curlen %d\n",
+			__func__, destp, copy_len, curlen);
+		rqst->rq_svec[0].iov_len += curlen;
+	}
+	/* header now contains entire send message */
+	return pad;
+}
+
+/*
+ * Marshal a request: the primary job of this routine is to choose
+ * the transfer modes. See comments below.
+ *
+ * Uses multiple RDMA IOVs for a request:
+ *  [0] -- RPC RDMA header, which uses memory from the *start* of the
+ *         preregistered buffer that already holds the RPC data in
+ *         its middle.
+ *  [1] -- the RPC header/data, marshaled by RPC and the NFS protocol.
+ *  [2] -- optional padding.
+ *  [3] -- if padded, header only in [1] and data here.
+ */
+
+int
+rpcrdma_marshal_req(struct rpc_rqst *rqst)
+{
+	struct rpc_xprt *xprt = rqst->rq_task->tk_xprt;
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+	char *base;
+	size_t hdrlen, rpclen, padlen;
+	enum rpcrdma_chunktype rtype, wtype;
+	struct rpcrdma_msg *headerp;
+
+	/*
+	 * rpclen gets amount of data in first buffer, which is the
+	 * pre-registered buffer.
+	 */
+	base = rqst->rq_svec[0].iov_base;
+	rpclen = rqst->rq_svec[0].iov_len;
+
+	/* build RDMA header in private area at front */
+	headerp = (struct rpcrdma_msg *) req->rl_base;
+	/* don't htonl XID, it's already done in request */
+	headerp->rm_xid = rqst->rq_xid;
+	headerp->rm_vers = xdr_one;
+	headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests);
+	headerp->rm_type = __constant_htonl(RDMA_MSG);
+
+	/*
+	 * Chunks needed for results?
+	 *
+	 * o If the expected result is under the inline threshold, all ops
+	 *   return as inline (but see later).
+	 * o Large non-read ops return as a single reply chunk.
+	 * o Large read ops return data as write chunk(s), header as inline.
+	 *
+	 * Note: the NFS code sending down multiple result segments implies
+	 * the op is one of read, readdir[plus], readlink or NFSv4 getacl.
+	 */
+
+	/*
+	 * This code can handle read chunks, write chunks OR reply
+	 * chunks -- only one type. If the request is too big to fit
+	 * inline, then we will choose read chunks. If the request is
+	 * a READ, then use write chunks to separate the file data
+	 * into pages; otherwise use reply chunks.
+	 */
+	if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
+		wtype = rpcrdma_noch;
+	else if (rqst->rq_rcv_buf.page_len == 0)
+		wtype = rpcrdma_replych;
+	else if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
+		wtype = rpcrdma_writech;
+	else
+		wtype = rpcrdma_replych;
+
+	/*
+	 * Chunks needed for arguments?
+	 *
+	 * o If the total request is under the inline threshold, all ops
+	 *   are sent as inline.
+	 * o Large non-write ops are sent with the entire message as a
+	 *   single read chunk (protocol 0-position special case).
+	 * o Large write ops transmit data as read chunk(s), header as
+	 *   inline.
+	 *
+	 * Note: the NFS code sending down multiple argument segments
+	 * implies the op is a write.
+	 * TBD check NFSv4 setacl
+	 */
+	if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
+		rtype = rpcrdma_noch;
+	else if (rqst->rq_snd_buf.page_len == 0)
+		rtype = rpcrdma_areadch;
+	else
+		rtype = rpcrdma_readch;
+
+	/* The following simplification is not true forever */
+	if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
+		wtype = rpcrdma_noch;	/* read chunks win over a reply chunk */
+	BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch);
+
+	if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS &&
+	    (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) {
+		/* forced to "pure inline"? */
+		dprintk("RPC:       %s: too much data (%d/%d) for inline\n",
+			__func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len);
+		return -1;	/* chunks needed, but strategy cannot chunk */
+	}
+
+	hdrlen = 28; /*sizeof *headerp;*/
+	padlen = 0;
+
+	/*
+	 * Pull up any extra send data into the preregistered buffer.
+	 * When padding is in use and applies to the transfer, insert
+	 * it and change the message type.
+	 */
+	if (rtype == rpcrdma_noch) {
+
+		padlen = rpcrdma_inline_pullup(rqst,
+						RPCRDMA_INLINE_PAD_VALUE(rqst));
+
+		if (padlen) {
+			headerp->rm_type = __constant_htonl(RDMA_MSGP);
+			headerp->rm_body.rm_padded.rm_align =
+				htonl(RPCRDMA_INLINE_PAD_VALUE(rqst));
+			headerp->rm_body.rm_padded.rm_thresh =
+				__constant_htonl(RPCRDMA_INLINE_PAD_THRESH);
+			headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero;
+			headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
+			headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
+			hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
+			BUG_ON(wtype != rpcrdma_noch);
+
+		} else {
+			headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero;
+			headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero;
+			headerp->rm_body.rm_nochunks.rm_empty[2] = xdr_zero;
+			/* new length after pullup */
+			rpclen = rqst->rq_svec[0].iov_len;
+			/*
+			 * Currently we try to not actually use read inline.
+			 * Reply chunks have the desirable property that
+			 * they land, packed, directly in the target buffers
+			 * without headers, so they require no fixup. The
+			 * additional RDMA Write op sends the same amount
+			 * of data, streams on-the-wire and adds no overhead
+			 * on receive. Therefore, we request a reply chunk
+			 * for non-writes wherever feasible and efficient.
+			 */
+			if (wtype == rpcrdma_noch &&
+			    r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER)
+				wtype = rpcrdma_replych;
+		}
+	}
+
+	/*
+	 * Marshal chunks. This routine will return the header length
+	 * consumed by marshaling.
+	 */
+	if (rtype != rpcrdma_noch) {
+		hdrlen = rpcrdma_create_chunks(rqst,
+					&rqst->rq_snd_buf, headerp, rtype);
+		wtype = rtype;	/* simplify dprintk */
+
+	} else if (wtype != rpcrdma_noch) {
+		hdrlen = rpcrdma_create_chunks(rqst,
+					&rqst->rq_rcv_buf, headerp, wtype);
+	}
+
+	if (hdrlen == 0)
+		return -1;	/* rpcrdma_create_chunks() reported failure */
+
+	dprintk("RPC:       %s: %s: hdrlen %zd rpclen %zd padlen %zd\n"
+		"                   headerp 0x%p base 0x%p lkey 0x%x\n",
+		__func__, transfertypes[wtype], hdrlen, rpclen, padlen,
+		headerp, base, req->rl_iov.lkey);
+
+	/*
+	 * initialize send_iov's - normally only two: rdma chunk header and
+	 * single preregistered RPC header buffer, but if padding is present,
+	 * then use a preregistered (and zeroed) pad buffer between the RPC
+	 * header and any write data. In all non-rdma cases, any following
+	 * data has been copied into the RPC header buffer.
+	 */
+	req->rl_send_iov[0].addr = req->rl_iov.addr;
+	req->rl_send_iov[0].length = hdrlen;
+	req->rl_send_iov[0].lkey = req->rl_iov.lkey;
+
+	/* same registration, offset by where base sits behind rl_base */
+	req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base);
+	req->rl_send_iov[1].length = rpclen;
+	req->rl_send_iov[1].lkey = req->rl_iov.lkey;
+
+	req->rl_niovs = 2;
+
+	if (padlen) {
+		struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+
+		req->rl_send_iov[2].addr = ep->rep_pad.addr;
+		req->rl_send_iov[2].length = padlen;
+		req->rl_send_iov[2].lkey = ep->rep_pad.lkey;
+
+		/* rpclen is still the pre-pullup length on this path */
+		req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen;
+		req->rl_send_iov[3].length = rqst->rq_slen - rpclen;
+		req->rl_send_iov[3].lkey = req->rl_iov.lkey;
+
+		req->rl_niovs = 4;
+	}
+
+	return 0;	/* success; every failure path above returns -1 */
+}
+
+/*
+ * Chase down a received write or reply chunklist to get length
+ * RDMA'd by server. See map at rpcrdma_create_chunks()! :-)
+ * The list is bounds-checked against the received length before it is
+ * read. Returns total length and advances *iptrp, or -1 if malformed.
+ */
+static int
+rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, u32 **iptrp)
+{
+	unsigned int i, total_len = 0;
+	struct rpcrdma_write_chunk *cur_wchunk;
+	char *end = rep->rr_base + rep->rr_len;	/* one past received data */
+	char *array = (char *) (*iptrp + 1);	/* first element, after count */
+	size_t fixed = wrchunk ? sizeof(u32) : 0;	/* terminating word */
+
+	/* count word (and terminator, if expected) must have been received */
+	if (max < 0 || array + fixed > end)
+		return -1;
+	i = ntohl(**iptrp);	/* get array count */
+	if (i > max || i > (end - array - fixed) / sizeof(*cur_wchunk))
+		return -1;	/* too many elements, or array is truncated */
+	cur_wchunk = (struct rpcrdma_write_chunk *) array;
+	while (i--) {
+		struct rpcrdma_segment *seg = &cur_wchunk->wc_target;
+		ifdebug(FACILITY) {
+			u64 off;
+			xdr_decode_hyper((u32 *)&seg->rs_offset, &off);
+			dprintk("RPC:       %s: chunk %d@0x%llx:0x%x\n",
+				__func__,
+				ntohl(seg->rs_length),
+				off,
+				ntohl(seg->rs_handle));
+		}
+		total_len += ntohl(seg->rs_length);
+		++cur_wchunk;
+	}
+	/* check and adjust for properly terminated write chunk */
+	if (wrchunk && *(u32 *) cur_wchunk != xdr_zero)
+		return -1;
+	*iptrp = (u32 *) ((char *) cur_wchunk + fixed);
+	return total_len;
+}
+
+/*
+ * Scatter @copy_len inline received bytes at @srcp into rq_rcv_buf's iov's.
+ */
+static void
+rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
+{
+	int i, npages, curlen, olen;
+	char *destp;
+
+	curlen = rqst->rq_rcv_buf.head[0].iov_len;
+	if (curlen > copy_len) {	/* write chunk header fixup */
+		curlen = copy_len;
+		rqst->rq_rcv_buf.head[0].iov_len = curlen;
+	}
+
+	dprintk("RPC:       %s: srcp 0x%p len %d hdrlen %d\n",
+		__func__, srcp, copy_len, curlen);
+
+	/* Shift pointer for first receive segment only */
+	rqst->rq_rcv_buf.head[0].iov_base = srcp;
+	srcp += curlen;
+	copy_len -= curlen;
+
+	olen = copy_len;	/* bytes still to place after the head */
+	i = 0;
+	rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen;
+	if (copy_len && rqst->rq_rcv_buf.page_len) {
+		npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base +
+			rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT;
+		for (; i < npages; i++) {
+			if (i == 0)	/* first page is filled from page_base */
+				curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base;
+			else
+				curlen = PAGE_SIZE;
+			if (curlen > copy_len)
+				curlen = copy_len;
+			dprintk("RPC:       %s: page %d"
+				" srcp 0x%p len %d curlen %d\n",
+				__func__, i, srcp, copy_len, curlen);
+			destp = kmap_atomic(rqst->rq_rcv_buf.pages[i],
+						KM_SKB_SUNRPC_DATA);
+			if (i == 0)
+				memcpy(destp + rqst->rq_rcv_buf.page_base,
+						srcp, curlen);
+			else
+				memcpy(destp, srcp, curlen);
+			flush_dcache_page(rqst->rq_rcv_buf.pages[i]);
+			kunmap_atomic(destp, KM_SKB_SUNRPC_DATA);
+			srcp += curlen;
+			copy_len -= curlen;
+			if (copy_len == 0)
+				break;
+		}
+		/* page_len becomes the byte count actually copied to pages */
+		rqst->rq_rcv_buf.page_len = olen - copy_len;
+	} else
+		rqst->rq_rcv_buf.page_len = 0;
+
+	if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) {
+		curlen = copy_len;
+		if (curlen > rqst->rq_rcv_buf.tail[0].iov_len)
+			curlen = rqst->rq_rcv_buf.tail[0].iov_len;
+		if (rqst->rq_rcv_buf.tail[0].iov_base != srcp)
+			memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
+		dprintk("RPC:       %s: tail srcp 0x%p len %d curlen %d\n",
+			__func__, srcp, copy_len, curlen);
+		rqst->rq_rcv_buf.tail[0].iov_len = curlen;
+		copy_len -= curlen; ++i;
+	} else
+		rqst->rq_rcv_buf.tail[0].iov_len = 0;
+
+	if (copy_len)	/* data left over that fit nowhere: only logged */
+		dprintk("RPC:       %s: %d bytes in"
+			" %d extra segments (%d lost)\n",
+			__func__, olen, i, copy_len);
+
+	/* TBD avoid a warning from call_decode() */
+	rqst->rq_private_buf = rqst->rq_rcv_buf;
+}
+
+/*
+ * Connection state upcall: invoked when an async event posted to
+ * the connection changes its state. The transport is only flagged
+ * as connected or disconnected here, waking the pending tasks on a
+ * transition; the rpc timers take care of everything else.
+ */
+void
+rpcrdma_conn_func(struct rpcrdma_ep *ep)
+{
+	struct rpc_xprt *rpc_xprt = ep->rep_xprt;
+	int is_up;
+
+	spin_lock_bh(&rpc_xprt->transport_lock);
+	is_up = ep->rep_connected > 0;
+	/* pending tasks are woken only when the flag really flips */
+	if (is_up && !xprt_test_and_set_connected(rpc_xprt))
+		xprt_wake_pending_tasks(rpc_xprt, 0);
+	else if (!is_up && xprt_test_and_clear_connected(rpc_xprt))
+		xprt_wake_pending_tasks(rpc_xprt, ep->rep_connected);
+	spin_unlock_bh(&rpc_xprt->transport_lock);
+}
+
+/*
+ * Called via rr_func (zeroed by upcall) when the memory window unbind we
+ * are waiting for completes; signals that by waking rep->rr_unbind waiters.
+ */
+static void
+rpcrdma_unbind_func(struct rpcrdma_rep *rep)
+{
+	wake_up(&rep->rr_unbind);
+}
+
+/*
+ * Called as a tasklet to do req/reply match and complete a request
+ * Errors must result in the RPC task either being awakened, or
+ * allowed to timeout, to discover the errors at that time.
+ */
+void
+rpcrdma_reply_handler(struct rpcrdma_rep *rep)
+{
+	struct rpcrdma_msg *headerp;
+	struct rpcrdma_req *req;
+	struct rpc_rqst *rqst;
+	struct rpc_xprt *xprt = rep->rr_xprt;
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	u32 *iptr;
+	int i, rdmalen, status;
+
+	/* Check status. If bad, signal disconnect and return rep to pool */
+	if (rep->rr_len == ~0U) {
+		rpcrdma_recv_buffer_put(rep);
+		if (r_xprt->rx_ep.rep_connected == 1) {
+			r_xprt->rx_ep.rep_connected = -EIO;
+			rpcrdma_conn_func(&r_xprt->rx_ep);
+		}
+		return;
+	}
+	if (rep->rr_len < 28) {	/* shorter than the fixed header */
+		dprintk("RPC:       %s: short/invalid reply\n", __func__);
+		goto repost;
+	}
+	headerp = (struct rpcrdma_msg *) rep->rr_base;
+	if (headerp->rm_vers != xdr_one) {
+		dprintk("RPC:       %s: invalid version %d\n",
+			__func__, ntohl(headerp->rm_vers));
+		goto repost;
+	}
+
+	/* Get XID and try for a match. */
+	spin_lock(&xprt->transport_lock);	/* held until the end on a match */
+	rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
+	if (rqst == NULL) {
+		spin_unlock(&xprt->transport_lock);
+		dprintk("RPC:       %s: reply 0x%p failed "
+			"to match any request xid 0x%08x len %d\n",
+			__func__, rep, headerp->rm_xid, rep->rr_len);
+repost:	/* also entered by goto from above, with the lock not held */
+		r_xprt->rx_stats.bad_reply_count++;
+		rep->rr_func = rpcrdma_reply_handler;
+		if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
+			rpcrdma_recv_buffer_put(rep);	/* nonzero: give it back */
+
+		return;
+	}
+
+	/* get request object */
+	req = rpcr_to_rdmar(rqst);
+
+	dprintk("RPC:       %s: reply 0x%p completes request 0x%p\n"
+		"                   RPC request 0x%p xid 0x%08x\n",
+			__func__, rep, req, rqst, headerp->rm_xid);
+
+	BUG_ON(!req || req->rl_reply);
+
+	/* from here on, the reply is no longer an orphan */
+	req->rl_reply = rep;
+
+	/* check for expected message types */
+	/* The order of some of these tests is important. */
+	switch (headerp->rm_type) {
+	case __constant_htonl(RDMA_MSG):
+		/* never expect read chunks */
+		/* never expect reply chunks (two ways to check) */
+		/* never expect write chunks without having offered RDMA */
+		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
+		    (headerp->rm_body.rm_chunks[1] == xdr_zero &&
+		     headerp->rm_body.rm_chunks[2] != xdr_zero) ||
+		    (headerp->rm_body.rm_chunks[1] != xdr_zero &&
+		     req->rl_nchunks == 0))
+			goto badheader;
+		if (headerp->rm_body.rm_chunks[1] != xdr_zero) {
+			/* count any expected write chunks in read reply */
+			/* start at write chunk array count */
+			iptr = &headerp->rm_body.rm_chunks[2];
+			rdmalen = rpcrdma_count_chunks(rep,
+						req->rl_nchunks, 1, &iptr);
+			/* check for validity, and no reply chunk after */
+			if (rdmalen < 0 || *iptr++ != xdr_zero)
+				goto badheader;
+			rep->rr_len -=	/* rr_len is now the inline payload size */
+			    ((unsigned char *)iptr - (unsigned char *)headerp);
+			status = rep->rr_len + rdmalen;
+			r_xprt->rx_stats.total_rdma_reply += rdmalen;
+		} else {
+			/* else ordinary inline */
+			iptr = (u32 *)((unsigned char *)headerp + 28);
+			rep->rr_len -= 28; /*sizeof *headerp;*/
+			status = rep->rr_len;
+		}
+		/* Fix up the rpc results for upper layer */
+		rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len);
+		break;
+
+	case __constant_htonl(RDMA_NOMSG):
+		/* never expect read or write chunks, always reply chunks */
+		if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
+		    headerp->rm_body.rm_chunks[1] != xdr_zero ||
+		    headerp->rm_body.rm_chunks[2] != xdr_one ||
+		    req->rl_nchunks == 0)
+			goto badheader;
+		/* NOTE(review): count word sits at offset 28; only >= 28 checked */
+		iptr = (u32 *)((unsigned char *)headerp + 28);
+		rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr);
+		if (rdmalen < 0)
+			goto badheader;
+		r_xprt->rx_stats.total_rdma_reply += rdmalen;
+		/* Reply chunk buffer already is the reply vector - no fixup. */
+		status = rdmalen;
+		break;
+
+badheader:
+	default:
+		dprintk("%s: invalid rpcrdma reply header (type %d):"
+				" chunks[012] == %d %d %d"
+				" expected chunks <= %d\n",
+				__func__, ntohl(headerp->rm_type),
+				headerp->rm_body.rm_chunks[0],
+				headerp->rm_body.rm_chunks[1],
+				headerp->rm_body.rm_chunks[2],
+				req->rl_nchunks);
+		status = -EIO;	/* request still completes, with an error */
+		r_xprt->rx_stats.bad_reply_count++;
+		break;
+	}
+
+	/* If using mw bind, start the deregister process now. */
+	/* (Note: if mr_free(), cannot perform it here, in tasklet context) */
+	if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) {
+	case RPCRDMA_MEMWINDOWS:
+		for (i = 0; req->rl_nchunks-- > 1;)	/* all but the last */
+			i += rpcrdma_deregister_external(
+				&req->rl_segments[i], r_xprt, NULL);
+		/* Optionally wait (not here) for unbinds to complete */
+		rep->rr_func = rpcrdma_unbind_func;
+		(void) rpcrdma_deregister_external(&req->rl_segments[i],
+						   r_xprt, rep);
+		break;
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+		/* NOTE(review): this loop exits with rl_nchunks one below zero */
+		for (i = 0; req->rl_nchunks--;)
+			i += rpcrdma_deregister_external(&req->rl_segments[i],
+							 r_xprt, NULL);
+		break;
+	default:
+		break;
+	}
+
+	dprintk("RPC:       %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
+			__func__, xprt, rqst, status);
+	xprt_complete_rqst(rqst->rq_task, status);
+	spin_unlock(&xprt->transport_lock);
+}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
new file mode 100644
index 00000000000..dc55cc974c9
--- /dev/null
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -0,0 +1,800 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *      Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *      Neither the name of the Network Appliance, Inc. nor the names of
+ *      its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written
+ *      permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * transport.c
+ *
+ * This file contains the top-level implementation of an RPC RDMA
+ * transport.
+ *
+ * Naming convention: functions beginning with xprt_ are part of the
+ * transport switch. All others are RPC RDMA internal.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+
+#include "xprt_rdma.h"
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_TRANS
+#endif
+
+MODULE_LICENSE("Dual BSD/GPL");
+
+MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS");
+MODULE_AUTHOR("Network Appliance, Inc.");
+
+/*
+ * tunables: module-wide defaults, read by xprt_setup_rdma() for each new transport
+ */
+
+static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
+static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
+static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
+static unsigned int xprt_rdma_inline_write_padding;	/* static storage: starts at 0 */
+#if !RPCRDMA_PERSISTENT_REGISTRATION
+static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */
+#else
+static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL;
+#endif
+
+#ifdef RPC_DEBUG
+
+static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE;
+static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
+static unsigned int zero;	/* static storage, so 0: lower bound for write padding */
+static unsigned int max_padding = PAGE_SIZE;
+static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
+static unsigned int max_memreg = RPCRDMA_LAST - 1;
+
+static struct ctl_table_header *sunrpc_table_header;	/* set by xprt_rdma_init(), cleared by xprt_rdma_cleanup() */
+
+static ctl_table xr_tunables_table[] = {	/* one entry per tunable; child of the "sunrpc" directory below */
+	{
+		.ctl_name	= CTL_SLOTTABLE_RDMA,
+		.procname	= "rdma_slot_table_entries",
+		.data		= &xprt_rdma_slot_table_entries,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_slot_table_size,	/* lower bound */
+		.extra2		= &max_slot_table_size	/* upper bound */
+	},
+	{
+		.ctl_name	= CTL_RDMA_MAXINLINEREAD,
+		.procname	= "rdma_max_inline_read",
+		.data		= &xprt_rdma_max_inline_read,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,	/* no extra1/extra2 bounds supplied */
+		.strategy	= &sysctl_intvec,
+	},
+	{
+		.ctl_name	= CTL_RDMA_MAXINLINEWRITE,
+		.procname	= "rdma_max_inline_write",
+		.data		= &xprt_rdma_max_inline_write,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,	/* no extra1/extra2 bounds supplied */
+		.strategy	= &sysctl_intvec,
+	},
+	{
+		.ctl_name	= CTL_RDMA_WRITEPADDING,
+		.procname	= "rdma_inline_write_padding",
+		.data		= &xprt_rdma_inline_write_padding,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+		.extra2		= &max_padding,
+	},
+	{
+		.ctl_name	= CTL_RDMA_MEMREG,
+		.procname	= "rdma_memreg_strategy",
+		.data		= &xprt_rdma_memreg_strategy,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &min_memreg,
+		.extra2		= &max_memreg,
+	},
+	{
+		.ctl_name = 0,	/* table terminator */
+	},
+};
+
+static ctl_table sunrpc_table[] = {	/* root table handed to register_sysctl_table() */
+	{
+		.ctl_name	= CTL_SUNRPC,
+		.procname	= "sunrpc",
+		.mode		= 0555,
+		.child		= xr_tunables_table
+	},
+	{
+		.ctl_name = 0,	/* table terminator */
+	},
+};
+
+#endif
+
+static struct rpc_xprt_ops xprt_rdma_procs;	/* forward reference */
+
+static void
+xprt_rdma_format_addresses(struct rpc_xprt *xprt)
+{	/* Fill xprt->address_strings[] from the address in rpcx_to_rdmad(xprt), read as a sockaddr_in. */
+	struct sockaddr_in *addr = (struct sockaddr_in *)
+					&rpcx_to_rdmad(xprt).addr;
+	char *buf;
+
+	buf = kzalloc(20, GFP_KERNEL);	/* allocation failures are not reported: the slot is left NULL */
+	if (buf)
+		snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr));
+	xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
+
+	buf = kzalloc(8, GFP_KERNEL);
+	if (buf)
+		snprintf(buf, 8, "%u", ntohs(addr->sin_port));	/* port in decimal, host byte order */
+	xprt->address_strings[RPC_DISPLAY_PORT] = buf;
+
+	xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";	/* string literal, not heap memory */
+
+	buf = kzalloc(48, GFP_KERNEL);
+	if (buf)
+		snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s",
+			NIPQUAD(addr->sin_addr.s_addr),
+			ntohs(addr->sin_port), "rdma");
+	xprt->address_strings[RPC_DISPLAY_ALL] = buf;
+
+	buf = kzalloc(10, GFP_KERNEL);
+	if (buf)
+		snprintf(buf, 10, "%02x%02x%02x%02x",	/* four address bytes as 8 hex digits */
+			NIPQUAD(addr->sin_addr.s_addr));
+	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
+
+	buf = kzalloc(8, GFP_KERNEL);
+	if (buf)
+		snprintf(buf, 8, "%4hx", ntohs(addr->sin_port));	/* port in hex, space-padded to width 4 */
+	xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
+
+	buf = kzalloc(30, GFP_KERNEL);
+	if (buf)
+		snprintf(buf, 30, NIPQUAD_FMT".%u.%u",	/* dotted quad, then port high byte and low byte */
+			NIPQUAD(addr->sin_addr.s_addr),
+			ntohs(addr->sin_port) >> 8,
+			ntohs(addr->sin_port) & 0xff);
+	xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
+
+	/* netid */
+	xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";	/* string literal, not heap memory */
+}
+
+static void
+xprt_rdma_free_addresses(struct rpc_xprt *xprt)
+{	/* Free the six kzalloc'ed strings; the PROTO and NETID slots hold literals and are skipped. */
+	kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
+	kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
+	kfree(xprt->address_strings[RPC_DISPLAY_ALL]);
+	kfree(xprt->address_strings[RPC_DISPLAY_HEX_ADDR]);
+	kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
+	kfree(xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR]);
+}
+
+/* Work item queued by xprt_rdma_connect(): one (re)connect attempt. */
+static void
+xprt_rdma_connect_worker(struct work_struct *work)
+{
+	struct rpcrdma_xprt *rdma =
+		container_of(work, struct rpcrdma_xprt, rdma_connect.work);
+	struct rpc_xprt *rpc_xprt = &rdma->xprt;
+
+	/* Nothing is attempted once the transport is marked shut down. */
+	if (!rpc_xprt->shutdown) {
+		int err;
+
+		xprt_clear_connected(rpc_xprt);
+
+		dprintk("RPC:       %s: %sconnect\n", __func__,
+				rdma->rx_ep.rep_connected != 0 ? "re" : "");
+		err = rpcrdma_ep_connect(&rdma->rx_ep, &rdma->rx_ia);
+		/* Pending tasks are woken here only when the attempt failed. */
+		if (err)
+			xprt_wake_pending_tasks(rpc_xprt, err);
+	}
+
+	/* Common exit for success, failure and shutdown. */
+	dprintk("RPC:       %s: exit\n", __func__);
+	xprt_clear_connecting(rpc_xprt);
+}
+
+/*
+ * xprt_rdma_destroy
+ *
+ * Destroy the xprt.
+ * Free all memory associated with the object, including its own.
+ * NOTE: none of the *destroy methods free memory for their top-level
+ * objects, even though they may have allocated it (they do free
+ * private memory). It's up to the caller to handle it. In this
+ * case (RDMA transport), all structure memory is inlined with the
+ * struct rpcrdma_xprt.
+ */
+static void
+xprt_rdma_destroy(struct rpc_xprt *xprt)
+{
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	int rc;
+
+	dprintk("RPC:       %s: called\n", __func__);
+
+	cancel_delayed_work(&r_xprt->rdma_connect);	/* drop a connect attempt that has not started yet */
+	flush_scheduled_work();	/* presumably waits out a connect worker that is already running */
+
+	xprt_clear_connected(xprt);
+
+	rpcrdma_buffer_destroy(&r_xprt->rx_buf);	/* teardown order: buffers, endpoint, then the adapter */
+	rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
+	if (rc)	/* failure is only logged; teardown continues */
+		dprintk("RPC:       %s: rpcrdma_ep_destroy returned %i\n",
+			__func__, rc);
+	rpcrdma_ia_close(&r_xprt->rx_ia);
+
+	xprt_rdma_free_addresses(xprt);
+
+	kfree(xprt->slot);
+	xprt->slot = NULL;
+	kfree(xprt);	/* releases the allocation made by kzalloc() in xprt_setup_rdma() */
+
+	dprintk("RPC:       %s: returning\n", __func__);
+
+	module_put(THIS_MODULE);	/* pairs with try_module_get() in xprt_setup_rdma() */
+}
+
+/**
+ * xprt_setup_rdma - Set up transport to use RDMA
+ * @args: rpc transport arguments (dstaddr and addrlen are consumed here)
+ * Returns the new rpc_xprt, or an ERR_PTR()-encoded errno on failure.
+ */
+static struct rpc_xprt *
+xprt_setup_rdma(struct xprt_create *args)
+{
+	struct rpcrdma_create_data_internal cdata;
+	struct rpc_xprt *xprt;
+	struct rpcrdma_xprt *new_xprt;
+	struct rpcrdma_ep *new_ep;
+	struct sockaddr_in *sin;
+	int rc;
+
+	if (args->addrlen > sizeof(xprt->addr)) {	/* sizeof only: xprt is not dereferenced */
+		dprintk("RPC:       %s: address too large\n", __func__);
+		return ERR_PTR(-EBADF);
+	}
+
+	xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL);	/* sized for the enclosing rpcrdma_xprt */
+	if (xprt == NULL) {
+		dprintk("RPC:       %s: couldn't allocate rpcrdma_xprt\n",
+			__func__);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	xprt->max_reqs = xprt_rdma_slot_table_entries;
+	xprt->slot = kcalloc(xprt->max_reqs,
+				sizeof(struct rpc_rqst), GFP_KERNEL);
+	if (xprt->slot == NULL) {
+		dprintk("RPC:       %s: couldn't allocate %d slots\n",
+			__func__, xprt->max_reqs);
+		kfree(xprt);	/* must follow the message above, which reads xprt->max_reqs */
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* 60 second timeout, no retries */
+	xprt_set_timeout(&xprt->timeout, 0, 60UL * HZ);
+	xprt->bind_timeout = (60U * HZ);
+	xprt->connect_timeout = (60U * HZ);
+	xprt->reestablish_timeout = (5U * HZ);
+	xprt->idle_timeout = (5U * 60 * HZ);
+
+	xprt->resvport = 0;		/* privileged port not needed */
+	xprt->tsh_size = 0;		/* RPC-RDMA handles framing */
+	xprt->max_payload = RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE;
+	xprt->ops = &xprt_rdma_procs;
+
+	/*
+	 * Set up RDMA-specific connect data.
+	 */
+
+	/* Put server RDMA address in local cdata */
+	memcpy(&cdata.addr, args->dstaddr, args->addrlen);
+
+	/* Ensure xprt->addr holds valid server TCP (not RDMA)
+	 * address, for any side protocols which peek at it */
+	xprt->prot = IPPROTO_TCP;
+	xprt->addrlen = args->addrlen;
+	memcpy(&xprt->addr, &cdata.addr, xprt->addrlen);
+
+	sin = (struct sockaddr_in *)&cdata.addr;
+	if (ntohs(sin->sin_port) != 0)	/* a non-zero port means no rpcbind lookup is needed */
+		xprt_set_bound(xprt);
+
+	dprintk("RPC:       %s: %u.%u.%u.%u:%u\n", __func__,
+			NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port));
+
+	/* Set max requests */
+	cdata.max_requests = xprt->max_reqs;
+
+	/* Set some length limits */
+	cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
+	cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
+
+	cdata.inline_wsize = xprt_rdma_max_inline_write;
+	if (cdata.inline_wsize > cdata.wsize)	/* clamp the tunable to the RDMA limit */
+		cdata.inline_wsize = cdata.wsize;
+
+	cdata.inline_rsize = xprt_rdma_max_inline_read;
+	if (cdata.inline_rsize > cdata.rsize)	/* clamp the tunable to the RDMA limit */
+		cdata.inline_rsize = cdata.rsize;
+
+	cdata.padding = xprt_rdma_inline_write_padding;
+
+	/*
+	 * Create new transport instance, which includes initialized
+	 *  o ia
+	 *  o endpoint
+	 *  o buffers
+	 */
+
+	new_xprt = rpcx_to_rdmax(xprt);
+
+	rc = rpcrdma_ia_open(new_xprt, (struct sockaddr *) &cdata.addr,
+				xprt_rdma_memreg_strategy);
+	if (rc)
+		goto out1;
+
+	/*
+	 * initialize and create ep
+	 */
+	new_xprt->rx_data = cdata;
+	new_ep = &new_xprt->rx_ep;
+	new_ep->rep_remote_addr = cdata.addr;
+
+	rc = rpcrdma_ep_create(&new_xprt->rx_ep,
+				&new_xprt->rx_ia, &new_xprt->rx_data);
+	if (rc)
+		goto out2;
+
+	/*
+	 * Allocate pre-registered send and receive buffers for headers and
+	 * any inline data. Also specify any padding which will be provided
+	 * from a preregistered zero buffer.
+	 */
+	rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia,
+				&new_xprt->rx_data);
+	if (rc)
+		goto out3;
+
+	/*
+	 * Register a callback for connection events. This is necessary because
+	 * connection loss notification is async. We also catch connection loss
+	 * when reaping receives.
+	 */
+	INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker);
+	new_ep->rep_func = rpcrdma_conn_func;
+	new_ep->rep_xprt = xprt;
+
+	xprt_rdma_format_addresses(xprt);
+
+	if (!try_module_get(THIS_MODULE))	/* reference is dropped in xprt_rdma_destroy() */
+		goto out4;
+
+	return xprt;
+
+out4:	/* NOTE(review): rx_buf from rpcrdma_buffer_create() is not destroyed on this path - confirm */
+	xprt_rdma_free_addresses(xprt);
+	rc = -EINVAL;
+out3:
+	(void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
+out2:
+	rpcrdma_ia_close(&new_xprt->rx_ia);
+out1:
+	kfree(xprt->slot);
+	kfree(xprt);
+	return ERR_PTR(rc);
+}
+
+/*
+ * Close a connection, during shutdown or timeout/reconnect
+ */
+static void
+xprt_rdma_close(struct rpc_xprt *xprt)
+{
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+	dprintk("RPC:       %s: closing\n", __func__);
+	xprt_disconnect(xprt);	/* RPC-level disconnect first, then the RDMA endpoint */
+	(void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);	/* result deliberately ignored */
+}
+
+static void
+xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
+{	/* Store @port (host order) into both copies of the server address, treated as sockaddr_in. */
+	struct sockaddr_in *sap;
+
+	sap = (struct sockaddr_in *)&xprt->addr;	/* generic RPC transport copy */
+	sap->sin_port = htons(port);
+	sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr;	/* copy reached through rpcx_to_rdmad() */
+	sap->sin_port = htons(port);
+	dprintk("RPC:       %s: %u\n", __func__, port);
+}
+
+static void
+xprt_rdma_connect(struct rpc_task *task)
+{
+	struct rpc_xprt *rpc_xprt = (struct rpc_xprt *)task->tk_xprt;
+	struct rpcrdma_xprt *rdma = rpcx_to_rdmax(rpc_xprt);
+	int reconnect;
+
+	/* Connecting flag was already set: leave the pending attempt alone. */
+	if (xprt_test_and_set_connecting(rpc_xprt))
+		return;
+
+	reconnect = (rdma->rx_ep.rep_connected != 0);
+	schedule_delayed_work(&rdma->rdma_connect,
+			reconnect ? rpc_xprt->reestablish_timeout : 0);
+	/* Only a first connect issued by a synchronous task flushes the work. */
+	if (!reconnect && !RPC_IS_ASYNC(task))
+		flush_scheduled_work();
+}
+
+static int
+xprt_rdma_reserve_xprt(struct rpc_task *task)
+{	/* Recompute xprt->cwnd from the current credit count, then defer to xprt_reserve_xprt_cong(). */
+	struct rpc_xprt *xprt = task->tk_xprt;
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
+
+	/* == RPC_CWNDSCALE @ init, but *after* setup */
+	if (r_xprt->rx_buf.rb_cwndscale == 0UL) {	/* first call: capture the initial cwnd as the scale */
+		r_xprt->rx_buf.rb_cwndscale = xprt->cwnd;
+		dprintk("RPC:       %s: cwndscale %lu\n", __func__,
+			r_xprt->rx_buf.rb_cwndscale);
+		BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);	/* fires if xprt->cwnd was still 0 */
+	}
+	xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;	/* window = credits x scale */
+	return xprt_reserve_xprt_cong(task);
+}
+
+/*
+ * The RDMA allocate/free functions need the task structure as a place
+ * to hide the struct rpcrdma_req, which is necessary for the actual send/recv
+ * sequence. For this reason, the recv buffers are attached to send
+ * buffers for portions of the RPC. Note that the RPC layer allocates
+ * both send and receive buffers in the same call. We may register
+ * the receive buffer portion when using reply chunks.
+ */
+static void *
+xprt_rdma_allocate(struct rpc_task *task, size_t size)
+{
+	struct rpc_xprt *xprt = task->tk_xprt;
+	struct rpcrdma_req *req, *nreq;
+
+	req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf);
+	BUG_ON(NULL == req);	/* the pool is expected never to come back empty here */
+
+	if (size > req->rl_size) {	/* pooled buffer too small: fail, or build a one-off larger req */
+		dprintk("RPC:       %s: size %zd too large for buffer[%zd]: "
+			"prog %d vers %d proc %d\n",
+			__func__, size, req->rl_size,
+			task->tk_client->cl_prog, task->tk_client->cl_vers,
+			task->tk_msg.rpc_proc->p_proc);
+		/*
+		 * Outgoing length shortage. Our inline write max must have
+		 * been configured to perform direct i/o.
+		 *
+		 * This is therefore a large metadata operation, and the
+		 * allocate call was made on the maximum possible message,
+		 * e.g. containing long filename(s) or symlink data. In
+		 * fact, while these metadata operations *might* carry
+		 * large outgoing payloads, they rarely *do*. However, we
+		 * have to commit to the request here, so reallocate and
+		 * register it now. The data path will never require this
+		 * reallocation.
+		 *
+		 * If the allocation or registration fails, the RPC framework
+		 * will (doggedly) retry.
+		 */
+		if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy ==
+				RPCRDMA_BOUNCEBUFFERS) {
+			/* forced to "pure inline" */
+			dprintk("RPC:       %s: too much data (%zd) for inline "
+					"(r/w max %d/%d)\n", __func__, size,
+					rpcx_to_rdmad(xprt).inline_rsize,
+					rpcx_to_rdmad(xprt).inline_wsize);
+			size = req->rl_size;
+			rpc_exit(task, -EIO);		/* fail the operation */
+			rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
+			goto out;	/* the pooled buffer is still returned to the caller */
+		}
+		if (task->tk_flags & RPC_TASK_SWAPPER)
+			nreq = kmalloc(sizeof *req + size, GFP_ATOMIC);
+		else
+			nreq = kmalloc(sizeof *req + size, GFP_NOFS);
+		if (nreq == NULL)
+			goto outfail;
+
+		if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia,
+				nreq->rl_base, size + sizeof(struct rpcrdma_req)
+				- offsetof(struct rpcrdma_req, rl_base),
+				&nreq->rl_handle, &nreq->rl_iov)) {
+			kfree(nreq);
+			goto outfail;
+		}
+		rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size;
+		nreq->rl_size = size;
+		nreq->rl_niovs = 0;
+		nreq->rl_nchunks = 0;
+		nreq->rl_buffer = (struct rpcrdma_buffer *)req;	/* stash the pooled req; xprt_rdma_free() casts it back */
+		nreq->rl_reply = req->rl_reply;	/* the reply buffer moves over to the new req */
+		memcpy(nreq->rl_segments,
+			req->rl_segments, sizeof nreq->rl_segments);
+		/* flag the swap with an unused field */
+		nreq->rl_iov.length = 0;
+		req->rl_reply = NULL;
+		req = nreq;
+	}
+	dprintk("RPC:       %s: size %zd, request 0x%p\n", __func__, size, req);
+out:
+	return req->rl_xdr_buf;	/* xprt_rdma_free() recovers req from this pointer via container_of() */
+
+outfail:
+	rpcrdma_buffer_put(req);	/* hand the pooled request back before reporting failure */
+	rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
+	return NULL;
+}
+
+/* Return every RDMA resource tied to @buffer (an rl_xdr_buf) to the pool. */
+static void
+xprt_rdma_free(void *buffer)
+{
+	struct rpcrdma_req *req, *pooled;
+	struct rpcrdma_xprt *r_xprt;
+	struct rpcrdma_rep *rep;
+	int i;
+
+	if (buffer == NULL)
+		return;
+
+	req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]);
+	/* A hardway req (see allocate) keeps the pooled req in rl_buffer. */
+	pooled = req->rl_iov.length ? req : (struct rpcrdma_req *)req->rl_buffer;
+	r_xprt = container_of(pooled->rl_buffer, struct rpcrdma_xprt, rx_buf);
+	rep = req->rl_reply;
+
+	dprintk("RPC:       %s: called on 0x%p%s\n",
+		__func__, rep, (rep && rep->rr_func) ? " (with waiter)" : "");
+
+	/*
+	 * Finish the deregistration. When using mw bind, this was
+	 * begun in rpcrdma_reply_handler(). In all other modes, we
+	 * do it here, in thread context. The process is considered
+	 * complete when the rr_func vector becomes NULL - this
+	 * was put in place during rpcrdma_reply_handler() - the wait
+	 * call below will not block if the dereg is "done". If
+	 * interrupted, our framework will clean up.
+	 */
+	for (i = 0; req->rl_nchunks;) {
+		--req->rl_nchunks;
+		i += rpcrdma_deregister_external(
+			&req->rl_segments[i], r_xprt, NULL);
+	}
+
+	if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) {
+		rep->rr_func = NULL;	/* abandon the callback */
+		req->rl_reply = NULL;
+	}
+
+	if (req->rl_iov.length == 0) {	/* see allocate above */
+		struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer;
+		oreq->rl_reply = req->rl_reply;
+		(void) rpcrdma_deregister_internal(&r_xprt->rx_ia,
+						   req->rl_handle,
+						   &req->rl_iov);
+		kfree(req);
+		req = oreq;
+	}
+
+	/* Put back request+reply buffers */
+	rpcrdma_buffer_put(req);
+}
+
+/*
+ * send_request invokes the meat of RPC RDMA. It must do the following:
+ *  1.  Marshal the RPC request into an RPC RDMA request, which means
+ *	putting a header in front of data, and creating IOVs for RDMA
+ *	from those in the request.
+ *  2.  In marshaling, detect opportunities for RDMA, and use them.
+ *  3.  Post a recv message to set up async completion, then send
+ *	the request (rpcrdma_ep_post).
+ *  4.  No partial sends are possible in the RPC-RDMA protocol (as in UDP).
+ */
+
+static int
+xprt_rdma_send_request(struct rpc_task *task)
+{
+	struct rpc_rqst *rqst = task->tk_rqstp;
+	struct rpc_xprt *xprt = task->tk_xprt;
+	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+	/* marshal the send itself */
+	if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) {	/* marshals only while rl_niovs is 0 */
+		r_xprt->rx_stats.failed_marshal_count++;
+		dprintk("RPC:       %s: rpcrdma_marshal_req failed\n",
+			__func__);
+		return -EIO;
+	}
+
+	if (req->rl_reply == NULL) 		/* e.g. reconnection */
+		rpcrdma_recv_buffer_get(req);
+
+	if (req->rl_reply) {	/* may still be NULL if no receive buffer could be attached */
+		req->rl_reply->rr_func = rpcrdma_reply_handler;
+		/* this need only be done once, but... */
+		req->rl_reply->rr_xprt = xprt;
+	}
+
+	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) {
+		xprt_disconnect(xprt);
+		return -ENOTCONN;	/* implies disconnect */
+	}
+
+	rqst->rq_bytes_sent = 0;	/* no partial sends (item 4 above) */
+	return 0;
+}
+
+static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+{	/* Emit one "xprt: rdma" line: generic xprt->stat counters, then the RDMA rx_stats counters. */
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	long idle_time = 0;	/* reported as 0 unless the transport is connected */
+
+	if (xprt_connected(xprt))
+		idle_time = (long)(jiffies - xprt->last_used) / HZ;	/* jiffies converted to seconds */
+
+	seq_printf(seq,
+	  "\txprt:\trdma %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu "
+	  "%lu %lu %lu %Lu %Lu %Lu %Lu %lu %lu %lu\n",
+
+	   0,	/* need a local port? */
+	   xprt->stat.bind_count,
+	   xprt->stat.connect_count,
+	   xprt->stat.connect_time,
+	   idle_time,
+	   xprt->stat.sends,
+	   xprt->stat.recvs,
+	   xprt->stat.bad_xids,
+	   xprt->stat.req_u,
+	   xprt->stat.bklog_u,
+
+	   r_xprt->rx_stats.read_chunk_count,	/* RDMA-specific counters start here */
+	   r_xprt->rx_stats.write_chunk_count,
+	   r_xprt->rx_stats.reply_chunk_count,
+	   r_xprt->rx_stats.total_rdma_request,
+	   r_xprt->rx_stats.total_rdma_reply,
+	   r_xprt->rx_stats.pullup_copy_count,
+	   r_xprt->rx_stats.fixup_copy_count,
+	   r_xprt->rx_stats.hardway_register_count,
+	   r_xprt->rx_stats.failed_marshal_count,
+	   r_xprt->rx_stats.bad_reply_count);
+}
+
+/*
+ * Plumbing for rpc transport switch and kernel module
+ */
+
+static struct rpc_xprt_ops xprt_rdma_procs = {	/* installed as xprt->ops by xprt_setup_rdma() */
+	.reserve_xprt		= xprt_rdma_reserve_xprt,
+	.release_xprt		= xprt_release_xprt_cong, /* sunrpc/xprt.c */
+	.release_request	= xprt_release_rqst_cong,       /* sunrpc/xprt.c */
+	.set_retrans_timeout	= xprt_set_retrans_timeout_def, /* sunrpc/xprt.c */
+	.rpcbind		= rpcb_getport_async,	/* sunrpc/rpcb_clnt.c */
+	.set_port		= xprt_rdma_set_port,
+	.connect		= xprt_rdma_connect,
+	.buf_alloc		= xprt_rdma_allocate,
+	.buf_free		= xprt_rdma_free,
+	.send_request		= xprt_rdma_send_request,
+	.close			= xprt_rdma_close,
+	.destroy		= xprt_rdma_destroy,
+	.print_stats		= xprt_rdma_print_stats
+};
+
+static struct xprt_class xprt_rdma = {	/* (un)registered by xprt_rdma_init() / xprt_rdma_cleanup() */
+	.list			= LIST_HEAD_INIT(xprt_rdma.list),
+	.name			= "rdma",
+	.owner			= THIS_MODULE,
+	.ident			= XPRT_TRANSPORT_RDMA,
+	.setup			= xprt_setup_rdma,	/* constructor for new transports of this class */
+};
+
+static void __exit xprt_rdma_cleanup(void)
+{	/* Module exit: drop the debug sysctl table (if any), then unregister the transport class. */
+	int rc;
+
+	dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
+#ifdef RPC_DEBUG
+	if (sunrpc_table_header) {
+		unregister_sysctl_table(sunrpc_table_header);
+		sunrpc_table_header = NULL;
+	}
+#endif
+	rc = xprt_unregister_transport(&xprt_rdma);
+	if (rc)	/* failure can only be logged at this point */
+		dprintk("RPC:       %s: xprt_unregister returned %i\n",
+			__func__, rc);
+}
+
+static int __init xprt_rdma_init(void)
+{	/* Module init: register the transport class, log the defaults, add the debug sysctl table. */
+	int rc;
+
+	rc = xprt_register_transport(&xprt_rdma);
+
+	if (rc)
+		return rc;	/* nothing else has been set up yet, so just propagate */
+
+	dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n");
+
+	dprintk(KERN_INFO "Defaults:\n");
+	dprintk(KERN_INFO "\tSlots %d\n"
+		"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
+		xprt_rdma_slot_table_entries,
+		xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
+	dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n",
+		xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
+
+#ifdef RPC_DEBUG
+	if (!sunrpc_table_header)	/* a NULL result from registration is not treated as an error */
+		sunrpc_table_header = register_sysctl_table(sunrpc_table);
+#endif
+	return 0;
+}
+
+module_init(xprt_rdma_init);
+module_exit(xprt_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
new file mode 100644
index 00000000000..44b0fb942e8
--- /dev/null
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -0,0 +1,1627 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *      Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *      Neither the name of the Network Appliance, Inc. nor the names of
+ *      its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written
+ *      permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * verbs.c
+ *
+ * Encapsulates the major functions managing:
+ *  o adapters
+ *  o endpoints
+ *  o connections
+ *  o buffer memory
+ */
+
+#include <linux/pci.h>	/* for Tavor hack below */
+
+#include "xprt_rdma.h"
+
+/*
+ * Globals/Macros
+ */
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_TRANS
+#endif
+
+/*
+ * internal functions
+ */
+
+/*
+ * handle replies in tasklet context, using a single, global list
+ * rdma tasklet function -- just turn around and call the func
+ * for all replies on the list
+ */
+
+static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);	/* guards rpcrdma_tasklets_g */
+static LIST_HEAD(rpcrdma_tasklets_g);	/* reps queued by rpcrdma_schedule_tasklet(), linked via rr_list */
+
+static void
+rpcrdma_run_tasklet(unsigned long data)
+{	/* Drain rpcrdma_tasklets_g, running each rep's rr_func with the list lock dropped. */
+	struct rpcrdma_rep *rep;
+	void (*func)(struct rpcrdma_rep *);
+	unsigned long flags;
+
+	data = data;	/* self-assignment; presumably silences an unused-parameter warning */
+	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+	while (!list_empty(&rpcrdma_tasklets_g)) {
+		rep = list_entry(rpcrdma_tasklets_g.next,
+				 struct rpcrdma_rep, rr_list);
+		list_del(&rep->rr_list);
+		func = rep->rr_func;
+		rep->rr_func = NULL;	/* cleared before the call; xprt_rdma_free() waits for !rr_func */
+		spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+
+		if (func)
+			func(rep);
+		else	/* no handler attached: just return the receive buffer */
+			rpcrdma_recv_buffer_put(rep);
+
+		spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);	/* retake before re-testing the list */
+	}
+	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+}
+
+static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);	/* single global tasklet; data argument unused */
+
+static inline void
+rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
+{	/* Append @rep to the global list under the lock, then kick the tasklet. */
+	unsigned long flags;
+
+	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+	list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);	/* tail insert keeps arrival order */
+	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+	tasklet_schedule(&rpcrdma_tasklet_g);
+}
+
+static void
+rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
+{	/* QP async-error callback: a connected endpoint is moved to -EIO and its waiters notified. */
+	struct rpcrdma_ep *ep = context;
+
+	dprintk("RPC:       %s: QP error %X on device %s ep %p\n",
+		__func__, event->event, event->device->name, context);
+	if (ep->rep_connected == 1) {	/* 1 is the value stored on RDMA_CM_EVENT_ESTABLISHED */
+		ep->rep_connected = -EIO;
+		ep->rep_func(ep);
+		wake_up_all(&ep->rep_connect_wait);
+	}
+}
+
+static void
+rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
+{	/* CQ async-error callback: same handling as the QP variant, only the log text differs. */
+	struct rpcrdma_ep *ep = context;
+
+	dprintk("RPC:       %s: CQ error %X on device %s ep %p\n",
+		__func__, event->event, event->device->name, context);
+	if (ep->rep_connected == 1) {	/* 1 is the value stored on RDMA_CM_EVENT_ESTABLISHED */
+		ep->rep_connected = -EIO;
+		ep->rep_func(ep);
+		wake_up_all(&ep->rep_connect_wait);
+	}
+}
+
+static inline
+void rpcrdma_event_process(struct ib_wc *wc)
+{	/* Handle one work completion: record length and credits, then queue the rep for the tasklet. */
+	struct rpcrdma_rep *rep =
+			(struct rpcrdma_rep *)(unsigned long) wc->wr_id;	/* wr_id carries the rep pointer */
+
+	dprintk("RPC:       %s: event rep %p status %X opcode %X length %u\n",
+		__func__, rep, wc->status, wc->opcode, wc->byte_len);
+
+	if (!rep) /* send or bind completion that we don't care about */
+		return;
+
+	if (IB_WC_SUCCESS != wc->status) {
+		dprintk("RPC:       %s: %s WC status %X, connection lost\n",
+			__func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send",
+			 wc->status);
+		rep->rr_len = ~0U;	/* all-ones length; presumably read as "failed" by the rr_func handler */
+		rpcrdma_schedule_tasklet(rep);
+		return;
+	}
+
+	switch (wc->opcode) {
+	case IB_WC_RECV:
+		rep->rr_len = wc->byte_len;
+		ib_dma_sync_single_for_cpu(
+			rdmab_to_ia(rep->rr_buffer)->ri_id->device,
+			rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
+		/* Keep (only) the most recent credits, after checking validity */
+		if (rep->rr_len >= 16) {	/* presumably the minimum length that includes rm_credit */
+			struct rpcrdma_msg *p =
+					(struct rpcrdma_msg *) rep->rr_base;
+			unsigned int credits = ntohl(p->rm_credit);
+			if (credits == 0) {
+				dprintk("RPC:       %s: server"
+					" dropped credits to 0!\n", __func__);
+				/* don't deadlock */
+				credits = 1;
+			} else if (credits > rep->rr_buffer->rb_max_requests) {
+				dprintk("RPC:       %s: server"
+					" over-crediting: %d (%d)\n",
+					__func__, credits,
+					rep->rr_buffer->rb_max_requests);
+				credits = rep->rr_buffer->rb_max_requests;	/* clamp to our own limit */
+			}
+			atomic_set(&rep->rr_buffer->rb_credits, credits);
+		}
+		/* fall through */
+	case IB_WC_BIND_MW:
+		rpcrdma_schedule_tasklet(rep);
+		break;
+	default:
+		dprintk("RPC:       %s: unexpected WC event %X\n",
+			__func__, wc->opcode);
+		break;
+	}
+}
+
+static inline int
+rpcrdma_cq_poll(struct ib_cq *cq)
+{
+	struct ib_wc completion;
+	int polled;
+
+	/*
+	 * Reap one work completion at a time until the CQ reports none
+	 * left; a negative return from ib_poll_cq() aborts the drain.
+	 */
+	while ((polled = ib_poll_cq(cq, 1, &completion)) > 0)
+		rpcrdma_event_process(&completion);
+
+	if (polled < 0) {
+		dprintk("RPC:       %s: ib_poll_cq failed %i\n",
+			__func__, polled);
+		return polled;
+	}
+
+	return 0;
+}
+
+/*
+ * rpcrdma_cq_event_upcall
+ *
+ * This upcall handles recv, send, bind and unbind events.
+ * It is reentrant but processes single events in order to maintain
+ * ordering of receives to keep server credits.
+ *
+ * It is the responsibility of the scheduled tasklet to return
+ * recv buffers to the pool. NOTE: this affects synchronization of
+ * connection shutdown. That is, the structures required for
+ * the completion of the reply handler must remain intact until
+ * all memory has been reclaimed.
+ *
+ * Note that send events are suppressed and do not result in an upcall.
+ */
+static void
+rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context)
+{
+	int rc;
+
+	rc = rpcrdma_cq_poll(cq);
+	if (rc)	/* polling failed: return without re-arming the CQ */
+		return;
+
+	rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+	if (rc) {
+		dprintk("RPC:       %s: ib_req_notify_cq failed %i\n",
+			__func__, rc);
+		return;
+	}
+
+	rpcrdma_cq_poll(cq);	/* second pass; presumably catches completions that raced with the re-arm */
+}
+
+#ifdef RPC_DEBUG
+static const char * const conn[] = {	/* indexed by event->event in rpcrdma_conn_upcall(); 12 entries, 0..11 */
+	"address resolved",
+	"address error",
+	"route resolved",
+	"route error",
+	"connect request",
+	"connect response",
+	"connect error",
+	"unreachable",
+	"rejected",
+	"established",
+	"disconnected",
+	"device removal"
+};
+#endif
+
+static int
+rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
+{	/* CM event handler: resolution events complete ri_done, connection events update rep_connected. */
+	struct rpcrdma_xprt *xprt = id->context;
+	struct rpcrdma_ia *ia = &xprt->rx_ia;
+	struct rpcrdma_ep *ep = &xprt->rx_ep;
+	struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;	/* only used in dprintk() */
+	struct ib_qp_attr attr;
+	struct ib_qp_init_attr iattr;
+	int connstate = 0;
+
+	switch (event->event) {
+	case RDMA_CM_EVENT_ADDR_RESOLVED:
+	case RDMA_CM_EVENT_ROUTE_RESOLVED:
+		complete(&ia->ri_done);	/* success: ri_async_rc is left untouched */
+		break;
+	case RDMA_CM_EVENT_ADDR_ERROR:
+		ia->ri_async_rc = -EHOSTUNREACH;
+		dprintk("RPC:       %s: CM address resolution error, ep 0x%p\n",
+			__func__, ep);
+		complete(&ia->ri_done);
+		break;
+	case RDMA_CM_EVENT_ROUTE_ERROR:
+		ia->ri_async_rc = -ENETUNREACH;
+		dprintk("RPC:       %s: CM route resolution error, ep 0x%p\n",
+			__func__, ep);
+		complete(&ia->ri_done);
+		break;
+	case RDMA_CM_EVENT_ESTABLISHED:
+		connstate = 1;
+		ib_query_qp(ia->ri_id->qp, &attr,	/* return value ignored; attr is only logged below */
+			IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
+			&iattr);
+		dprintk("RPC:       %s: %d responder resources"
+			" (%d initiator)\n",
+			__func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
+		goto connected;
+	case RDMA_CM_EVENT_CONNECT_ERROR:
+		connstate = -ENOTCONN;
+		goto connected;
+	case RDMA_CM_EVENT_UNREACHABLE:
+		connstate = -ENETDOWN;
+		goto connected;
+	case RDMA_CM_EVENT_REJECTED:
+		connstate = -ECONNREFUSED;
+		goto connected;
+	case RDMA_CM_EVENT_DISCONNECTED:
+		connstate = -ECONNABORTED;
+		goto connected;
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+		connstate = -ENODEV;	/* no goto needed: falls straight into the shared tail */
+connected:
+		dprintk("RPC:       %s: %s: %u.%u.%u.%u:%u"
+			" (ep 0x%p event 0x%x)\n",
+			__func__,
+			(event->event <= 11) ? conn[event->event] :
+						"unknown connection error",
+			NIPQUAD(addr->sin_addr.s_addr),
+			ntohs(addr->sin_port),
+			ep, event->event);
+		atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);	/* credits restart at 1 on any state change */
+		dprintk("RPC:       %s: %sconnected\n",
+					__func__, connstate > 0 ? "" : "dis");
+		ep->rep_connected = connstate;	/* 1 when established, negative errno otherwise */
+		ep->rep_func(ep);
+		wake_up_all(&ep->rep_connect_wait);
+		break;
+	default:
+		ia->ri_async_rc = -EINVAL;
+		dprintk("RPC:       %s: unexpected CM event %X\n",
+			__func__, event->event);
+		complete(&ia->ri_done);
+		break;
+	}
+
+	return 0;	/* every event, including unexpected ones, is reported as handled */
+}
+/* Create a CM id and resolve address, then route; ERR_PTR() on any failure. */
+static struct rdma_cm_id *
+rpcrdma_create_id(struct rpcrdma_xprt *xprt,
+			struct rpcrdma_ia *ia, struct sockaddr *addr)
+{
+	struct rdma_cm_id *id;
+	int rc;
+
+	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP);
+	if (IS_ERR(id)) {
+		rc = PTR_ERR(id);
+		dprintk("RPC:       %s: rdma_create_id() failed %i\n",
+			__func__, rc);
+		return id;	/* already an ERR_PTR */
+	}
+
+	ia->ri_async_rc = 0;	/* cleared before each asynchronous step */
+	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
+	if (rc) {
+		dprintk("RPC:       %s: rdma_resolve_addr() failed %i\n",
+			__func__, rc);
+		goto out;
+	}
+	wait_for_completion(&ia->ri_done);	/* done by the CM upcall */
+	rc = ia->ri_async_rc;	/* error left by the upcall, if any */
+	if (rc)
+		goto out;
+
+	ia->ri_async_rc = 0;
+	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
+	if (rc) {
+		dprintk("RPC:       %s: rdma_resolve_route() failed %i\n",
+			__func__, rc);
+		goto out;
+	}
+	wait_for_completion(&ia->ri_done);	/* done by the CM upcall */
+	rc = ia->ri_async_rc;
+	if (rc)
+		goto out;
+
+	return id;
+
+out:	/* any failure after creation: drop the id, hand back the errno */
+	rdma_destroy_id(id);
+	return ERR_PTR(rc);
+}
+
+/*
+ * Discard every completion still queued on a cq before it is torn down.
+ */
+static void
+rpcrdma_clean_cq(struct ib_cq *cq)
+{
+	struct ib_wc last;
+	int flushed;
+
+	for (flushed = 0; ib_poll_cq(cq, 1, &last) == 1; flushed++)
+		;	/* completions are counted, never processed */
+
+	if (flushed != 0)
+		dprintk("RPC:       %s: flushed %d events (last 0x%x)\n",
+			__func__, flushed, last.opcode);
+}
+
+/*
+ * Exported functions.
+ */
+
+/*
+ * Open and initialize an Interface Adapter.
+ *  o initializes fields of struct rpcrdma_ia: the CM id, the protection
+ *    domain, the optional DMA MR and the memreg strategy; 0 or -errno.
+ */
+int
+rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
+{
+	int rc;
+	struct rpcrdma_ia *ia = &xprt->rx_ia;
+
+	init_completion(&ia->ri_done);	/* waited on in rpcrdma_create_id() */
+
+	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
+	if (IS_ERR(ia->ri_id)) {
+		rc = PTR_ERR(ia->ri_id);
+		goto out1;
+	}
+
+	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
+	if (IS_ERR(ia->ri_pd)) {
+		rc = PTR_ERR(ia->ri_pd);
+		dprintk("RPC:       %s: ib_alloc_pd() failed %i\n",
+			__func__, rc);
+		goto out2;
+	}
+
+	/*
+	 * Optionally obtain an underlying physical identity mapping in
+	 * order to do a memory window-based bind. This base registration
+	 * is protected from remote access - that is enabled only by binding
+	 * for the specific bytes targeted during each RPC operation, and
+	 * revoked after the corresponding completion similar to a storage
+	 * adapter.
+	 */
+	if (memreg > RPCRDMA_REGISTER) {
+		int mem_priv = IB_ACCESS_LOCAL_WRITE;
+		switch (memreg) {
+#if RPCRDMA_PERSISTENT_REGISTRATION
+		case RPCRDMA_ALLPHYSICAL:
+			mem_priv |= IB_ACCESS_REMOTE_WRITE;
+			mem_priv |= IB_ACCESS_REMOTE_READ;
+			break;
+#endif
+		case RPCRDMA_MEMWINDOWS_ASYNC:
+		case RPCRDMA_MEMWINDOWS:
+			mem_priv |= IB_ACCESS_MW_BIND;
+			break;
+		default:	/* other strategies: local write access only */
+			break;
+		}
+		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
+		if (IS_ERR(ia->ri_bind_mem)) {
+			printk(KERN_ALERT "%s: ib_get_dma_mr for "
+				"phys register failed with %lX\n\t"
+				"Will continue with degraded performance\n",
+				__func__, PTR_ERR(ia->ri_bind_mem));
+			memreg = RPCRDMA_REGISTER;	/* not fatal: fall back */
+			ia->ri_bind_mem = NULL;
+		}
+	}
+
+	/* Else will do memory reg/dereg for each chunk */
+	ia->ri_memreg_strategy = memreg;
+
+	return 0;
+out2:	/* NOTE(review): ri_id stays stale and ri_pd an ERR_PTR after this */
+	rdma_destroy_id(ia->ri_id);
+out1:
+	return rc;
+}
+
+/*
+ * Close an IA: free the DMA MR, QP and PD that rpcrdma_ia_open() and later
+ * setup attached to it (each only if present), then destroy the CM id.
+ */
+void
+rpcrdma_ia_close(struct rpcrdma_ia *ia)
+{
+	int have_id = !(ia->ri_id == NULL || IS_ERR(ia->ri_id));
+	int status;
+
+	dprintk("RPC:       %s: entering\n", __func__);
+	if (ia->ri_bind_mem) {
+		status = ib_dereg_mr(ia->ri_bind_mem);
+		dprintk("RPC:       %s: ib_dereg_mr returned %i\n",
+			__func__, status);
+	}
+	if (have_id && ia->ri_id->qp)
+		rdma_destroy_qp(ia->ri_id);
+	if (!(ia->ri_pd == NULL || IS_ERR(ia->ri_pd))) {
+		status = ib_dealloc_pd(ia->ri_pd);
+		dprintk("RPC:       %s: ib_dealloc_pd returned %i\n",
+			__func__, status);
+	}
+	if (have_id)
+		rdma_destroy_id(ia->ri_id);
+}
+
+/*
+ * Create unconnected endpoint: QP attrs, one armed CQ, CM params; 0 or -errno.
+ */
+int
+rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
+				struct rpcrdma_create_data_internal *cdata)
+{
+	struct ib_device_attr devattr;
+	int rc;
+
+	rc = ib_query_device(ia->ri_id->device, &devattr);
+	if (rc) {
+		dprintk("RPC:       %s: ib_query_device failed %d\n",
+			__func__, rc);
+		return rc;
+	}
+
+	/* check provider's send/recv wr limits */
+	if (cdata->max_requests > devattr.max_qp_wr)
+		cdata->max_requests = devattr.max_qp_wr;
+
+	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
+	ep->rep_attr.qp_context = ep;
+	/* send_cq and recv_cq initialized below */
+	ep->rep_attr.srq = NULL;
+	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
+	switch (ia->ri_memreg_strategy) {
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+	case RPCRDMA_MEMWINDOWS:
+		/* Add room for mw_binds+unbinds - overkill! */
+		ep->rep_attr.cap.max_send_wr++;
+		ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS);
+		if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
+			return -EINVAL;	/* nothing allocated yet */
+		break;
+	default:
+		break;
+	}
+	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
+	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
+	ep->rep_attr.cap.max_recv_sge = 1;
+	ep->rep_attr.cap.max_inline_data = 0;
+	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+	ep->rep_attr.qp_type = IB_QPT_RC;
+	ep->rep_attr.port_num = ~0;
+
+	dprintk("RPC:       %s: requested max: dtos: send %d recv %d; "
+		"iovs: send %d recv %d\n",
+		__func__,
+		ep->rep_attr.cap.max_send_wr,
+		ep->rep_attr.cap.max_recv_wr,
+		ep->rep_attr.cap.max_send_sge,
+		ep->rep_attr.cap.max_recv_sge);
+
+	/* set trigger for requesting send completion */
+	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /*  - 1*/;
+	switch (ia->ri_memreg_strategy) {
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+	case RPCRDMA_MEMWINDOWS:
+		ep->rep_cqinit -= RPCRDMA_MAX_SEGS;
+		break;
+	default:
+		break;
+	}
+	if (ep->rep_cqinit <= 2)	/* too small to be worth counting */
+		ep->rep_cqinit = 0;
+	INIT_CQCOUNT(ep);
+	ep->rep_ia = ia;
+	init_waitqueue_head(&ep->rep_connect_wait);
+
+	/*
+	 * Create a single cq for receive dto and mw_bind (only ever
+	 * care about unbind, really). Send completions are suppressed.
+	 * Use single threaded tasklet upcalls to maintain ordering.
+	 */
+	ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall,
+				  rpcrdma_cq_async_error_upcall, NULL,
+				  ep->rep_attr.cap.max_recv_wr +
+				  ep->rep_attr.cap.max_send_wr + 1, 0);
+	if (IS_ERR(ep->rep_cq)) {
+		rc = PTR_ERR(ep->rep_cq);
+		dprintk("RPC:       %s: ib_create_cq failed: %i\n",
+			__func__, rc);
+		goto out1;
+	}
+
+	rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP);
+	if (rc) {
+		dprintk("RPC:       %s: ib_req_notify_cq failed: %i\n",
+			__func__, rc);
+		goto out2;
+	}
+
+	ep->rep_attr.send_cq = ep->rep_cq;	/* one CQ serves both directions */
+	ep->rep_attr.recv_cq = ep->rep_cq;
+
+	/* Initialize cma parameters */
+
+	/* RPC/RDMA does not use private data */
+	ep->rep_remote_cma.private_data = NULL;
+	ep->rep_remote_cma.private_data_len = 0;
+
+	/* Client offers RDMA Read but does not initiate */
+	switch (ia->ri_memreg_strategy) {
+	case RPCRDMA_BOUNCEBUFFERS:
+		ep->rep_remote_cma.responder_resources = 0;
+		break;
+	case RPCRDMA_MTHCAFMR:
+	case RPCRDMA_REGISTER:
+		ep->rep_remote_cma.responder_resources = cdata->max_requests *
+				(RPCRDMA_MAX_DATA_SEGS / 8);
+		break;
+	case RPCRDMA_MEMWINDOWS:
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+#if RPCRDMA_PERSISTENT_REGISTRATION
+	case RPCRDMA_ALLPHYSICAL:
+#endif
+		ep->rep_remote_cma.responder_resources = cdata->max_requests *
+				(RPCRDMA_MAX_DATA_SEGS / 2);
+		break;
+	default:
+		break;
+	}
+	if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom)
+		ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
+	ep->rep_remote_cma.initiator_depth = 0;
+
+	ep->rep_remote_cma.retry_count = 7;
+	ep->rep_remote_cma.flow_control = 0;
+	ep->rep_remote_cma.rnr_retry_count = 0;
+
+	return 0;
+
+out2:
+	if (ib_destroy_cq(ep->rep_cq))	/* keep rc: report the original error */
+		dprintk("RPC:       %s: ib_destroy_cq failed\n", __func__);
+out1:
+	return rc;
+}
+
+/*
+ * rpcrdma_ep_destroy
+ *
+ * Disconnect and destroy endpoint. After this, the only
+ * valid operations on the ep are to free it (if dynamically
+ * allocated) or re-create it. Returns ib_destroy_cq()'s status.
+ *
+ * The caller's error handling must be sure to not leak the endpoint
+ * if this function fails.
+ */
+int
+rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+{
+	int rc;
+
+	dprintk("RPC:       %s: entering, connected is %d\n",
+		__func__, ep->rep_connected);
+
+	if (ia->ri_id->qp) {	/* a QP exists only once ep_connect made one */
+		rc = rpcrdma_ep_disconnect(ep, ia);
+		if (rc)
+			dprintk("RPC:       %s: rpcrdma_ep_disconnect"
+				" returned %i\n", __func__, rc);
+	}
+
+	ep->rep_func = NULL;	/* NOTE(review): conn_upcall calls it unchecked */
+
+	/* padding - could be done in rpcrdma_buffer_destroy... */
+	if (ep->rep_pad_mr) {
+		rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
+		ep->rep_pad_mr = NULL;
+	}
+
+	if (ia->ri_id->qp) {
+		rdma_destroy_qp(ia->ri_id);
+		ia->ri_id->qp = NULL;	/* rpcrdma_ia_close() re-tests this */
+	}
+
+	rpcrdma_clean_cq(ep->rep_cq);	/* flush leftovers before destroying */
+	rc = ib_destroy_cq(ep->rep_cq);
+	if (rc)
+		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
+			__func__, rc);
+
+	return rc;
+}
+
+/*
+ * Connect unconnected endpoint (or reconnect one whose rep_connected != 0).
+ */
+int
+rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+{
+	struct rdma_cm_id *id;
+	int rc = 0;
+	int retry_count = 0;
+	int reconnect = (ep->rep_connected != 0);
+
+	if (reconnect) {
+		struct rpcrdma_xprt *xprt;
+retry:		/* also entered from below when a first connect is retried */
+		rc = rpcrdma_ep_disconnect(ep, ia);
+		if (rc && rc != -ENOTCONN)
+			dprintk("RPC:       %s: rpcrdma_ep_disconnect"
+				" status %i\n", __func__, rc);
+		rpcrdma_clean_cq(ep->rep_cq);
+
+		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
+		id = rpcrdma_create_id(xprt, ia,
+				(struct sockaddr *)&xprt->rx_data.addr);
+		if (IS_ERR(id)) {
+			rc = PTR_ERR(id);
+			goto out;
+		}
+		/* TEMP TEMP TEMP - fail if new device:
+		 * Deregister/remarshal *all* requests!
+		 * Close and recreate adapter, pd, etc!
+		 * Re-determine all attributes still sane!
+		 * More stuff I haven't thought of!
+		 * Rrrgh!
+		 */
+		if (ia->ri_id->device != id->device) {
+			printk("RPC:       %s: can't reconnect on "
+				"different device!\n", __func__);
+			rdma_destroy_id(id);
+			rc = -ENETDOWN;
+			goto out;
+		}
+		/* END TEMP */
+		if (ia->ri_id->qp)	/* the old id's QP would leak otherwise */
+			rdma_destroy_qp(ia->ri_id);
+		rdma_destroy_id(ia->ri_id);
+		ia->ri_id = id;
+	}
+
+	rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+	if (rc) {
+		dprintk("RPC:       %s: rdma_create_qp failed %i\n",
+			__func__, rc);
+		goto out;
+	}
+
+/* XXX Tavor device performs badly with 2K MTU! */
+if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
+	struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device);
+	if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR &&
+	    (pcid->vendor == PCI_VENDOR_ID_MELLANOX ||
+	     pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) {
+		struct ib_qp_attr attr = {
+			.path_mtu = IB_MTU_1024
+		};
+		rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU);
+	}
+}
+
+	/* Theoretically a client initiator_depth > 0 is not needed,
+	 * but many peers fail to complete the connection unless they
+	 * == responder_resources! */
+	ep->rep_remote_cma.initiator_depth =
+				ep->rep_remote_cma.responder_resources;
+
+	ep->rep_connected = 0;	/* rpcrdma_conn_upcall() sets the outcome */
+
+	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
+	if (rc) {
+		dprintk("RPC:       %s: rdma_connect() failed with %i\n",
+				__func__, rc);
+		goto out;
+	}
+
+	if (reconnect)	/* reconnects do not wait for the outcome here */
+		return 0;
+
+	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
+
+	/*
+	 * Check state. A non-peer reject indicates no listener
+	 * (ECONNREFUSED), which may be a transient state. All
+	 * others indicate a transport condition which has already
+	 * undergone a best-effort.
+	 */
+	if (ep->rep_connected == -ECONNREFUSED
+	    && ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
+		dprintk("RPC:       %s: non-peer_reject, retry\n", __func__);
+		goto retry;
+	}
+	if (ep->rep_connected <= 0) {
+		/* Sometimes, the only way to reliably connect to remote
+		 * CMs is to use same nonzero values for ORD and IRD. */
+		ep->rep_remote_cma.initiator_depth =
+					ep->rep_remote_cma.responder_resources;
+		if (ep->rep_remote_cma.initiator_depth == 0)
+			++ep->rep_remote_cma.initiator_depth;
+		if (ep->rep_remote_cma.responder_resources == 0)
+			++ep->rep_remote_cma.responder_resources;
+		if (retry_count++ == 0)
+			goto retry;
+		rc = ep->rep_connected;
+	} else {
+		dprintk("RPC:       %s: connected\n", __func__);
+	}
+
+out:
+	if (rc)	/* leave the failure visible in the endpoint state */
+		ep->rep_connected = rc;
+	return rc;
+}
+
+/*
+ * rpcrdma_ep_disconnect
+ *
+ * Kept apart from destroy so that an endpoint can be reconnected
+ * without having to be recreated. Returns rdma_disconnect()'s status.
+ *
+ * Not reentrant: callers must never run this in parallel
+ * on the same endpoint.
+ */
+int
+rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+{
+	int status;
+
+	rpcrdma_clean_cq(ep->rep_cq);
+	status = rdma_disconnect(ia->ri_id);
+	if (status != 0) {
+		dprintk("RPC:       %s: rdma_disconnect %i\n",
+			__func__, status);
+		ep->rep_connected = status;
+		return status;
+	}
+	/* returns without wait if not connected */
+	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 1);
+	dprintk("RPC:       %s: after wait, %sconnected\n", __func__,
+		(ep->rep_connected == 1) ? "still " : "dis");
+	return 0;
+}
+
+/*
+ * Initialize buffer memory; on failure, unwinds via rpcrdma_buffer_destroy().
+ */
+int
+rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
+	struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
+{
+	char *p;
+	size_t len;
+	int i, rc;
+
+	buf->rb_max_requests = cdata->max_requests;
+	spin_lock_init(&buf->rb_lock);
+	atomic_set(&buf->rb_credits, 1);
+
+	/* Need to allocate:
+	 *   1.  arrays for send and recv pointers
+	 *   2.  arrays of struct rpcrdma_req to fill in pointers
+	 *   3.  array of struct rpcrdma_rep for replies
+	 *   4.  padding, if any
+	 *   5.  mw's, if any
+	 * Send/recv buffers in req/rep need to be registered
+	 */
+
+	len = buf->rb_max_requests *
+		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
+	len += cdata->padding;
+	switch (ia->ri_memreg_strategy) {
+	case RPCRDMA_MTHCAFMR:
+		/* TBD we are perhaps overallocating here */
+		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
+				sizeof(struct rpcrdma_mw);
+		break;
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+	case RPCRDMA_MEMWINDOWS:
+		len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
+				sizeof(struct rpcrdma_mw);
+		break;
+	default:
+		break;
+	}
+
+	/* allocate 1, 4 and 5 in one shot */
+	p = kzalloc(len, GFP_KERNEL);
+	if (p == NULL) {
+		dprintk("RPC:       %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
+			__func__, len);
+		rc = -ENOMEM;
+		goto out;
+	}
+	buf->rb_pool = p;	/* for freeing it later */
+
+	buf->rb_send_bufs = (struct rpcrdma_req **) p;
+	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
+	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
+	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
+
+	/*
+	 * Register the zeroed pad buffer, if any.
+	 */
+	if (cdata->padding) {
+		rc = rpcrdma_register_internal(ia, p, cdata->padding,
+					    &ep->rep_pad_mr, &ep->rep_pad);
+		if (rc)
+			goto out;
+	}
+	p += cdata->padding;	/* the mw array, if any, follows the pad */
+
+	/*
+	 * Allocate the fmr's, or mw's for mw_bind chunk registration.
+	 * We "cycle" the mw's in order to minimize rkey reuse,
+	 * and also reduce unbind-to-bind collision.
+	 */
+	INIT_LIST_HEAD(&buf->rb_mws);
+	switch (ia->ri_memreg_strategy) {
+	case RPCRDMA_MTHCAFMR:
+		{
+		struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
+		struct ib_fmr_attr fa = {
+			RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT
+		};
+		/* TBD we are perhaps overallocating here */
+		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
+			r->r.fmr = ib_alloc_fmr(ia->ri_pd,
+				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
+				&fa);
+			if (IS_ERR(r->r.fmr)) {
+				rc = PTR_ERR(r->r.fmr);
+				dprintk("RPC:       %s: ib_alloc_fmr"
+					" failed %i\n", __func__, rc);
+				goto out;
+			}
+			list_add(&r->mw_list, &buf->rb_mws);
+			++r;
+		}
+		}
+		break;
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+	case RPCRDMA_MEMWINDOWS:
+		{
+		struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
+		/* Allocate one extra request's worth, for full cycling */
+		for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
+			r->r.mw = ib_alloc_mw(ia->ri_pd);
+			if (IS_ERR(r->r.mw)) {
+				rc = PTR_ERR(r->r.mw);
+				dprintk("RPC:       %s: ib_alloc_mw"
+					" failed %i\n", __func__, rc);
+				goto out;
+			}
+			list_add(&r->mw_list, &buf->rb_mws);
+			++r;
+		}
+		}
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * Allocate/init the request/reply buffers. Doing this
+	 * using kmalloc for now -- one for each buf.
+	 */
+	for (i = 0; i < buf->rb_max_requests; i++) {
+		struct rpcrdma_req *req;
+		struct rpcrdma_rep *rep;
+
+		len = cdata->inline_wsize + sizeof(struct rpcrdma_req);
+		/* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */
+		/* Typical ~2400b, so rounding up saves work later */
+		if (len < 4096)
+			len = 4096;
+		req = kmalloc(len, GFP_KERNEL);
+		if (req == NULL) {
+			dprintk("RPC:       %s: request buffer %d alloc"
+				" failed\n", __func__, i);
+			rc = -ENOMEM;
+			goto out;
+		}
+		memset(req, 0, sizeof(struct rpcrdma_req));	/* header only */
+		buf->rb_send_bufs[i] = req;
+		buf->rb_send_bufs[i]->rl_buffer = buf;
+
+		rc = rpcrdma_register_internal(ia, req->rl_base,
+				len - offsetof(struct rpcrdma_req, rl_base),
+				&buf->rb_send_bufs[i]->rl_handle,
+				&buf->rb_send_bufs[i]->rl_iov);
+		if (rc)
+			goto out;
+
+		buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req);
+
+		len = cdata->inline_rsize + sizeof(struct rpcrdma_rep);
+		rep = kmalloc(len, GFP_KERNEL);
+		if (rep == NULL) {
+			dprintk("RPC:       %s: reply buffer %d alloc failed\n",
+				__func__, i);
+			rc = -ENOMEM;
+			goto out;
+		}
+		memset(rep, 0, sizeof(struct rpcrdma_rep));	/* header only */
+		buf->rb_recv_bufs[i] = rep;
+		buf->rb_recv_bufs[i]->rr_buffer = buf;
+		init_waitqueue_head(&rep->rr_unbind);
+
+		rc = rpcrdma_register_internal(ia, rep->rr_base,
+				len - offsetof(struct rpcrdma_rep, rr_base),
+				&buf->rb_recv_bufs[i]->rr_handle,
+				&buf->rb_recv_bufs[i]->rr_iov);
+		if (rc)
+			goto out;
+
+	}
+	dprintk("RPC:       %s: max_requests %d\n",
+		__func__, buf->rb_max_requests);
+	/* done */
+	return 0;
+out:	/* single unwind point for every failure above */
+	rpcrdma_buffer_destroy(buf);
+	return rc;
+}
+
+/*
+ * Unregister and destroy buffer memory. Need to deal with
+ * partial initialization, so it's callable from failed create.
+ * Must be called before destroying endpoint, as registrations
+ * reference it.
+ */
+void
+rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
+{
+	int rc, i;
+	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
+
+	/* clean up in reverse order from create
+	 *   1.  recv mr memory (mr free, then kfree)
+	 *   2.  send mr memory (mr free, then kfree)
+	 *   2a. fmr/mw memory
+	 *   3.  padding (if any) [moved to rpcrdma_ep_destroy]
+	 *   4.  arrays
+	 */
+	dprintk("RPC:       %s: entering\n", __func__);
+
+	for (i = 0; i < buf->rb_max_requests; i++) {
+		if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
+			rpcrdma_deregister_internal(ia,
+					buf->rb_recv_bufs[i]->rr_handle,
+					&buf->rb_recv_bufs[i]->rr_iov);
+			kfree(buf->rb_recv_bufs[i]);
+		}
+		if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
+			rpcrdma_deregister_internal(ia,
+					buf->rb_send_bufs[i]->rl_handle,
+					&buf->rb_send_bufs[i]->rl_iov);
+			kfree(buf->rb_send_bufs[i]);
+		}
+	}
+
+	/* Drain rb_mws outside the per-buffer loop: a create that failed before
+	 * any request was allocated would otherwise leak every fmr/mw. A NULL
+	 * rb_mws.next (buf presumed zeroed, as the checks above already rely
+	 * on) means create never reached INIT_LIST_HEAD(). */
+	while (buf->rb_mws.next != NULL && !list_empty(&buf->rb_mws)) {
+		struct rpcrdma_mw *r;
+		r = list_entry(buf->rb_mws.next, struct rpcrdma_mw, mw_list);
+		list_del(&r->mw_list);
+		switch (ia->ri_memreg_strategy) {
+		case RPCRDMA_MTHCAFMR:
+			rc = ib_dealloc_fmr(r->r.fmr);
+			if (rc)
+				dprintk("RPC:       %s: ib_dealloc_fmr"
+					" failed %i\n", __func__, rc);
+			break;
+		case RPCRDMA_MEMWINDOWS_ASYNC:
+		case RPCRDMA_MEMWINDOWS:
+			rc = ib_dealloc_mw(r->r.mw);
+			if (rc)
+				dprintk("RPC:       %s: ib_dealloc_mw"
+					" failed %i\n", __func__, rc);
+			break;
+		default:
+			break;
+		}
+	}
+
+	kfree(buf->rb_pool);
+}
+
+/*
+ * Get a set of request/reply buffers.
+ *
+ * On return a reply buffer, when one is needed, hangs off the send buffer.
+ * Invariant:
+ *    rb_send_index and rb_recv_index MUST always designate the *next*
+ *    available (non-NULL) buffer. They advance after a buffer has been
+ *    removed and step back *before* one is returned.
+ */
+struct rpcrdma_req *
+rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
+{
+	struct rpcrdma_req *req;
+	struct rpcrdma_mw *mw;
+	unsigned long irqflags;
+	int seg = RPCRDMA_MAX_SEGS;
+
+	spin_lock_irqsave(&buffers->rb_lock, irqflags);
+	if (buffers->rb_send_index == buffers->rb_max_requests) {
+		spin_unlock_irqrestore(&buffers->rb_lock, irqflags);
+		dprintk("RPC:       %s: out of request buffers\n", __func__);
+		return NULL;
+	}
+	req = buffers->rb_send_bufs[buffers->rb_send_index];
+	if (buffers->rb_recv_index > buffers->rb_send_index) {
+		dprintk("RPC:       %s: %d extra receives outstanding (ok)\n",
+			__func__,
+			buffers->rb_recv_index - buffers->rb_send_index);
+		req->rl_reply = NULL;
+	} else {
+		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
+		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
+	}
+	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
+	if (!list_empty(&buffers->rb_mws)) {
+		do {	/* one mw per segment, filled from the last slot down */
+			seg--;
+			mw = list_entry(buffers->rb_mws.next,
+					struct rpcrdma_mw, mw_list);
+			list_del(&mw->mw_list);
+			req->rl_segments[seg].mr_chunk.rl_mw = mw;
+		} while (seg > 0);
+	}
+	spin_unlock_irqrestore(&buffers->rb_lock, irqflags);
+	return req;
+}
+
+/*
+ * Put request/reply buffers (and the request's mw's) back into pool.
+ * Pre-decrement counter/array index.
+ */
+void
+rpcrdma_buffer_put(struct rpcrdma_req *req)
+{
+	struct rpcrdma_buffer *buffers = req->rl_buffer;
+	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
+	int i;
+	unsigned long flags;
+
+	BUG_ON(req->rl_nchunks != 0);	/* presumably: no chunk still registered */
+	spin_lock_irqsave(&buffers->rb_lock, flags);
+	buffers->rb_send_bufs[--buffers->rb_send_index] = req;
+	req->rl_niovs = 0;
+	if (req->rl_reply) {	/* an attached reply buffer goes back as well */
+		buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
+		init_waitqueue_head(&req->rl_reply->rr_unbind);
+		req->rl_reply->rr_func = NULL;
+		req->rl_reply = NULL;
+	}
+	switch (ia->ri_memreg_strategy) {
+	case RPCRDMA_MTHCAFMR:
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+	case RPCRDMA_MEMWINDOWS:
+		/*
+		 * Cycle mw's back in reverse order, and "spin" them.
+		 * This delays and scrambles reuse as much as possible.
+		 */
+		i = 1;	/* segments 1..MAX_SEGS-1 first, segment 0 last */
+		do {
+			struct rpcrdma_mw **mw;
+			mw = &req->rl_segments[i].mr_chunk.rl_mw;
+			list_add_tail(&(*mw)->mw_list, &buffers->rb_mws);
+			*mw = NULL;
+		} while (++i < RPCRDMA_MAX_SEGS);
+		list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list,
+					&buffers->rb_mws);
+		req->rl_segments[0].mr_chunk.rl_mw = NULL;
+		break;
+	default:	/* other strategies hold no mw's in the request */
+		break;
+	}
+	spin_unlock_irqrestore(&buffers->rb_lock, flags);
+}
+
+/*
+ * Take the next reply buffer out of the pool (if one is left) and attach
+ * it to req; used on error-recovery paths. The index is bumped afterwards.
+ */
+void
+rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
+{
+	struct rpcrdma_buffer *pool = req->rl_buffer;
+	unsigned long irqflags;
+
+	if (req->rl_iov.length == 0)	/* special case xprt_rdma_allocate() */
+		pool = ((struct rpcrdma_req *) pool)->rl_buffer;
+	spin_lock_irqsave(&pool->rb_lock, irqflags);
+	if (pool->rb_recv_index < pool->rb_max_requests) {
+		req->rl_reply = pool->rb_recv_bufs[pool->rb_recv_index];
+		pool->rb_recv_bufs[pool->rb_recv_index] = NULL;
+		pool->rb_recv_index++;
+	}
+	spin_unlock_irqrestore(&pool->rb_lock, irqflags);
+}
+
+/*
+ * Return a reply buffer that is not attached to a request (error paths,
+ * aborted unbinds) to the pool. The index is stepped back before the store.
+ */
+void
+rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
+{
+	struct rpcrdma_buffer *pool = rep->rr_buffer;
+	unsigned long irqflags;
+
+	rep->rr_func = NULL;
+	spin_lock_irqsave(&pool->rb_lock, irqflags);
+	pool->rb_recv_index--;
+	pool->rb_recv_bufs[pool->rb_recv_index] = rep;
+	spin_unlock_irqrestore(&pool->rb_lock, irqflags);
+}
+
+/*
+ * Wrappers for internal-use kmalloc memory registration, used by buffer code.
+ * register: DMA-map va/len into iov and set *mrp (NULL if none); 0 or -errno.
+ */
+int
+rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
+				struct ib_mr **mrp, struct ib_sge *iov)
+{
+	struct ib_phys_buf ipb;
+	struct ib_mr *mr;
+	int rc;
+
+	/*
+	 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
+	 */
+	iov->addr = ib_dma_map_single(ia->ri_id->device,
+			va, len, DMA_BIDIRECTIONAL);	/* result not checked */
+	iov->length = len;
+
+	if (ia->ri_bind_mem != NULL) {	/* DMA MR from rpcrdma_ia_open() */
+		*mrp = NULL;	/* no per-buffer MR to release later */
+		iov->lkey = ia->ri_bind_mem->lkey;
+		return 0;
+	}
+
+	ipb.addr = iov->addr;
+	ipb.size = iov->length;
+	mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
+			IB_ACCESS_LOCAL_WRITE, &iov->addr);	/* by reference */
+
+	dprintk("RPC:       %s: phys convert: 0x%llx "
+			"registered 0x%llx length %d\n",
+			__func__, (unsigned long long)ipb.addr,
+			(unsigned long long)iov->addr, len);
+
+	if (IS_ERR(mr)) {	/* NOTE(review): the DMA mapping stays in place */
+		*mrp = NULL;
+		rc = PTR_ERR(mr);
+		dprintk("RPC:       %s: failed with %i\n", __func__, rc);
+	} else {
+		*mrp = mr;
+		iov->lkey = mr->lkey;
+		rc = 0;
+	}
+
+	return rc;
+}
+
+/* Undo rpcrdma_register_internal(): unmap the buffer, drop the MR if any. */
+int
+rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
+				struct ib_mr *mr, struct ib_sge *iov)
+{
+	int status = 0;
+
+	ib_dma_unmap_single(ia->ri_id->device,
+			iov->addr, iov->length, DMA_BIDIRECTIONAL);
+	if (mr != NULL) {
+		status = ib_dereg_mr(mr);
+		if (status != 0)
+			dprintk("RPC:       %s: ib_dereg_mr failed %i\n",
+				__func__, status);
+	}
+	return status;
+}
+
+/*
+ * Wrappers for chunk registration, shared by read/write chunk code.
+ */
+
+static void
+rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
+{
+	/* DMA-map one segment: by page when it has one, else by address */
+	seg->mr_dmalen = seg->mr_len;
+	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+	if (!seg->mr_page) {
+		seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
+				seg->mr_offset, seg->mr_dmalen, seg->mr_dir);
+		return;
+	}
+	seg->mr_dma = ib_dma_map_page(ia->ri_id->device, seg->mr_page,
+		offset_in_page(seg->mr_offset), seg->mr_dmalen, seg->mr_dir);
+}
+/* Release the DMA mapping that rpcrdma_map_one() set up for seg. */
+static void
+rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
+{
+	if (seg->mr_page == NULL)
+		ib_dma_unmap_single(ia->ri_id->device, seg->mr_dma,
+					seg->mr_dmalen, seg->mr_dir);
+	else
+		ib_dma_unmap_page(ia->ri_id->device, seg->mr_dma,
+					seg->mr_dmalen, seg->mr_dir);
+}
+/* Map and register up to nsegs segments; returns segments used, or -1. */
+int
+rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
+			int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
+{
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
+				  IB_ACCESS_REMOTE_READ);
+	struct rpcrdma_mr_seg *seg1 = seg;	/* first segment keeps the result */
+	int i;
+	int rc = 0;
+
+	switch (ia->ri_memreg_strategy) {
+
+#if RPCRDMA_PERSISTENT_REGISTRATION
+	case RPCRDMA_ALLPHYSICAL:
+		rpcrdma_map_one(ia, seg, writing);
+		seg->mr_rkey = ia->ri_bind_mem->rkey;
+		seg->mr_base = seg->mr_dma;
+		seg->mr_nsegs = 1;
+		nsegs = 1;	/* one segment per call */
+		break;
+#endif
+
+	/* Registration using fast memory registration */
+	case RPCRDMA_MTHCAFMR:
+		{
+		u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
+		int len, pageoff = offset_in_page(seg->mr_offset);
+		seg1->mr_offset -= pageoff;	/* start of page */
+		seg1->mr_len += pageoff;
+		len = -pageoff;	/* cancels the pageoff just added to mr_len */
+		if (nsegs > RPCRDMA_MAX_DATA_SEGS)
+			nsegs = RPCRDMA_MAX_DATA_SEGS;
+		for (i = 0; i < nsegs;) {
+			rpcrdma_map_one(ia, seg, writing);
+			physaddrs[i] = seg->mr_dma;
+			len += seg->mr_len;
+			++seg;
+			++i;
+			/* Check for holes */
+			if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+			    offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
+				break;
+		}
+		nsegs = i;	/* segments actually mapped */
+		rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
+					physaddrs, nsegs, seg1->mr_dma);
+		if (rc) {
+			dprintk("RPC:       %s: failed ib_map_phys_fmr "
+				"%u@0x%llx+%i (%d)... status %i\n", __func__,
+				len, (unsigned long long)seg1->mr_dma,
+				pageoff, nsegs, rc);
+			while (nsegs--)	/* undo the mappings, last to first */
+				rpcrdma_unmap_one(ia, --seg);
+		} else {
+			seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
+			seg1->mr_base = seg1->mr_dma + pageoff;
+			seg1->mr_nsegs = nsegs;
+			seg1->mr_len = len;
+		}
+		}
+		break;
+
+	/* Registration using memory windows */
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+	case RPCRDMA_MEMWINDOWS:
+		{
+		struct ib_mw_bind param;
+		rpcrdma_map_one(ia, seg, writing);
+		param.mr = ia->ri_bind_mem;
+		param.wr_id = 0ULL;	/* no send cookie */
+		param.addr = seg->mr_dma;
+		param.length = seg->mr_len;
+		param.send_flags = 0;
+		param.mw_access_flags = mem_priv;
+
+		DECR_CQCOUNT(&r_xprt->rx_ep);
+		rc = ib_bind_mw(ia->ri_id->qp,
+					seg->mr_chunk.rl_mw->r.mw, &param);
+		if (rc) {
+			dprintk("RPC:       %s: failed ib_bind_mw "
+				"%u@0x%llx status %i\n",
+				__func__, seg->mr_len,
+				(unsigned long long)seg->mr_dma, rc);
+			rpcrdma_unmap_one(ia, seg);
+		} else {
+			seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
+			seg->mr_base = param.addr;
+			seg->mr_nsegs = 1;
+			nsegs = 1;	/* one segment per call */
+		}
+		}
+		break;
+
+	/* Default registration each time */
+	default:
+		{
+		struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
+		int len = 0;
+		if (nsegs > RPCRDMA_MAX_DATA_SEGS)
+			nsegs = RPCRDMA_MAX_DATA_SEGS;
+		for (i = 0; i < nsegs;) {
+			rpcrdma_map_one(ia, seg, writing);
+			ipb[i].addr = seg->mr_dma;
+			ipb[i].size = seg->mr_len;
+			len += seg->mr_len;
+			++seg;
+			++i;
+			/* Check for holes */
+			if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+			    offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
+				break;
+		}
+		nsegs = i;	/* segments actually mapped */
+		seg1->mr_base = seg1->mr_dma;
+		seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
+					ipb, nsegs, mem_priv, &seg1->mr_base);
+		if (IS_ERR(seg1->mr_chunk.rl_mr)) {
+			rc = PTR_ERR(seg1->mr_chunk.rl_mr);
+			dprintk("RPC:       %s: failed ib_reg_phys_mr "
+				"%u@0x%llx (%d)... status %i\n",
+				__func__, len,
+				(unsigned long long)seg1->mr_dma, nsegs, rc);
+			while (nsegs--)	/* undo the mappings, last to first */
+				rpcrdma_unmap_one(ia, --seg);
+		} else {
+			seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
+			seg1->mr_nsegs = nsegs;
+			seg1->mr_len = len;
+		}
+		}
+		break;
+	}
+	if (rc)
+		return -1;	/* rc itself is not passed back */
+
+	return nsegs;
+}
+
+int
+rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
+		struct rpcrdma_xprt *r_xprt, void *r)
+{
+	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+	struct rpcrdma_mr_seg *seg1 = seg;
+	int nsegs = seg->mr_nsegs, rc;
+
+	switch (ia->ri_memreg_strategy) {
+
+#if RPCRDMA_PERSISTENT_REGISTRATION
+	case RPCRDMA_ALLPHYSICAL:
+		BUG_ON(nsegs != 1);
+		rpcrdma_unmap_one(ia, seg);
+		rc = 0;
+		break;
+#endif
+
+	case RPCRDMA_MTHCAFMR:
+		{
+		LIST_HEAD(l);
+		list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l);
+		rc = ib_unmap_fmr(&l);
+		while (seg1->mr_nsegs--)
+			rpcrdma_unmap_one(ia, seg++);
+		}
+		if (rc)
+			dprintk("RPC:       %s: failed ib_unmap_fmr,"
+				" status %i\n", __func__, rc);
+		break;
+
+	case RPCRDMA_MEMWINDOWS_ASYNC:
+	case RPCRDMA_MEMWINDOWS:
+		{
+		struct ib_mw_bind param;
+		BUG_ON(nsegs != 1);
+		param.mr = ia->ri_bind_mem;
+		param.addr = 0ULL;	/* unbind */
+		param.length = 0;
+		param.mw_access_flags = 0;
+		if (r) {
+			param.wr_id = (u64) (unsigned long) r;
+			param.send_flags = IB_SEND_SIGNALED;
+			INIT_CQCOUNT(&r_xprt->rx_ep);
+		} else {
+			param.wr_id = 0ULL;
+			param.send_flags = 0;
+			DECR_CQCOUNT(&r_xprt->rx_ep);
+		}
+		rc = ib_bind_mw(ia->ri_id->qp,
+				seg->mr_chunk.rl_mw->r.mw, &param);
+		rpcrdma_unmap_one(ia, seg);
+		}
+		if (rc)
+			dprintk("RPC:       %s: failed ib_(un)bind_mw,"
+				" status %i\n", __func__, rc);
+		else
+			r = NULL;	/* will upcall on completion */
+		break;
+
+	default:
+		rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
+		seg1->mr_chunk.rl_mr = NULL;
+		while (seg1->mr_nsegs--)
+			rpcrdma_unmap_one(ia, seg++);
+		if (rc)
+			dprintk("RPC:       %s: failed ib_dereg_mr,"
+				" status %i\n", __func__, rc);
+		break;
+	}
+	if (r) {
+		struct rpcrdma_rep *rep = r;
+		void (*func)(struct rpcrdma_rep *) = rep->rr_func;
+		rep->rr_func = NULL;
+		func(rep);	/* dereg done, callback now */
+	}
+	return nsegs;
+}
+
+/*
+ * Prepost any receive buffer, then post send.
+ *
+ * Receive buffer is donated to hardware, reclaimed upon recv completion.
+ */
+int
+rpcrdma_ep_post(struct rpcrdma_ia *ia,
+		struct rpcrdma_ep *ep,
+		struct rpcrdma_req *req)
+{
+	struct ib_send_wr send_wr, *send_wr_fail;
+	struct rpcrdma_rep *rep = req->rl_reply;
+	int rc;
+
+	if (rep) {
+		rc = rpcrdma_ep_post_recv(ia, ep, rep);
+		if (rc)
+			goto out;
+		req->rl_reply = NULL;
+	}
+
+	send_wr.next = NULL;
+	send_wr.wr_id = 0ULL;	/* no send cookie */
+	send_wr.sg_list = req->rl_send_iov;
+	send_wr.num_sge = req->rl_niovs;
+	send_wr.opcode = IB_WR_SEND;
+	send_wr.imm_data = 0;
+	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
+		ib_dma_sync_single_for_device(ia->ri_id->device,
+			req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
+			DMA_TO_DEVICE);
+	ib_dma_sync_single_for_device(ia->ri_id->device,
+		req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
+		DMA_TO_DEVICE);
+	ib_dma_sync_single_for_device(ia->ri_id->device,
+		req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
+		DMA_TO_DEVICE);
+
+	if (DECR_CQCOUNT(ep) > 0)
+		send_wr.send_flags = 0;
+	else { /* Provider must take a send completion every now and then */
+		INIT_CQCOUNT(ep);
+		send_wr.send_flags = IB_SEND_SIGNALED;
+	}
+
+	rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
+	if (rc)
+		dprintk("RPC:       %s: ib_post_send returned %i\n", __func__,
+			rc);
+out:
+	return rc;
+}
+
+/*
+ * (Re)post a receive buffer.
+ */
+int
+rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
+		     struct rpcrdma_ep *ep,
+		     struct rpcrdma_rep *rep)
+{
+	struct ib_recv_wr recv_wr, *recv_wr_fail;
+	int rc;
+
+	recv_wr.next = NULL;
+	recv_wr.wr_id = (u64) (unsigned long) rep;
+	recv_wr.sg_list = &rep->rr_iov;
+	recv_wr.num_sge = 1;
+
+	ib_dma_sync_single_for_cpu(ia->ri_id->device,
+		rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
+
+	DECR_CQCOUNT(ep);
+	rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
+
+	if (rc)
+		dprintk("RPC:       %s: ib_post_recv returned %i\n", __func__,
+			rc);
+	return rc;
+}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
new file mode 100644
index 00000000000..2427822f8bd
--- /dev/null
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *      Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *      Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ *      Neither the name of the Network Appliance, Inc. nor the names of
+ *      its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written
+ *      permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_SUNRPC_XPRT_RDMA_H
+#define _LINUX_SUNRPC_XPRT_RDMA_H
+
+#include <linux/wait.h> 		/* wait_queue_head_t, etc */
+#include <linux/spinlock.h> 		/* spinlock_t, etc */
+#include <asm/atomic.h>			/* atomic_t, etc */
+
+#include <rdma/rdma_cm.h>		/* RDMA connection api */
+#include <rdma/ib_verbs.h>		/* RDMA verbs api */
+
+#include <linux/sunrpc/clnt.h> 		/* rpc_xprt */
+#include <linux/sunrpc/rpc_rdma.h> 	/* RPC/RDMA protocol */
+#include <linux/sunrpc/xprtrdma.h> 	/* xprt parameters */
+
+/*
+ * Interface Adapter -- one per transport instance
+ */
+struct rpcrdma_ia {
+	struct rdma_cm_id 	*ri_id;
+	struct ib_pd		*ri_pd;
+	struct ib_mr		*ri_bind_mem;
+	struct completion	ri_done;
+	int			ri_async_rc;
+	enum rpcrdma_memreg	ri_memreg_strategy;
+};
+
+/*
+ * RDMA Endpoint -- one per transport instance
+ */
+
+struct rpcrdma_ep {
+	atomic_t		rep_cqcount;
+	int			rep_cqinit;
+	int			rep_connected;
+	struct rpcrdma_ia	*rep_ia;
+	struct ib_cq		*rep_cq;
+	struct ib_qp_init_attr	rep_attr;
+	wait_queue_head_t 	rep_connect_wait;
+	struct ib_sge		rep_pad;	/* holds zeroed pad */
+	struct ib_mr		*rep_pad_mr;	/* holds zeroed pad */
+	void			(*rep_func)(struct rpcrdma_ep *);
+	struct rpc_xprt		*rep_xprt;	/* for rep_func */
+	struct rdma_conn_param	rep_remote_cma;
+	struct sockaddr_storage	rep_remote_addr;
+};
+
+#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
+#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
+
+/*
+ * struct rpcrdma_rep -- this structure encapsulates state required to recv
+ * and complete a reply, asynchronously. It needs several pieces of
+ * state:
+ *   o recv buffer (posted to provider)
+ *   o ib_sge (also donated to provider)
+ *   o status of reply (length, success or not)
+ *   o bookkeeping state to get run by tasklet (list, etc)
+ *
+ * These are allocated during initialization, per-transport instance;
+ * however, the tasklet execution list itself is global, as it should
+ * always be pretty short.
+ *
+ * N of these are associated with a transport instance, and stored in
+ * struct rpcrdma_buffer. N is the max number of outstanding requests.
+ */
+
+/* temporary static scatter/gather max */
+#define RPCRDMA_MAX_DATA_SEGS	(8)	/* max scatter/gather */
+#define RPCRDMA_MAX_SEGS 	(RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */
+#define MAX_RPCRDMAHDR	(\
+	/* max supported RPC/RDMA header */ \
+	sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \
+	(sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32))
+
+struct rpcrdma_buffer;
+
+struct rpcrdma_rep {
+	unsigned int	rr_len;		/* actual received reply length */
+	struct rpcrdma_buffer *rr_buffer; /* home base for this structure */
+	struct rpc_xprt	*rr_xprt;	/* needed for request/reply matching */
+	void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */
+	struct list_head rr_list;	/* tasklet list */
+	wait_queue_head_t rr_unbind;	/* optional unbind wait */
+	struct ib_sge	rr_iov;		/* for posting */
+	struct ib_mr	*rr_handle;	/* handle for mem in rr_iov */
+	char	rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */
+};
+
+/*
+ * struct rpcrdma_req -- structure central to the request/reply sequence.
+ *
+ * N of these are associated with a transport instance, and stored in
+ * struct rpcrdma_buffer. N is the max number of outstanding requests.
+ *
+ * It includes pre-registered buffer memory for send AND recv.
+ * The recv buffer, however, is not owned by this structure, and
+ * is "donated" to the hardware when a recv is posted. When a
+ * reply is handled, the recv buffer used is given back to the
+ * struct rpcrdma_req associated with the request.
+ *
+ * In addition to the basic memory, this structure includes an array
+ * of iovs for send operations. The reason is that the iovs passed to
+ * ib_post_{send,recv} must not be modified until the work request
+ * completes.
+ *
+ * NOTES:
+ *   o RPCRDMA_MAX_SEGS is the max number of addressable chunk elements we
+ *     marshal. The number needed varies depending on the iov lists that
+ *     are passed to us, the memory registration mode we are in, and if
+ *     physical addressing is used, the layout.
+ */
+
+struct rpcrdma_mr_seg {		/* chunk descriptors */
+	union {				/* chunk memory handles */
+		struct ib_mr	*rl_mr;		/* if registered directly */
+		struct rpcrdma_mw {		/* if registered from region */
+			union {
+				struct ib_mw	*mw;
+				struct ib_fmr	*fmr;
+			} r;
+			struct list_head mw_list;
+		} *rl_mw;
+	} mr_chunk;
+	u64		mr_base;	/* registration result */
+	u32		mr_rkey;	/* registration result */
+	u32		mr_len;		/* length of chunk or segment */
+	int		mr_nsegs;	/* number of segments in chunk or 0 */
+	enum dma_data_direction	mr_dir;	/* segment mapping direction */
+	dma_addr_t	mr_dma;		/* segment mapping address */
+	size_t		mr_dmalen;	/* segment mapping length */
+	struct page	*mr_page;	/* owning page, if any */
+	char		*mr_offset;	/* kva if no page, else offset */
+};
+
+struct rpcrdma_req {
+	size_t 		rl_size;	/* actual length of buffer */
+	unsigned int	rl_niovs;	/* 0, 2 or 4 */
+	unsigned int	rl_nchunks;	/* non-zero if chunks */
+	struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
+	struct rpcrdma_rep	*rl_reply;/* holder for reply buffer */
+	struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
+	struct ib_sge	rl_send_iov[4];	/* for active requests */
+	struct ib_sge	rl_iov;		/* for posting */
+	struct ib_mr	*rl_handle;	/* handle for mem in rl_iov */
+	char		rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */
+	__u32 		rl_xdr_buf[0];	/* start of returned rpc rq_buffer */
+};
+#define rpcr_to_rdmar(r) \
+	container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0])
+
+/*
+ * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
+ * inline requests/replies, and client/server credits.
+ *
+ * One of these is associated with a transport instance
+ */
+struct rpcrdma_buffer {
+	spinlock_t	rb_lock;	/* protects indexes */
+	atomic_t	rb_credits;	/* most recent server credits */
+	unsigned long	rb_cwndscale;	/* cached framework rpc_cwndscale */
+	int		rb_max_requests;/* client max requests */
+	struct list_head rb_mws;	/* optional memory windows/fmrs */
+	int		rb_send_index;
+	struct rpcrdma_req	**rb_send_bufs;
+	int		rb_recv_index;
+	struct rpcrdma_rep	**rb_recv_bufs;
+	char		*rb_pool;
+};
+#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
+
+/*
+ * Internal structure for transport instance creation. This
+ * exists primarily for modularity.
+ *
+ * This data should be set with mount options
+ */
+struct rpcrdma_create_data_internal {
+	struct sockaddr_storage	addr;	/* RDMA server address */
+	unsigned int	max_requests;	/* max requests (slots) in flight */
+	unsigned int	rsize;		/* mount rsize - max read hdr+data */
+	unsigned int	wsize;		/* mount wsize - max write hdr+data */
+	unsigned int	inline_rsize;	/* max non-rdma read data payload */
+	unsigned int	inline_wsize;	/* max non-rdma write data payload */
+	unsigned int	padding;	/* non-rdma write header padding */
+};
+
+#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \
+	(rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_rsize)
+
+#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\
+	(rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_wsize)
+
+#define RPCRDMA_INLINE_PAD_VALUE(rq)\
+	rpcx_to_rdmad(rq->rq_task->tk_xprt).padding
+
+/*
+ * Statistics for RPCRDMA
+ */
+struct rpcrdma_stats {
+	unsigned long		read_chunk_count;
+	unsigned long		write_chunk_count;
+	unsigned long		reply_chunk_count;
+
+	unsigned long long	total_rdma_request;
+	unsigned long long	total_rdma_reply;
+
+	unsigned long long	pullup_copy_count;
+	unsigned long long	fixup_copy_count;
+	unsigned long		hardway_register_count;
+	unsigned long		failed_marshal_count;
+	unsigned long		bad_reply_count;
+};
+
+/*
+ * RPCRDMA transport -- encapsulates the structures above for
+ * integration with RPC.
+ *
+ * The contained structures are embedded, not pointers,
+ * for convenience. This structure need not be visible externally.
+ *
+ * It is allocated and initialized during mount, and released
+ * during unmount.
+ */
+struct rpcrdma_xprt {
+	struct rpc_xprt		xprt;
+	struct rpcrdma_ia	rx_ia;
+	struct rpcrdma_ep	rx_ep;
+	struct rpcrdma_buffer	rx_buf;
+	struct rpcrdma_create_data_internal rx_data;
+	struct delayed_work	rdma_connect;
+	struct rpcrdma_stats	rx_stats;
+};
+
+#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
+#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
+
+/*
+ * Interface Adapter calls - xprtrdma/verbs.c
+ */
+int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int);
+void rpcrdma_ia_close(struct rpcrdma_ia *);
+
+/*
+ * Endpoint calls - xprtrdma/verbs.c
+ */
+int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
+				struct rpcrdma_create_data_internal *);
+int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
+int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
+int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
+
+int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
+				struct rpcrdma_req *);
+int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
+				struct rpcrdma_rep *);
+
+/*
+ * Buffer calls - xprtrdma/verbs.c
+ */
+int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *,
+				struct rpcrdma_ia *,
+				struct rpcrdma_create_data_internal *);
+void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
+
+struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
+void rpcrdma_buffer_put(struct rpcrdma_req *);
+void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
+void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
+
+int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int,
+				struct ib_mr **, struct ib_sge *);
+int rpcrdma_deregister_internal(struct rpcrdma_ia *,
+				struct ib_mr *, struct ib_sge *);
+
+int rpcrdma_register_external(struct rpcrdma_mr_seg *,
+				int, int, struct rpcrdma_xprt *);
+int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
+				struct rpcrdma_xprt *, void *);
+
+/*
+ * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
+ */
+void rpcrdma_conn_func(struct rpcrdma_ep *);
+void rpcrdma_reply_handler(struct rpcrdma_rep *);
+
+/*
+ * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
+ */
+int rpcrdma_marshal_req(struct rpc_rqst *);
+
+#endif				/* _LINUX_SUNRPC_XPRT_RDMA_H */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 282efd447a6..02298f529da 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -13,10 +13,14 @@
  *  (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no>
  *
  * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com>
+ *
+ * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005.
+ *   <gilles.quillard@bull.net>
  */
 
 #include <linux/types.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 #include <linux/capability.h>
 #include <linux/pagemap.h>
 #include <linux/errno.h>
@@ -28,6 +32,7 @@
 #include <linux/tcp.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/xprtsock.h>
 #include <linux/file.h>
 
 #include <net/sock.h>
@@ -260,14 +265,29 @@ struct sock_xprt {
 #define TCP_RCV_COPY_XID	(1UL << 2)
 #define TCP_RCV_COPY_DATA	(1UL << 3)
 
-static void xs_format_peer_addresses(struct rpc_xprt *xprt)
+static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt)
+{
+	return (struct sockaddr *) &xprt->addr;
+}
+
+static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt)
 {
-	struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
+	return (struct sockaddr_in *) &xprt->addr;
+}
+
+static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt)
+{
+	return (struct sockaddr_in6 *) &xprt->addr;
+}
+
+static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt)
+{
+	struct sockaddr_in *addr = xs_addr_in(xprt);
 	char *buf;
 
 	buf = kzalloc(20, GFP_KERNEL);
 	if (buf) {
-		snprintf(buf, 20, "%u.%u.%u.%u",
+		snprintf(buf, 20, NIPQUAD_FMT,
 				NIPQUAD(addr->sin_addr.s_addr));
 	}
 	xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
@@ -279,26 +299,123 @@ static void xs_format_peer_addresses(struct rpc_xprt *xprt)
 	}
 	xprt->address_strings[RPC_DISPLAY_PORT] = buf;
 
-	if (xprt->prot == IPPROTO_UDP)
-		xprt->address_strings[RPC_DISPLAY_PROTO] = "udp";
-	else
-		xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp";
+	buf = kzalloc(8, GFP_KERNEL);
+	if (buf) {
+		if (xprt->prot == IPPROTO_UDP)
+			snprintf(buf, 8, "udp");
+		else
+			snprintf(buf, 8, "tcp");
+	}
+	xprt->address_strings[RPC_DISPLAY_PROTO] = buf;
 
 	buf = kzalloc(48, GFP_KERNEL);
 	if (buf) {
-		snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s",
+		snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s",
 			NIPQUAD(addr->sin_addr.s_addr),
 			ntohs(addr->sin_port),
 			xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
 	}
 	xprt->address_strings[RPC_DISPLAY_ALL] = buf;
+
+	buf = kzalloc(10, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 10, "%02x%02x%02x%02x",
+				NIPQUAD(addr->sin_addr.s_addr));
+	}
+	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
+
+	buf = kzalloc(8, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 8, "%4hx",
+				ntohs(addr->sin_port));
+	}
+	xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
+
+	buf = kzalloc(30, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 30, NIPQUAD_FMT".%u.%u",
+				NIPQUAD(addr->sin_addr.s_addr),
+				ntohs(addr->sin_port) >> 8,
+				ntohs(addr->sin_port) & 0xff);
+	}
+	xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
+
+	xprt->address_strings[RPC_DISPLAY_NETID] =
+		kstrdup(xprt->prot == IPPROTO_UDP ?
+			RPCBIND_NETID_UDP : RPCBIND_NETID_TCP, GFP_KERNEL);
+}
+
+static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt)
+{
+	struct sockaddr_in6 *addr = xs_addr_in6(xprt);
+	char *buf;
+
+	buf = kzalloc(40, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 40, NIP6_FMT,
+				NIP6(addr->sin6_addr));
+	}
+	xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
+
+	buf = kzalloc(8, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 8, "%u",
+				ntohs(addr->sin6_port));
+	}
+	xprt->address_strings[RPC_DISPLAY_PORT] = buf;
+
+	buf = kzalloc(8, GFP_KERNEL);
+	if (buf) {
+		if (xprt->prot == IPPROTO_UDP)
+			snprintf(buf, 8, "udp");
+		else
+			snprintf(buf, 8, "tcp");
+	}
+	xprt->address_strings[RPC_DISPLAY_PROTO] = buf;
+
+	buf = kzalloc(80, GFP_KERNEL);	/* up to 65 chars + NUL if NIP6_FMT is 39 wide; 64 truncated "proto=" */
+	if (buf) {
+		snprintf(buf, 80, "addr="NIP6_FMT" port=%u proto=%s",
+				NIP6(addr->sin6_addr),
+				ntohs(addr->sin6_port),
+				xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
+	}
+	xprt->address_strings[RPC_DISPLAY_ALL] = buf;
+
+	buf = kzalloc(36, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 36, NIP6_SEQFMT,
+				NIP6(addr->sin6_addr));
+	}
+	xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
+
+	buf = kzalloc(8, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 8, "%4hx",
+				ntohs(addr->sin6_port));
+	}
+	xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
+
+	buf = kzalloc(50, GFP_KERNEL);
+	if (buf) {
+		snprintf(buf, 50, NIP6_FMT".%u.%u",
+				NIP6(addr->sin6_addr),
+				ntohs(addr->sin6_port) >> 8,
+				ntohs(addr->sin6_port) & 0xff);
+	}
+	xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
+
+	xprt->address_strings[RPC_DISPLAY_NETID] =
+		kstrdup(xprt->prot == IPPROTO_UDP ?
+			RPCBIND_NETID_UDP6 : RPCBIND_NETID_TCP6, GFP_KERNEL);
 }
 
 static void xs_free_peer_addresses(struct rpc_xprt *xprt)
 {
-	kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
-	kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
-	kfree(xprt->address_strings[RPC_DISPLAY_ALL]);
+	int i;
+
+	for (i = 0; i < RPC_DISPLAY_MAX; i++)
+		kfree(xprt->address_strings[i]);
 }
 
 #define XS_SENDMSG_FLAGS	(MSG_DONTWAIT | MSG_NOSIGNAL)
@@ -463,19 +580,20 @@ static int xs_udp_send_request(struct rpc_task *task)
 
 	req->rq_xtime = jiffies;
 	status = xs_sendpages(transport->sock,
-			      (struct sockaddr *) &xprt->addr,
+			      xs_addr(xprt),
 			      xprt->addrlen, xdr,
 			      req->rq_bytes_sent);
 
 	dprintk("RPC:       xs_udp_send_request(%u) = %d\n",
 			xdr->len - req->rq_bytes_sent, status);
 
-	if (likely(status >= (int) req->rq_slen))
-		return 0;
-
-	/* Still some bytes left; set up for a retry later. */
-	if (status > 0)
+	if (status >= 0) {
+		task->tk_bytes_sent += status;
+		if (status >= req->rq_slen)
+			return 0;
+		/* Still some bytes left; set up for a retry later. */
 		status = -EAGAIN;
+	}
 
 	switch (status) {
 	case -ENETUNREACH:
@@ -523,7 +641,8 @@ static int xs_tcp_send_request(struct rpc_task *task)
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 	struct xdr_buf *xdr = &req->rq_snd_buf;
-	int status, retry = 0;
+	int status;
+	unsigned int retry = 0;
 
 	xs_encode_tcp_record_marker(&req->rq_snd_buf);
 
@@ -661,6 +780,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
 	xs_free_peer_addresses(xprt);
 	kfree(xprt->slot);
 	kfree(xprt);
+	module_put(THIS_MODULE);
 }
 
 static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
@@ -1139,14 +1259,23 @@ static unsigned short xs_get_random_port(void)
  */
 static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
 {
-	struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr;
+	struct sockaddr *addr = xs_addr(xprt);
 
 	dprintk("RPC:       setting port for xprt %p to %u\n", xprt, port);
 
-	sap->sin_port = htons(port);
+	switch (addr->sa_family) {
+	case AF_INET:
+		((struct sockaddr_in *)addr)->sin_port = htons(port);
+		break;
+	case AF_INET6:
+		((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
+		break;
+	default:
+		BUG();
+	}
 }
 
-static int xs_bind(struct sock_xprt *transport, struct socket *sock)
+static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
 {
 	struct sockaddr_in myaddr = {
 		.sin_family = AF_INET,
@@ -1174,8 +1303,42 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
 		else
 			port--;
 	} while (err == -EADDRINUSE && port != transport->port);
-	dprintk("RPC:       xs_bind "NIPQUAD_FMT":%u: %s (%d)\n",
-		NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err);
+	dprintk("RPC:       %s "NIPQUAD_FMT":%u: %s (%d)\n",
+			__FUNCTION__, NIPQUAD(myaddr.sin_addr),
+			port, err ? "failed" : "ok", err);
+	return err;
+}
+
+static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
+{
+	struct sockaddr_in6 myaddr = {
+		.sin6_family = AF_INET6,
+	};
+	struct sockaddr_in6 *sa;
+	int err;
+	unsigned short port = transport->port;
+
+	if (!transport->xprt.resvport)
+		port = 0;
+	sa = (struct sockaddr_in6 *)&transport->addr;
+	myaddr.sin6_addr = sa->sin6_addr;
+	do {
+		myaddr.sin6_port = htons(port);
+		err = kernel_bind(sock, (struct sockaddr *) &myaddr,
+						sizeof(myaddr));
+		if (!transport->xprt.resvport)
+			break;
+		if (err == 0) {
+			transport->port = port;
+			break;
+		}
+		if (port <= xprt_min_resvport)
+			port = xprt_max_resvport;
+		else
+			port--;
+	} while (err == -EADDRINUSE && port != transport->port);
+	dprintk("RPC:       xs_bind6 "NIP6_FMT":%u: %s (%d)\n",
+		NIP6(myaddr.sin6_addr), port, err ? "failed" : "ok", err);
 	return err;
 }
 
@@ -1183,38 +1346,69 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
 static struct lock_class_key xs_key[2];
 static struct lock_class_key xs_slock_key[2];
 
-static inline void xs_reclassify_socket(struct socket *sock)
+static inline void xs_reclassify_socket4(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
+
 	BUG_ON(sock_owned_by_user(sk));
-	switch (sk->sk_family) {
-	case AF_INET:
-		sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS",
-			&xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]);
-		break;
+	sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC",
+		&xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]);
+}
 
-	case AF_INET6:
-		sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS",
-			&xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]);
-		break;
+static inline void xs_reclassify_socket6(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
 
-	default:
-		BUG();
-	}
+	BUG_ON(sock_owned_by_user(sk));
+	sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC",
+		&xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]);
 }
 #else
-static inline void xs_reclassify_socket(struct socket *sock)
+static inline void xs_reclassify_socket4(struct socket *sock)
+{
+}
+
+static inline void xs_reclassify_socket6(struct socket *sock)
 {
 }
 #endif
 
+static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
+{
+	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+
+	if (!transport->inet) {
+		struct sock *sk = sock->sk;
+
+		write_lock_bh(&sk->sk_callback_lock);
+
+		sk->sk_user_data = xprt;
+		transport->old_data_ready = sk->sk_data_ready;
+		transport->old_state_change = sk->sk_state_change;
+		transport->old_write_space = sk->sk_write_space;
+		sk->sk_data_ready = xs_udp_data_ready;
+		sk->sk_write_space = xs_udp_write_space;
+		sk->sk_no_check = UDP_CSUM_NORCV;
+		sk->sk_allocation = GFP_ATOMIC;
+
+		xprt_set_connected(xprt);
+
+		/* Reset to new socket */
+		transport->sock = sock;
+		transport->inet = sk;
+
+		write_unlock_bh(&sk->sk_callback_lock);
+	}
+	xs_udp_do_set_buffer_size(xprt);
+}
+
 /**
- * xs_udp_connect_worker - set up a UDP socket
+ * xs_udp_connect_worker4 - set up a UDP socket
  * @work: RPC transport to connect
  *
  * Invoked by a work queue tasklet.
  */
-static void xs_udp_connect_worker(struct work_struct *work)
+static void xs_udp_connect_worker4(struct work_struct *work)
 {
 	struct sock_xprt *transport =
 		container_of(work, struct sock_xprt, connect_worker.work);
@@ -1232,9 +1426,9 @@ static void xs_udp_connect_worker(struct work_struct *work)
 		dprintk("RPC:       can't create UDP transport socket (%d).\n", -err);
 		goto out;
 	}
-	xs_reclassify_socket(sock);
+	xs_reclassify_socket4(sock);
 
-	if (xs_bind(transport, sock)) {
+	if (xs_bind4(transport, sock)) {
 		sock_release(sock);
 		goto out;
 	}
@@ -1242,29 +1436,48 @@ static void xs_udp_connect_worker(struct work_struct *work)
 	dprintk("RPC:       worker connecting xprt %p to address: %s\n",
 			xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
 
-	if (!transport->inet) {
-		struct sock *sk = sock->sk;
+	xs_udp_finish_connecting(xprt, sock);
+	status = 0;
+out:
+	xprt_wake_pending_tasks(xprt, status);
+	xprt_clear_connecting(xprt);
+}
 
-		write_lock_bh(&sk->sk_callback_lock);
+/**
+ * xs_udp_connect_worker6 - set up a UDP socket
+ * @work: RPC transport to connect
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_udp_connect_worker6(struct work_struct *work)
+{
+	struct sock_xprt *transport =
+		container_of(work, struct sock_xprt, connect_worker.work);
+	struct rpc_xprt *xprt = &transport->xprt;
+	struct socket *sock = transport->sock;
+	int err, status = -EIO;
 
-		sk->sk_user_data = xprt;
-		transport->old_data_ready = sk->sk_data_ready;
-		transport->old_state_change = sk->sk_state_change;
-		transport->old_write_space = sk->sk_write_space;
-		sk->sk_data_ready = xs_udp_data_ready;
-		sk->sk_write_space = xs_udp_write_space;
-		sk->sk_no_check = UDP_CSUM_NORCV;
-		sk->sk_allocation = GFP_ATOMIC;
+	if (xprt->shutdown || !xprt_bound(xprt))
+		goto out;
 
-		xprt_set_connected(xprt);
+	/* Start by resetting any existing state */
+	xs_close(xprt);
 
-		/* Reset to new socket */
-		transport->sock = sock;
-		transport->inet = sk;
+	if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+		dprintk("RPC:       can't create UDP transport socket (%d).\n", -err);
+		goto out;
+	}
+	xs_reclassify_socket6(sock);
 
-		write_unlock_bh(&sk->sk_callback_lock);
+	if (xs_bind6(transport, sock) < 0) {
+		sock_release(sock);
+		goto out;
 	}
-	xs_udp_do_set_buffer_size(xprt);
+
+	dprintk("RPC:       worker connecting xprt %p to address: %s\n",
+			xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+
+	xs_udp_finish_connecting(xprt, sock);
 	status = 0;
 out:
 	xprt_wake_pending_tasks(xprt, status);
@@ -1295,13 +1508,52 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
 				result);
 }
 
+static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
+{
+	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+
+	if (!transport->inet) {
+		struct sock *sk = sock->sk;
+
+		write_lock_bh(&sk->sk_callback_lock);
+
+		sk->sk_user_data = xprt;
+		transport->old_data_ready = sk->sk_data_ready;
+		transport->old_state_change = sk->sk_state_change;
+		transport->old_write_space = sk->sk_write_space;
+		sk->sk_data_ready = xs_tcp_data_ready;
+		sk->sk_state_change = xs_tcp_state_change;
+		sk->sk_write_space = xs_tcp_write_space;
+		sk->sk_allocation = GFP_ATOMIC;
+
+		/* socket options */
+		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
+		sock_reset_flag(sk, SOCK_LINGER);
+		tcp_sk(sk)->linger2 = 0;
+		tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
+
+		xprt_clear_connected(xprt);
+
+		/* Reset to new socket */
+		transport->sock = sock;
+		transport->inet = sk;
+
+		write_unlock_bh(&sk->sk_callback_lock);
+	}
+
+	/* Tell the socket layer to start connecting... */
+	xprt->stat.connect_count++;
+	xprt->stat.connect_start = jiffies;
+	return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
+}
+
 /**
- * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint
+ * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
  * @work: RPC transport to connect
  *
  * Invoked by a work queue tasklet.
  */
-static void xs_tcp_connect_worker(struct work_struct *work)
+static void xs_tcp_connect_worker4(struct work_struct *work)
 {
 	struct sock_xprt *transport =
 		container_of(work, struct sock_xprt, connect_worker.work);
@@ -1315,13 +1567,12 @@ static void xs_tcp_connect_worker(struct work_struct *work)
 	if (!sock) {
 		/* start from scratch */
 		if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
-			dprintk("RPC:       can't create TCP transport "
-					"socket (%d).\n", -err);
+			dprintk("RPC:       can't create TCP transport socket (%d).\n", -err);
 			goto out;
 		}
-		xs_reclassify_socket(sock);
+		xs_reclassify_socket4(sock);
 
-		if (xs_bind(transport, sock)) {
+		if (xs_bind4(transport, sock) < 0) {
 			sock_release(sock);
 			goto out;
 		}
@@ -1332,43 +1583,70 @@ static void xs_tcp_connect_worker(struct work_struct *work)
 	dprintk("RPC:       worker connecting xprt %p to address: %s\n",
 			xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
 
-	if (!transport->inet) {
-		struct sock *sk = sock->sk;
-
-		write_lock_bh(&sk->sk_callback_lock);
+	status = xs_tcp_finish_connecting(xprt, sock);
+	dprintk("RPC:       %p connect status %d connected %d sock state %d\n",
+			xprt, -status, xprt_connected(xprt),
+			sock->sk->sk_state);
+	if (status < 0) {
+		switch (status) {
+			case -EINPROGRESS:
+			case -EALREADY:
+				goto out_clear;
+			case -ECONNREFUSED:
+			case -ECONNRESET:
+				/* retry with existing socket, after a delay */
+				break;
+			default:
+				/* get rid of existing socket, and retry */
+				xs_close(xprt);
+				break;
+		}
+	}
+out:
+	xprt_wake_pending_tasks(xprt, status);
+out_clear:
+	xprt_clear_connecting(xprt);
+}
 
-		sk->sk_user_data = xprt;
-		transport->old_data_ready = sk->sk_data_ready;
-		transport->old_state_change = sk->sk_state_change;
-		transport->old_write_space = sk->sk_write_space;
-		sk->sk_data_ready = xs_tcp_data_ready;
-		sk->sk_state_change = xs_tcp_state_change;
-		sk->sk_write_space = xs_tcp_write_space;
-		sk->sk_allocation = GFP_ATOMIC;
+/**
+ * xs_tcp_connect_worker6 - connect a TCP socket to a remote IPv6 endpoint
+ * @work: RPC transport to connect
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_tcp_connect_worker6(struct work_struct *work)
+{
+	struct sock_xprt *transport =
+		container_of(work, struct sock_xprt, connect_worker.work);
+	struct rpc_xprt *xprt = &transport->xprt;
+	struct socket *sock = transport->sock;
+	int err, status = -EIO;
 
-		/* socket options */
-		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
-		sock_reset_flag(sk, SOCK_LINGER);
-		tcp_sk(sk)->linger2 = 0;
-		tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
+	if (xprt->shutdown || !xprt_bound(xprt))
+		goto out;
 
-		xprt_clear_connected(xprt);
+	if (!sock) {
+		/* start from scratch */
+		if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
+			dprintk("RPC:       can't create TCP transport socket (%d).\n", -err);
+			goto out;
+		}
+		xs_reclassify_socket6(sock);
 
-		/* Reset to new socket */
-		transport->sock = sock;
-		transport->inet = sk;
+		if (xs_bind6(transport, sock) < 0) {
+			sock_release(sock);
+			goto out;
+		}
+	} else
+		/* "close" the socket, preserving the local port */
+		xs_tcp_reuse_connection(xprt);
 
-		write_unlock_bh(&sk->sk_callback_lock);
-	}
+	dprintk("RPC:       worker connecting xprt %p to address: %s\n",
+			xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
 
-	/* Tell the socket layer to start connecting... */
-	xprt->stat.connect_count++;
-	xprt->stat.connect_start = jiffies;
-	status = kernel_connect(sock, (struct sockaddr *) &xprt->addr,
-			xprt->addrlen, O_NONBLOCK);
+	status = xs_tcp_finish_connecting(xprt, sock);
 	dprintk("RPC:       %p connect status %d connected %d sock state %d\n",
-			xprt, -status, xprt_connected(xprt),
-			sock->sk->sk_state);
+			xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
 	if (status < 0) {
 		switch (status) {
 			case -EINPROGRESS:
@@ -1508,7 +1786,8 @@ static struct rpc_xprt_ops xs_tcp_ops = {
 	.print_stats		= xs_tcp_print_stats,
 };
 
-static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned int slot_table_size)
+static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
+				      unsigned int slot_table_size)
 {
 	struct rpc_xprt *xprt;
 	struct sock_xprt *new;
@@ -1549,8 +1828,9 @@ static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned
  * @args: rpc transport creation arguments
  *
  */
-struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args)
+struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 {
+	struct sockaddr *addr = args->dstaddr;
 	struct rpc_xprt *xprt;
 	struct sock_xprt *transport;
 
@@ -1559,15 +1839,11 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args)
 		return xprt;
 	transport = container_of(xprt, struct sock_xprt, xprt);
 
-	if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0)
-		xprt_set_bound(xprt);
-
 	xprt->prot = IPPROTO_UDP;
 	xprt->tsh_size = 0;
 	/* XXX: header size can vary due to auth type, IPv6, etc. */
 	xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
 
-	INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker);
 	xprt->bind_timeout = XS_BIND_TO;
 	xprt->connect_timeout = XS_UDP_CONN_TO;
 	xprt->reestablish_timeout = XS_UDP_REEST_TO;
@@ -1580,11 +1856,37 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args)
 	else
 		xprt_set_timeout(&xprt->timeout, 5, 5 * HZ);
 
-	xs_format_peer_addresses(xprt);
+	switch (addr->sa_family) {
+	case AF_INET:
+		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
+			xprt_set_bound(xprt);
+
+		INIT_DELAYED_WORK(&transport->connect_worker,
+					xs_udp_connect_worker4);
+		xs_format_ipv4_peer_addresses(xprt);
+		break;
+	case AF_INET6:
+		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
+			xprt_set_bound(xprt);
+
+		INIT_DELAYED_WORK(&transport->connect_worker,
+					xs_udp_connect_worker6);
+		xs_format_ipv6_peer_addresses(xprt);
+		break;
+	default:
+		kfree(xprt);
+		return ERR_PTR(-EAFNOSUPPORT);
+	}
+
 	dprintk("RPC:       set up transport to address %s\n",
 			xprt->address_strings[RPC_DISPLAY_ALL]);
 
-	return xprt;
+	if (try_module_get(THIS_MODULE))
+		return xprt;
+
+	kfree(xprt->slot);
+	kfree(xprt);
+	return ERR_PTR(-EINVAL);
 }
 
 /**
@@ -1592,8 +1894,9 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args)
  * @args: rpc transport creation arguments
  *
  */
-struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args)
+struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 {
+	struct sockaddr *addr = args->dstaddr;
 	struct rpc_xprt *xprt;
 	struct sock_xprt *transport;
 
@@ -1602,14 +1905,10 @@ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args)
 		return xprt;
 	transport = container_of(xprt, struct sock_xprt, xprt);
 
-	if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0)
-		xprt_set_bound(xprt);
-
 	xprt->prot = IPPROTO_TCP;
 	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
-	INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker);
 	xprt->bind_timeout = XS_BIND_TO;
 	xprt->connect_timeout = XS_TCP_CONN_TO;
 	xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
@@ -1622,15 +1921,55 @@ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args)
 	else
 		xprt_set_timeout(&xprt->timeout, 2, 60 * HZ);
 
-	xs_format_peer_addresses(xprt);
+	switch (addr->sa_family) {
+	case AF_INET:
+		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
+			xprt_set_bound(xprt);
+
+		INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4);
+		xs_format_ipv4_peer_addresses(xprt);
+		break;
+	case AF_INET6:
+		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
+			xprt_set_bound(xprt);
+
+		INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6);
+		xs_format_ipv6_peer_addresses(xprt);
+		break;
+	default:
+		kfree(xprt);
+		return ERR_PTR(-EAFNOSUPPORT);
+	}
+
 	dprintk("RPC:       set up transport to address %s\n",
 			xprt->address_strings[RPC_DISPLAY_ALL]);
 
-	return xprt;
+	if (try_module_get(THIS_MODULE))
+		return xprt;
+
+	kfree(xprt->slot);
+	kfree(xprt);
+	return ERR_PTR(-EINVAL);
 }
 
+static struct xprt_class xs_udp_transport = {	/* UDP class, registered in init_socket_xprt() */
+	.name	= "udp",
+	.ident	= IPPROTO_UDP,
+	.setup	= xs_setup_udp,
+	.owner	= THIS_MODULE,
+	.list	= LIST_HEAD_INIT(xs_udp_transport.list),
+};
+
+static struct xprt_class xs_tcp_transport = {	/* TCP class, registered in init_socket_xprt() */
+	.name	= "tcp",
+	.ident	= IPPROTO_TCP,
+	.setup	= xs_setup_tcp,
+	.owner	= THIS_MODULE,
+	.list	= LIST_HEAD_INIT(xs_tcp_transport.list),
+};
+
 /**
- * init_socket_xprt - set up xprtsock's sysctls
+ * init_socket_xprt - set up xprtsock's sysctls, register with RPC client
  *
  */
 int init_socket_xprt(void)
@@ -1640,11 +1979,14 @@ int init_socket_xprt(void)
 		sunrpc_table_header = register_sysctl_table(sunrpc_table);
 #endif
 
+	xprt_register_transport(&xs_udp_transport);
+	xprt_register_transport(&xs_tcp_transport);
+
 	return 0;
 }
 
 /**
- * cleanup_socket_xprt - remove xprtsock's sysctls
+ * cleanup_socket_xprt - remove xprtsock's sysctls, unregister from RPC client
  *
  */
 void cleanup_socket_xprt(void)
@@ -1655,4 +1997,7 @@ void cleanup_socket_xprt(void)
 		sunrpc_table_header = NULL;
 	}
 #endif
+
+	xprt_unregister_transport(&xs_udp_transport);
+	xprt_unregister_transport(&xs_tcp_transport);
 }
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2b57eaf66ab..6996cba5aa9 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -334,7 +334,7 @@ static void unix_write_space(struct sock *sk)
 	read_lock(&sk->sk_callback_lock);
 	if (unix_writable(sk)) {
 		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-			wake_up_interruptible(sk->sk_sleep);
+			wake_up_interruptible_sync(sk->sk_sleep);
 		sk_wake_async(sk, 2, POLL_OUT);
 	}
 	read_unlock(&sk->sk_callback_lock);
@@ -1639,7 +1639,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (!skb)
 		goto out_unlock;
 
-	wake_up_interruptible(&u->peer_wait);
+	wake_up_interruptible_sync(&u->peer_wait);
 
 	if (msg->msg_name)
 		unix_copy_addr(msg, skb->sk);
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 2d5d2255a27..29f820e1825 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -53,8 +53,7 @@ static void wiphy_dev_release(struct device *dev)
 }
 
 #ifdef CONFIG_HOTPLUG
-static int wiphy_uevent(struct device *dev, char **envp,
-			int num_envp, char *buf, int size)
+static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
 	/* TODO, we probably need stuff here */
 	return 0;