aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/atm/atm_sysfs.c7
-rw-r--r--net/bridge/br.c2
-rw-r--r--net/bridge/br_input.c6
-rw-r--r--net/bridge/br_netfilter.c36
-rw-r--r--net/bridge/br_sysfs_br.c2
-rw-r--r--net/bridge/netfilter/ebt_arpreply.c3
-rw-r--r--net/bridge/netfilter/ebt_dnat.c17
-rw-r--r--net/bridge/netfilter/ebt_mark.c10
-rw-r--r--net/bridge/netfilter/ebt_redirect.c21
-rw-r--r--net/bridge/netfilter/ebt_snat.c23
-rw-r--r--net/bridge/netfilter/ebtable_broute.c4
-rw-r--r--net/bridge/netfilter/ebtable_filter.c4
-rw-r--r--net/bridge/netfilter/ebtable_nat.c8
-rw-r--r--net/bridge/netfilter/ebtables.c12
-rw-r--r--net/core/dev.c102
-rw-r--r--net/core/neighbour.c2
-rw-r--r--net/core/net-sysfs.c14
-rw-r--r--net/core/skbuff.c195
-rw-r--r--net/core/sock.c1
-rw-r--r--net/dccp/ipv6.c3
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c4
-rw-r--r--net/ipv4/Makefile3
-rw-r--r--net/ipv4/inet_fragment.c174
-rw-r--r--net/ipv4/inet_lro.c16
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_fragment.c421
-rw-r--r--net/ipv4/ip_input.c10
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_app.c33
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c104
-rw-r--r--net/ipv4/ipvs/ip_vs_ftp.c19
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c51
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_udp.c51
-rw-r--r--net/ipv4/ipvs/ip_vs_xmit.c6
-rw-r--r--net/ipv4/netfilter.c77
-rw-r--r--net/ipv4/netfilter/arp_tables.c20
-rw-r--r--net/ipv4/netfilter/arpt_mangle.c28
-rw-r--r--net/ipv4/netfilter/arptable_filter.c4
-rw-r--r--net/ipv4/netfilter/ip_queue.c24
-rw-r--r--net/ipv4/netfilter/ip_tables.c20
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c14
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c27
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c4
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c6
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c8
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c6
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c22
-rw-r--r--net/ipv4/netfilter/ipt_SAME.c4
-rw-r--r--net/ipv4/netfilter/ipt_TOS.c8
-rw-r--r--net/ipv4/netfilter/ipt_TTL.c6
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c4
-rw-r--r--net/ipv4/netfilter/iptable_filter.c12
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c22
-rw-r--r--net/ipv4/netfilter/iptable_raw.c12
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c48
-rw-r--r--net/ipv4/netfilter/nf_nat_amanda.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c50
-rw-r--r--net/ipv4/netfilter/nf_nat_ftp.c18
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c58
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c118
-rw-r--r--net/ipv4/netfilter/nf_nat_irc.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c8
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c10
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_tcp.c16
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udp.c16
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_unknown.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c14
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c56
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c14
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c62
-rw-r--r--net/ipv4/netfilter/nf_nat_tftp.c2
-rw-r--r--net/ipv4/proc.c4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c9
-rw-r--r--net/ipv4/tcp_input.c11
-rw-r--r--net/ipv4/xfrm4_output.c4
-rw-r--r--net/ipv6/exthdrs.c64
-rw-r--r--net/ipv6/icmp.c5
-rw-r--r--net/ipv6/inet6_connection_sock.c2
-rw-r--r--net/ipv6/ip6_input.c6
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/netfilter.c6
-rw-r--r--net/ipv6/netfilter/ip6_queue.c20
-rw-r--r--net/ipv6/netfilter/ip6_tables.c16
-rw-r--r--net/ipv6/netfilter/ip6t_HL.c6
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c5
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c14
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c12
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c32
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c4
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c51
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c314
-rw-r--r--net/ipv6/proc.c2
-rw-r--r--net/ipv6/reassembly.c361
-rw-r--r--net/ipv6/route.c77
-rw-r--r--net/ipv6/sysctl_net_ipv6.c9
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/ipv6/tunnel6.c6
-rw-r--r--net/ipv6/udp.c7
-rw-r--r--net/ipv6/udp_impl.h2
-rw-r--r--net/ipv6/udplite.c4
-rw-r--r--net/ipv6/xfrm6_input.c4
-rw-r--r--net/ipv6/xfrm6_output.c4
-rw-r--r--net/netfilter/core.c48
-rw-r--r--net/netfilter/nf_conntrack_amanda.c20
-rw-r--r--net/netfilter/nf_conntrack_core.c30
-rw-r--r--net/netfilter/nf_conntrack_ftp.c18
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c236
-rw-r--r--net/netfilter/nf_conntrack_irc.c16
-rw-r--r--net/netfilter/nf_conntrack_netbios_ns.c10
-rw-r--r--net/netfilter/nf_conntrack_pptp.c28
-rw-r--r--net/netfilter/nf_conntrack_sane.c10
-rw-r--r--net/netfilter/nf_conntrack_sip.c24
-rw-r--r--net/netfilter/nf_conntrack_tftp.c8
-rw-r--r--net/netfilter/nf_internals.h2
-rw-r--r--net/netfilter/nf_queue.c4
-rw-r--r--net/netfilter/nfnetlink_queue.c20
-rw-r--r--net/netfilter/xt_CLASSIFY.c4
-rw-r--r--net/netfilter/xt_CONNMARK.c14
-rw-r--r--net/netfilter/xt_CONNSECMARK.c3
-rw-r--r--net/netfilter/xt_DSCP.c16
-rw-r--r--net/netfilter/xt_MARK.c12
-rw-r--r--net/netfilter/xt_NFLOG.c4
-rw-r--r--net/netfilter/xt_NFQUEUE.c2
-rw-r--r--net/netfilter/xt_NOTRACK.c10
-rw-r--r--net/netfilter/xt_SECMARK.c4
-rw-r--r--net/netfilter/xt_TCPMSS.c58
-rw-r--r--net/netfilter/xt_TRACE.c4
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/sched/act_ipt.c6
-rw-r--r--net/sched/sch_ingress.c5
-rw-r--r--net/sctp/ipv6.c4
-rw-r--r--net/sctp/socket.c10
-rw-r--r--net/sunrpc/Makefile1
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c6
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c144
-rw-r--r--net/sunrpc/clnt.c52
-rw-r--r--net/sunrpc/rpc_pipe.c8
-rw-r--r--net/sunrpc/rpcb_clnt.c151
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/sunrpc/socklib.c3
-rw-r--r--net/sunrpc/sunrpc_syms.c2
-rw-r--r--net/sunrpc/svc.c40
-rw-r--r--net/sunrpc/timer.c4
-rw-r--r--net/sunrpc/xprt.c116
-rw-r--r--net/sunrpc/xprtrdma/Makefile3
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c868
-rw-r--r--net/sunrpc/xprtrdma/transport.c800
-rw-r--r--net/sunrpc/xprtrdma/verbs.c1627
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h330
-rw-r--r--net/sunrpc/xprtsock.c567
-rw-r--r--net/unix/af_unix.c4
-rw-r--r--net/wireless/sysfs.c3
153 files changed, 6243 insertions, 2390 deletions
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index f094a0879c1..9ef07eda2c4 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -105,10 +105,9 @@ static struct class_device_attribute *atm_attrs[] = {
NULL
};
-static int atm_uevent(struct class_device *cdev, char **envp, int num_envp, char *buf, int size)
+static int atm_uevent(struct class_device *cdev, struct kobj_uevent_env *env)
{
struct atm_dev *adev;
- int i = 0, len = 0;
if (!cdev)
return -ENODEV;
@@ -117,11 +116,9 @@ static int atm_uevent(struct class_device *cdev, char **envp, int num_envp, char
if (!adev)
return -ENODEV;
- if (add_uevent_var(envp, num_envp, &i, buf, size, &len,
- "NAME=%s%d", adev->type, adev->number))
+ if (add_uevent_var(env, "NAME=%s%d", adev->type, adev->number))
return -ENOMEM;
- envp[i] = NULL;
return 0;
}
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 848b8fa8bed..93867bb6cc9 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -23,7 +23,7 @@
#include "br_private.h"
-int (*br_should_route_hook) (struct sk_buff **pskb) = NULL;
+int (*br_should_route_hook)(struct sk_buff *skb);
static struct llc_sap *br_stp_sap;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 3a8a015c92e..3cedd4eeeed 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -126,6 +126,10 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
goto drop;
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
+ return NULL;
+
if (unlikely(is_link_local(dest))) {
/* Pause frames shouldn't be passed up by driver anyway */
if (skb->protocol == htons(ETH_P_PAUSE))
@@ -145,7 +149,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
case BR_STATE_FORWARDING:
if (br_should_route_hook) {
- if (br_should_route_hook(&skb))
+ if (br_should_route_hook(skb))
return skb;
dest = eth_hdr(skb)->h_dest;
}
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 8245f051ccb..da22f900e89 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -503,18 +503,14 @@ inhdr_error:
* receiving device) to make netfilter happy, the REDIRECT
* target in particular. Save the original destination IP
* address to be able to detect DNAT afterwards. */
-static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct iphdr *iph;
- struct sk_buff *skb = *pskb;
__u32 len = nf_bridge_encap_header_len(skb);
- if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
- return NF_STOLEN;
-
if (unlikely(!pskb_may_pull(skb, len)))
goto out;
@@ -584,13 +580,11 @@ out:
* took place when the packet entered the bridge), but we
* register an IPv4 PRE_ROUTING 'sabotage' hook that will
* prevent this from happening. */
-static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct sk_buff *skb = *pskb;
-
if (skb->dst == (struct dst_entry *)&__fake_rtable) {
dst_release(skb->dst);
skb->dst = NULL;
@@ -625,12 +619,11 @@ static int br_nf_forward_finish(struct sk_buff *skb)
* but we are still able to filter on the 'real' indev/outdev
* because of the physdev module. For ARP, indev and outdev are the
* bridge ports. */
-static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct sk_buff *skb = *pskb;
struct nf_bridge_info *nf_bridge;
struct net_device *parent;
int pf;
@@ -648,7 +641,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
else
pf = PF_INET6;
- nf_bridge_pull_encap_header(*pskb);
+ nf_bridge_pull_encap_header(skb);
nf_bridge = skb->nf_bridge;
if (skb->pkt_type == PACKET_OTHERHOST) {
@@ -666,12 +659,11 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
return NF_STOLEN;
}
-static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct sk_buff *skb = *pskb;
struct net_device **d = (struct net_device **)(skb->cb);
#ifdef CONFIG_SYSCTL
@@ -682,12 +674,12 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
if (skb->protocol != htons(ETH_P_ARP)) {
if (!IS_VLAN_ARP(skb))
return NF_ACCEPT;
- nf_bridge_pull_encap_header(*pskb);
+ nf_bridge_pull_encap_header(skb);
}
if (arp_hdr(skb)->ar_pln != 4) {
if (IS_VLAN_ARP(skb))
- nf_bridge_push_encap_header(*pskb);
+ nf_bridge_push_encap_header(skb);
return NF_ACCEPT;
}
*d = (struct net_device *)in;
@@ -709,13 +701,12 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
* NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
* will be executed.
*/
-static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
struct net_device *realindev;
- struct sk_buff *skb = *pskb;
struct nf_bridge_info *nf_bridge;
if (!skb->nf_bridge)
@@ -752,13 +743,12 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb)
}
/* PF_BRIDGE/POST_ROUTING ********************************************/
-static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
+static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct sk_buff *skb = *pskb;
- struct nf_bridge_info *nf_bridge = (*pskb)->nf_bridge;
+ struct nf_bridge_info *nf_bridge = skb->nf_bridge;
struct net_device *realoutdev = bridge_parent(skb->dev);
int pf;
@@ -828,13 +818,13 @@ print_error:
/* IP/SABOTAGE *****************************************************/
/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
* for the second time. */
-static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb,
+static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- if ((*pskb)->nf_bridge &&
- !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
+ if (skb->nf_bridge &&
+ !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
return NF_STOP;
}
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index c65f54e0e27..3312e8f2abe 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -435,7 +435,7 @@ int br_sysfs_addbr(struct net_device *dev)
err = kobject_register(&br->ifobj);
if (err) {
pr_info("%s: can't add kobject (directory) %s/%s\n",
- __FUNCTION__, dev->name, br->ifobj.name);
+ __FUNCTION__, dev->name, kobject_name(&br->ifobj));
goto out3;
}
return 0;
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index ffe468a632e..48a80e42328 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -15,7 +15,7 @@
#include <net/arp.h>
#include <linux/module.h>
-static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr,
const struct net_device *in, const struct net_device *out,
const void *data, unsigned int datalen)
{
@@ -23,7 +23,6 @@ static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr,
__be32 _sip, *siptr, _dip, *diptr;
struct arphdr _ah, *ap;
unsigned char _sha[ETH_ALEN], *shp;
- struct sk_buff *skb = *pskb;
ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah);
if (ap == NULL)
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index 4582659dff0..74262e9a566 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -8,29 +8,22 @@
*
*/
+#include <linux/netfilter.h>
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_bridge/ebt_nat.h>
#include <linux/module.h>
#include <net/sock.h>
-static int ebt_target_dnat(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr,
const struct net_device *in, const struct net_device *out,
const void *data, unsigned int datalen)
{
struct ebt_nat_info *info = (struct ebt_nat_info *)data;
- if (skb_shared(*pskb) || skb_cloned(*pskb)) {
- struct sk_buff *nskb;
+ if (skb_make_writable(skb, 0))
+ return NF_DROP;
- nskb = skb_copy(*pskb, GFP_ATOMIC);
- if (!nskb)
- return NF_DROP;
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- }
- memcpy(eth_hdr(*pskb)->h_dest, info->mac, ETH_ALEN);
+ memcpy(eth_hdr(skb)->h_dest, info->mac, ETH_ALEN);
return info->target;
}
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 62d23c7b25e..6cba54309c0 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -17,7 +17,7 @@
#include <linux/netfilter_bridge/ebt_mark_t.h>
#include <linux/module.h>
-static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr,
const struct net_device *in, const struct net_device *out,
const void *data, unsigned int datalen)
{
@@ -25,13 +25,13 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr,
int action = info->target & -16;
if (action == MARK_SET_VALUE)
- (*pskb)->mark = info->mark;
+ skb->mark = info->mark;
else if (action == MARK_OR_VALUE)
- (*pskb)->mark |= info->mark;
+ skb->mark |= info->mark;
else if (action == MARK_AND_VALUE)
- (*pskb)->mark &= info->mark;
+ skb->mark &= info->mark;
else
- (*pskb)->mark ^= info->mark;
+ skb->mark ^= info->mark;
return info->target | ~EBT_VERDICT_BITS;
}
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 9f378eab72d..422cb834cff 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -8,35 +8,28 @@
*
*/
+#include <linux/netfilter.h>
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_bridge/ebt_redirect.h>
#include <linux/module.h>
#include <net/sock.h>
#include "../br_private.h"
-static int ebt_target_redirect(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr,
const struct net_device *in, const struct net_device *out,
const void *data, unsigned int datalen)
{
struct ebt_redirect_info *info = (struct ebt_redirect_info *)data;
- if (skb_shared(*pskb) || skb_cloned(*pskb)) {
- struct sk_buff *nskb;
+ if (skb_make_writable(skb, 0))
+ return NF_DROP;
- nskb = skb_copy(*pskb, GFP_ATOMIC);
- if (!nskb)
- return NF_DROP;
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- }
if (hooknr != NF_BR_BROUTING)
- memcpy(eth_hdr(*pskb)->h_dest,
+ memcpy(eth_hdr(skb)->h_dest,
in->br_port->br->dev->dev_addr, ETH_ALEN);
else
- memcpy(eth_hdr(*pskb)->h_dest, in->dev_addr, ETH_ALEN);
- (*pskb)->pkt_type = PACKET_HOST;
+ memcpy(eth_hdr(skb)->h_dest, in->dev_addr, ETH_ALEN);
+ skb->pkt_type = PACKET_HOST;
return info->target;
}
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index a50722182bf..425ac920904 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -8,6 +8,7 @@
*
*/
+#include <linux/netfilter.h>
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_bridge/ebt_nat.h>
#include <linux/module.h>
@@ -15,34 +16,26 @@
#include <linux/if_arp.h>
#include <net/arp.h>
-static int ebt_target_snat(struct sk_buff **pskb, unsigned int hooknr,
+static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr,
const struct net_device *in, const struct net_device *out,
const void *data, unsigned int datalen)
{
struct ebt_nat_info *info = (struct ebt_nat_info *) data;
- if (skb_shared(*pskb) || skb_cloned(*pskb)) {
- struct sk_buff *nskb;
+ if (skb_make_writable(skb, 0))
+ return NF_DROP;
- nskb = skb_copy(*pskb, GFP_ATOMIC);
- if (!nskb)
- return NF_DROP;
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- }
- memcpy(eth_hdr(*pskb)->h_source, info->mac, ETH_ALEN);
+ memcpy(eth_hdr(skb)->h_source, info->mac, ETH_ALEN);
if (!(info->target & NAT_ARP_BIT) &&
- eth_hdr(*pskb)->h_proto == htons(ETH_P_ARP)) {
+ eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) {
struct arphdr _ah, *ap;
- ap = skb_header_pointer(*pskb, 0, sizeof(_ah), &_ah);
+ ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah);
if (ap == NULL)
return EBT_DROP;
if (ap->ar_hln != ETH_ALEN)
goto out;
- if (skb_store_bits(*pskb, sizeof(_ah), info->mac,ETH_ALEN))
+ if (skb_store_bits(skb, sizeof(_ah), info->mac,ETH_ALEN))
return EBT_DROP;
}
out:
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index d37ce047893..e44519ebf1d 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -51,11 +51,11 @@ static struct ebt_table broute_table =
.me = THIS_MODULE,
};
-static int ebt_broute(struct sk_buff **pskb)
+static int ebt_broute(struct sk_buff *skb)
{
int ret;
- ret = ebt_do_table(NF_BR_BROUTING, pskb, (*pskb)->dev, NULL,
+ ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL,
&broute_table);
if (ret == NF_DROP)
return 1; /* route it */
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 81d84145c41..210493f99bc 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -61,10 +61,10 @@ static struct ebt_table frame_filter =
};
static unsigned int
-ebt_hook (unsigned int hook, struct sk_buff **pskb, const struct net_device *in,
+ebt_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in,
const struct net_device *out, int (*okfn)(struct sk_buff *))
{
- return ebt_do_table(hook, pskb, in, out, &frame_filter);
+ return ebt_do_table(hook, skb, in, out, &frame_filter);
}
static struct nf_hook_ops ebt_ops_filter[] = {
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 9c50488b62e..3e58c2e5ee2 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -61,17 +61,17 @@ static struct ebt_table frame_nat =
};
static unsigned int
-ebt_nat_dst(unsigned int hook, struct sk_buff **pskb, const struct net_device *in
+ebt_nat_dst(unsigned int hook, struct sk_buff *skb, const struct net_device *in
, const struct net_device *out, int (*okfn)(struct sk_buff *))
{
- return ebt_do_table(hook, pskb, in, out, &frame_nat);
+ return ebt_do_table(hook, skb, in, out, &frame_nat);
}
static unsigned int
-ebt_nat_src(unsigned int hook, struct sk_buff **pskb, const struct net_device *in
+ebt_nat_src(unsigned int hook, struct sk_buff *skb, const struct net_device *in
, const struct net_device *out, int (*okfn)(struct sk_buff *))
{
- return ebt_do_table(hook, pskb, in, out, &frame_nat);
+ return ebt_do_table(hook, skb, in, out, &frame_nat);
}
static struct nf_hook_ops ebt_ops_nat[] = {
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 6018d0e5193..d5a09eaef91 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -142,7 +142,7 @@ static inline int ebt_basic_match(struct ebt_entry *e, struct ethhdr *h,
}
/* Do some firewalling */
-unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb,
+unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
struct ebt_table *table)
{
@@ -172,19 +172,19 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb,
base = private->entries;
i = 0;
while (i < nentries) {
- if (ebt_basic_match(point, eth_hdr(*pskb), in, out))
+ if (ebt_basic_match(point, eth_hdr(skb), in, out))
goto letscontinue;
- if (EBT_MATCH_ITERATE(point, ebt_do_match, *pskb, in, out) != 0)
+ if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, in, out) != 0)
goto letscontinue;
/* increase counter */
(*(counter_base + i)).pcnt++;
- (*(counter_base + i)).bcnt+=(**pskb).len;
+ (*(counter_base + i)).bcnt += skb->len;
/* these should only watch: not modify, nor tell us
what to do with the packet */
- EBT_WATCHER_ITERATE(point, ebt_do_watcher, *pskb, hook, in,
+ EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, hook, in,
out);
t = (struct ebt_entry_target *)
@@ -193,7 +193,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb,
if (!t->u.target->target)
verdict = ((struct ebt_standard_target *)t)->verdict;
else
- verdict = t->u.target->target(pskb, hook,
+ verdict = t->u.target->target(skb, hook,
in, out, t->data, t->target_size);
if (verdict == EBT_ACCEPT) {
read_unlock_bh(&table->lock);
diff --git a/net/core/dev.c b/net/core/dev.c
index 1e169a541ce..38b03da5c1c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -557,6 +557,7 @@ __setup("netdev=", netdev_boot_setup);
/**
* __dev_get_by_name - find a device by its name
+ * @net: the applicable net namespace
* @name: name to find
*
* Find an interface by name. Must be called under RTNL semaphore
@@ -581,6 +582,7 @@ struct net_device *__dev_get_by_name(struct net *net, const char *name)
/**
* dev_get_by_name - find a device by its name
+ * @net: the applicable net namespace
* @name: name to find
*
* Find an interface by name. This can be called from any
@@ -604,6 +606,7 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
/**
* __dev_get_by_index - find a device by its ifindex
+ * @net: the applicable net namespace
* @ifindex: index of device
*
* Search for an interface by index. Returns %NULL if the device
@@ -629,6 +632,7 @@ struct net_device *__dev_get_by_index(struct net *net, int ifindex)
/**
* dev_get_by_index - find a device by its ifindex
+ * @net: the applicable net namespace
* @ifindex: index of device
*
* Search for an interface by index. Returns NULL if the device
@@ -651,6 +655,7 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
/**
* dev_getbyhwaddr - find a device by its hardware address
+ * @net: the applicable net namespace
* @type: media type of device
* @ha: hardware address
*
@@ -709,6 +714,7 @@ EXPORT_SYMBOL(dev_getfirstbyhwtype);
/**
* dev_get_by_flags - find any device with given flags
+ * @net: the applicable net namespace
* @if_flags: IFF_* values
* @mask: bitmask of bits in if_flags to check
*
@@ -948,6 +954,7 @@ void netdev_state_change(struct net_device *dev)
/**
* dev_load - load a network module
+ * @net: the applicable net namespace
* @name: name of interface
*
* If a network interface is not present and the process has suitable
@@ -1185,7 +1192,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
/**
* call_netdevice_notifiers - call all network notifier blocks
* @val: value passed unmodified to notifier function
- * @v: pointer passed unmodified to notifier function
+ * @dev: net_device pointer passed unmodified to notifier function
*
* Call all network notifier blocks. Parameters and return value
* are as for raw_notifier_call_chain().
@@ -1355,22 +1362,21 @@ int skb_checksum_help(struct sk_buff *skb)
goto out_set_summed;
}
- if (skb_cloned(skb)) {
+ offset = skb->csum_start - skb_headroom(skb);
+ BUG_ON(offset >= skb_headlen(skb));
+ csum = skb_checksum(skb, offset, skb->len - offset, 0);
+
+ offset += skb->csum_offset;
+ BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
+
+ if (skb_cloned(skb) &&
+ !skb_clone_writable(skb, offset + sizeof(__sum16))) {
ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
if (ret)
goto out;
}
- offset = skb->csum_start - skb_headroom(skb);
- BUG_ON(offset > (int)skb->len);
- csum = skb_checksum(skb, offset, skb->len-offset, 0);
-
- offset = skb_headlen(skb) - offset;
- BUG_ON(offset <= 0);
- BUG_ON(skb->csum_offset + 2 > offset);
-
- *(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
- csum_fold(csum);
+ *(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
skb->ip_summed = CHECKSUM_NONE;
out:
@@ -1942,27 +1948,51 @@ static int ing_filter(struct sk_buff *skb)
struct Qdisc *q;
struct net_device *dev = skb->dev;
int result = TC_ACT_OK;
+ u32 ttl = G_TC_RTTL(skb->tc_verd);
- if (dev->qdisc_ingress) {
- __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
- if (MAX_RED_LOOP < ttl++) {
- printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
- skb->iif, skb->dev->ifindex);
- return TC_ACT_SHOT;
- }
+ if (MAX_RED_LOOP < ttl++) {
+ printk(KERN_WARNING
+ "Redir loop detected Dropping packet (%d->%d)\n",
+ skb->iif, dev->ifindex);
+ return TC_ACT_SHOT;
+ }
+
+ skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
+ skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
- skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
+ spin_lock(&dev->ingress_lock);
+ if ((q = dev->qdisc_ingress) != NULL)
+ result = q->enqueue(skb, q);
+ spin_unlock(&dev->ingress_lock);
- skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
+ return result;
+}
+
+static inline struct sk_buff *handle_ing(struct sk_buff *skb,
+ struct packet_type **pt_prev,
+ int *ret, struct net_device *orig_dev)
+{
+ if (!skb->dev->qdisc_ingress)
+ goto out;
- spin_lock(&dev->ingress_lock);
- if ((q = dev->qdisc_ingress) != NULL)
- result = q->enqueue(skb, q);
- spin_unlock(&dev->ingress_lock);
+ if (*pt_prev) {
+ *ret = deliver_skb(skb, *pt_prev, orig_dev);
+ *pt_prev = NULL;
+ } else {
+ /* Huh? Why does turning on AF_PACKET affect this? */
+ skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
+ }
+ switch (ing_filter(skb)) {
+ case TC_ACT_SHOT:
+ case TC_ACT_STOLEN:
+ kfree_skb(skb);
+ return NULL;
}
- return result;
+out:
+ skb->tc_verd = 0;
+ return skb;
}
#endif
@@ -2014,21 +2044,9 @@ int netif_receive_skb(struct sk_buff *skb)
}
#ifdef CONFIG_NET_CLS_ACT
- if (pt_prev) {
- ret = deliver_skb(skb, pt_prev, orig_dev);
- pt_prev = NULL; /* noone else should process this after*/
- } else {
- skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
- }
-
- ret = ing_filter(skb);
-
- if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
- kfree_skb(skb);
+ skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
+ if (!skb)
goto out;
- }
-
- skb->tc_verd = 0;
ncls:
#endif
@@ -2097,7 +2115,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
/**
* __napi_schedule - schedule for receive
- * @napi: entry to schedule
+ * @n: entry to schedule
*
* The entry's receive function will be scheduled to run
*/
@@ -3259,6 +3277,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
/**
* dev_ioctl - network device ioctl
+ * @net: the applicable net namespace
* @cmd: command to issue
* @arg: pointer to a struct ifreq in user space
*
@@ -3436,6 +3455,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
/**
* dev_new_index - allocate an ifindex
+ * @net: the applicable net namespace
*
* Returns a suitable unique value for a new device interface
* number. The caller must hold the rtnl semaphore or the
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index c52df858d0b..cd3af59b38a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -481,6 +481,8 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
if (!creat)
goto out;
+ ASSERT_RTNL();
+
n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
if (!n)
goto out;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 909a03d6c0e..6628e457ddc 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -396,28 +396,22 @@ static struct attribute_group wireless_group = {
#endif /* CONFIG_SYSFS */
#ifdef CONFIG_HOTPLUG
-static int netdev_uevent(struct device *d, char **envp,
- int num_envp, char *buf, int size)
+static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
{
struct net_device *dev = to_net_dev(d);
- int retval, len = 0, i = 0;
+ int retval;
/* pass interface to uevent. */
- retval = add_uevent_var(envp, num_envp, &i,
- buf, size, &len,
- "INTERFACE=%s", dev->name);
+ retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
if (retval)
goto exit;
/* pass ifindex to uevent.
* ifindex is useful as it won't change (interface name may change)
* and is what RtNetlink uses natively. */
- retval = add_uevent_var(envp, num_envp, &i,
- buf, size, &len,
- "IFINDEX=%d", dev->ifindex);
+ retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex);
exit:
- envp[i] = NULL;
return retval;
}
#endif
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 944189d9632..70d9b5da96a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -362,6 +362,97 @@ void kfree_skb(struct sk_buff *skb)
__kfree_skb(skb);
}
+static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
+{
+ new->tstamp = old->tstamp;
+ new->dev = old->dev;
+ new->transport_header = old->transport_header;
+ new->network_header = old->network_header;
+ new->mac_header = old->mac_header;
+ new->dst = dst_clone(old->dst);
+#ifdef CONFIG_INET
+ new->sp = secpath_get(old->sp);
+#endif
+ memcpy(new->cb, old->cb, sizeof(old->cb));
+ new->csum_start = old->csum_start;
+ new->csum_offset = old->csum_offset;
+ new->local_df = old->local_df;
+ new->pkt_type = old->pkt_type;
+ new->ip_summed = old->ip_summed;
+ skb_copy_queue_mapping(new, old);
+ new->priority = old->priority;
+#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+ new->ipvs_property = old->ipvs_property;
+#endif
+ new->protocol = old->protocol;
+ new->mark = old->mark;
+ __nf_copy(new, old);
+#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+ new->nf_trace = old->nf_trace;
+#endif
+#ifdef CONFIG_NET_SCHED
+ new->tc_index = old->tc_index;
+#ifdef CONFIG_NET_CLS_ACT
+ new->tc_verd = old->tc_verd;
+#endif
+#endif
+ skb_copy_secmark(new, old);
+}
+
+static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
+{
+#define C(x) n->x = skb->x
+
+ n->next = n->prev = NULL;
+ n->sk = NULL;
+ __copy_skb_header(n, skb);
+
+ C(len);
+ C(data_len);
+ C(mac_len);
+ n->cloned = 1;
+ n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
+ n->nohdr = 0;
+ n->destructor = NULL;
+#ifdef CONFIG_NET_CLS_ACT
+ /* FIXME What is this and why don't we do it in copy_skb_header? */
+ n->tc_verd = SET_TC_VERD(n->tc_verd,0);
+ n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
+ n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
+ C(iif);
+#endif
+ C(truesize);
+ atomic_set(&n->users, 1);
+ C(head);
+ C(data);
+ C(tail);
+ C(end);
+
+ atomic_inc(&(skb_shinfo(skb)->dataref));
+ skb->cloned = 1;
+
+ return n;
+#undef C
+}
+
+/**
+ * skb_morph - morph one skb into another
+ * @dst: the skb to receive the contents
+ * @src: the skb to supply the contents
+ *
+ * This is identical to skb_clone except that the target skb is
+ * supplied by the user.
+ *
+ * The target skb is returned upon exit.
+ */
+struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
+{
+ skb_release_data(dst);
+ return __skb_clone(dst, src);
+}
+EXPORT_SYMBOL_GPL(skb_morph);
+
/**
* skb_clone - duplicate an sk_buff
* @skb: buffer to clone
@@ -393,66 +484,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
n->fclone = SKB_FCLONE_UNAVAILABLE;
}
-#define C(x) n->x = skb->x
-
- n->next = n->prev = NULL;
- n->sk = NULL;
- C(tstamp);
- C(dev);
- C(transport_header);
- C(network_header);
- C(mac_header);
- C(dst);
- dst_clone(skb->dst);
- C(sp);
-#ifdef CONFIG_INET
- secpath_get(skb->sp);
-#endif
- memcpy(n->cb, skb->cb, sizeof(skb->cb));
- C(len);
- C(data_len);
- C(mac_len);
- C(csum);
- C(local_df);
- n->cloned = 1;
- n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
- n->nohdr = 0;
- C(pkt_type);
- C(ip_summed);
- skb_copy_queue_mapping(n, skb);
- C(priority);
-#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
- C(ipvs_property);
-#endif
- C(protocol);
- n->destructor = NULL;
- C(mark);
- __nf_copy(n, skb);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
- C(nf_trace);
-#endif
-#ifdef CONFIG_NET_SCHED
- C(tc_index);
-#ifdef CONFIG_NET_CLS_ACT
- n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
- n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
- n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
- C(iif);
-#endif
-#endif
- skb_copy_secmark(n, skb);
- C(truesize);
- atomic_set(&n->users, 1);
- C(head);
- C(data);
- C(tail);
- C(end);
-
- atomic_inc(&(skb_shinfo(skb)->dataref));
- skb->cloned = 1;
-
- return n;
+ return __skb_clone(n, skb);
}
static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
@@ -463,50 +495,15 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
*/
unsigned long offset = new->data - old->data;
#endif
- new->sk = NULL;
- new->dev = old->dev;
- skb_copy_queue_mapping(new, old);
- new->priority = old->priority;
- new->protocol = old->protocol;
- new->dst = dst_clone(old->dst);
-#ifdef CONFIG_INET
- new->sp = secpath_get(old->sp);
-#endif
- new->csum_start = old->csum_start;
- new->csum_offset = old->csum_offset;
- new->ip_summed = old->ip_summed;
- new->transport_header = old->transport_header;
- new->network_header = old->network_header;
- new->mac_header = old->mac_header;
+
+ __copy_skb_header(new, old);
+
#ifndef NET_SKBUFF_DATA_USES_OFFSET
/* {transport,network,mac}_header are relative to skb->head */
new->transport_header += offset;
new->network_header += offset;
new->mac_header += offset;
#endif
- memcpy(new->cb, old->cb, sizeof(old->cb));
- new->local_df = old->local_df;
- new->fclone = SKB_FCLONE_UNAVAILABLE;
- new->pkt_type = old->pkt_type;
- new->tstamp = old->tstamp;
- new->destructor = NULL;
- new->mark = old->mark;
- __nf_copy(new, old);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
- new->nf_trace = old->nf_trace;
-#endif
-#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
- new->ipvs_property = old->ipvs_property;
-#endif
-#ifdef CONFIG_NET_SCHED
-#ifdef CONFIG_NET_CLS_ACT
- new->tc_verd = old->tc_verd;
-#endif
- new->tc_index = old->tc_index;
-#endif
- skb_copy_secmark(new, old);
- atomic_set(&new->users, 1);
skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
@@ -685,7 +682,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb->transport_header += off;
skb->network_header += off;
skb->mac_header += off;
- skb->csum_start += off;
+ skb->csum_start += nhead;
skb->cloned = 0;
skb->hdr_len = 0;
skb->nohdr = 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 4ed9b507c1e..d45ecdccc6a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -869,6 +869,7 @@ static inline void sock_lock_init(struct sock *sk)
/**
* sk_alloc - All socket objects are allocated here
+ * @net: the applicable net namespace
* @family: protocol family
* @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
* @prot: struct proto associated with this new sock instance
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 006a3834fbc..cac53548c2d 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -767,10 +767,9 @@ discard:
return 0;
}
-static int dccp_v6_rcv(struct sk_buff **pskb)
+static int dccp_v6_rcv(struct sk_buff *skb)
{
const struct dccp_hdr *dh;
- struct sk_buff *skb = *pskb;
struct sock *sk;
int min_cov;
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index f7fba7721e6..43fcd29046d 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -88,12 +88,12 @@ static void dnrmg_send_peer(struct sk_buff *skb)
static unsigned int dnrmg_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- dnrmg_send_peer(*pskb);
+ dnrmg_send_peer(skb);
return NF_ACCEPT;
}
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index a02c36d0a13..93fe3966805 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,7 +10,8 @@ obj-y := route.o inetpeer.o protocol.o \
tcp_minisocks.o tcp_cong.o \
datagram.o raw.o udp.o udplite.o \
arp.o icmp.o devinet.o af_inet.o igmp.o \
- sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
+ sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \
+ inet_fragment.o
obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
new file mode 100644
index 00000000000..484cf512858
--- /dev/null
+++ b/net/ipv4/inet_fragment.c
@@ -0,0 +1,174 @@
+/*
+ * inet fragments management
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Pavel Emelyanov <xemul@openvz.org>
+ * Started as consolidation of ipv4/ip_fragment.c,
+ * ipv6/reassembly. and ipv6 nf conntrack reassembly
+ */
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+
+#include <net/inet_frag.h>
+
+static void inet_frag_secret_rebuild(unsigned long dummy)
+{
+ struct inet_frags *f = (struct inet_frags *)dummy;
+ unsigned long now = jiffies;
+ int i;
+
+ write_lock(&f->lock);
+ get_random_bytes(&f->rnd, sizeof(u32));
+ for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+ struct inet_frag_queue *q;
+ struct hlist_node *p, *n;
+
+ hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) {
+ unsigned int hval = f->hashfn(q);
+
+ if (hval != i) {
+ hlist_del(&q->list);
+
+ /* Relink to new hash chain. */
+ hlist_add_head(&q->list, &f->hash[hval]);
+ }
+ }
+ }
+ write_unlock(&f->lock);
+
+ mod_timer(&f->secret_timer, now + f->ctl->secret_interval);
+}
+
+void inet_frags_init(struct inet_frags *f)
+{
+ int i;
+
+ for (i = 0; i < INETFRAGS_HASHSZ; i++)
+ INIT_HLIST_HEAD(&f->hash[i]);
+
+ INIT_LIST_HEAD(&f->lru_list);
+ rwlock_init(&f->lock);
+
+ f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
+ (jiffies ^ (jiffies >> 6)));
+
+ f->nqueues = 0;
+ atomic_set(&f->mem, 0);
+
+ init_timer(&f->secret_timer);
+ f->secret_timer.function = inet_frag_secret_rebuild;
+ f->secret_timer.data = (unsigned long)f;
+ f->secret_timer.expires = jiffies + f->ctl->secret_interval;
+ add_timer(&f->secret_timer);
+}
+EXPORT_SYMBOL(inet_frags_init);
+
+void inet_frags_fini(struct inet_frags *f)
+{
+ del_timer(&f->secret_timer);
+}
+EXPORT_SYMBOL(inet_frags_fini);
+
+static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+ write_lock(&f->lock);
+ hlist_del(&fq->list);
+ list_del(&fq->lru_list);
+ f->nqueues--;
+ write_unlock(&f->lock);
+}
+
+void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+ if (del_timer(&fq->timer))
+ atomic_dec(&fq->refcnt);
+
+ if (!(fq->last_in & COMPLETE)) {
+ fq_unlink(fq, f);
+ atomic_dec(&fq->refcnt);
+ fq->last_in |= COMPLETE;
+ }
+}
+
+EXPORT_SYMBOL(inet_frag_kill);
+
+static inline void frag_kfree_skb(struct inet_frags *f, struct sk_buff *skb,
+ int *work)
+{
+ if (work)
+ *work -= skb->truesize;
+
+ atomic_sub(skb->truesize, &f->mem);
+ if (f->skb_free)
+ f->skb_free(skb);
+ kfree_skb(skb);
+}
+
+void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
+ int *work)
+{
+ struct sk_buff *fp;
+
+ BUG_TRAP(q->last_in & COMPLETE);
+ BUG_TRAP(del_timer(&q->timer) == 0);
+
+ /* Release all fragment data. */
+ fp = q->fragments;
+ while (fp) {
+ struct sk_buff *xp = fp->next;
+
+ frag_kfree_skb(f, fp, work);
+ fp = xp;
+ }
+
+ if (work)
+ *work -= f->qsize;
+ atomic_sub(f->qsize, &f->mem);
+
+ f->destructor(q);
+
+}
+EXPORT_SYMBOL(inet_frag_destroy);
+
+int inet_frag_evictor(struct inet_frags *f)
+{
+ struct inet_frag_queue *q;
+ int work, evicted = 0;
+
+ work = atomic_read(&f->mem) - f->ctl->low_thresh;
+ while (work > 0) {
+ read_lock(&f->lock);
+ if (list_empty(&f->lru_list)) {
+ read_unlock(&f->lock);
+ break;
+ }
+
+ q = list_first_entry(&f->lru_list,
+ struct inet_frag_queue, lru_list);
+ atomic_inc(&q->refcnt);
+ read_unlock(&f->lock);
+
+ spin_lock(&q->lock);
+ if (!(q->last_in & COMPLETE))
+ inet_frag_kill(q, f);
+ spin_unlock(&q->lock);
+
+ if (atomic_dec_and_test(&q->refcnt))
+ inet_frag_destroy(q, f, &work);
+ evicted++;
+ }
+
+ return evicted;
+}
+EXPORT_SYMBOL(inet_frag_evictor);
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index 4545b64e281..ac3b1d3dba2 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -77,7 +77,7 @@ static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph,
/* check tcp options (only timestamp allowed) */
if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
- u32 *topt = (u32 *)(tcph + 1);
+ __be32 *topt = (__be32 *)(tcph + 1);
if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
| (TCPOPT_TIMESTAMP << 8)
@@ -103,14 +103,14 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
{
struct iphdr *iph = lro_desc->iph;
struct tcphdr *tcph = lro_desc->tcph;
- u32 *p;
+ __be32 *p;
__wsum tcp_hdr_csum;
tcph->ack_seq = lro_desc->tcp_ack;
tcph->window = lro_desc->tcp_window;
if (lro_desc->tcp_saw_tstamp) {
- p = (u32 *)(tcph + 1);
+ p = (__be32 *)(tcph + 1);
*(p+2) = lro_desc->tcp_rcv_tsecr;
}
@@ -150,7 +150,7 @@ static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
u16 vlan_tag, struct vlan_group *vgrp)
{
int nr_frags;
- u32 *ptr;
+ __be32 *ptr;
u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
nr_frags = skb_shinfo(skb)->nr_frags;
@@ -159,14 +159,14 @@ static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
lro_desc->iph = iph;
lro_desc->tcph = tcph;
lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
- lro_desc->tcp_ack = ntohl(tcph->ack_seq);
+ lro_desc->tcp_ack = tcph->ack_seq;
lro_desc->tcp_window = tcph->window;
lro_desc->pkt_aggr_cnt = 1;
lro_desc->ip_tot_len = ntohs(iph->tot_len);
if (tcph->doff == 8) {
- ptr = (u32 *)(tcph+1);
+ ptr = (__be32 *)(tcph+1);
lro_desc->tcp_saw_tstamp = 1;
lro_desc->tcp_rcv_tsval = *(ptr+1);
lro_desc->tcp_rcv_tsecr = *(ptr+2);
@@ -190,7 +190,7 @@ static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
struct tcphdr *tcph, int tcp_data_len)
{
struct sk_buff *parent = lro_desc->parent;
- u32 *topt;
+ __be32 *topt;
lro_desc->pkt_aggr_cnt++;
lro_desc->ip_tot_len += tcp_data_len;
@@ -200,7 +200,7 @@ static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
/* don't update tcp_rcv_tsval, would not work with PAWS */
if (lro_desc->tcp_saw_tstamp) {
- topt = (u32 *) (tcph + 1);
+ topt = (__be32 *) (tcph + 1);
lro_desc->tcp_rcv_tsecr = *(topt + 2);
}
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index afbf938836f..877da3ed52e 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -40,7 +40,7 @@
#include <net/route.h>
#include <net/xfrm.h>
-static inline int ip_forward_finish(struct sk_buff *skb)
+static int ip_forward_finish(struct sk_buff *skb)
{
struct ip_options * opt = &(IPCB(skb)->opt);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fabb86db763..443b3f89192 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -39,6 +39,7 @@
#include <net/icmp.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
+#include <net/inet_frag.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/inet.h>
@@ -49,21 +50,8 @@
* as well. Or notify me, at least. --ANK
*/
-/* Fragment cache limits. We will commit 256K at one time. Should we
- * cross that limit we will prune down to 192K. This should cope with
- * even the most extreme cases without allowing an attacker to measurably
- * harm machine performance.
- */
-int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
-int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
-
int sysctl_ipfrag_max_dist __read_mostly = 64;
-/* Important NOTE! Fragment queue must be destroyed before MSL expires.
- * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
- */
-int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
-
struct ipfrag_skb_cb
{
struct inet_skb_parm h;
@@ -74,153 +62,102 @@ struct ipfrag_skb_cb
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
- struct hlist_node list;
- struct list_head lru_list; /* lru list member */
+ struct inet_frag_queue q;
+
u32 user;
__be32 saddr;
__be32 daddr;
__be16 id;
u8 protocol;
- u8 last_in;
-#define COMPLETE 4
-#define FIRST_IN 2
-#define LAST_IN 1
-
- struct sk_buff *fragments; /* linked list of received fragments */
- int len; /* total length of original datagram */
- int meat;
- spinlock_t lock;
- atomic_t refcnt;
- struct timer_list timer; /* when will this queue expire? */
- ktime_t stamp;
int iif;
unsigned int rid;
struct inet_peer *peer;
};
-/* Hash table. */
+struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
+ /*
+ * Fragment cache limits. We will commit 256K at one time. Should we
+ * cross that limit we will prune down to 192K. This should cope with
+ * even the most extreme cases without allowing an attacker to
+ * measurably harm machine performance.
+ */
+ .high_thresh = 256 * 1024,
+ .low_thresh = 192 * 1024,
-#define IPQ_HASHSZ 64
+ /*
+ * Important NOTE! Fragment queue must be destroyed before MSL expires.
+ * RFC791 is wrong proposing to prolongate timer each fragment arrival
+ * by TTL.
+ */
+ .timeout = IP_FRAG_TIME,
+ .secret_interval = 10 * 60 * HZ,
+};
-/* Per-bucket lock is easy to add now. */
-static struct hlist_head ipq_hash[IPQ_HASHSZ];
-static DEFINE_RWLOCK(ipfrag_lock);
-static u32 ipfrag_hash_rnd;
-static LIST_HEAD(ipq_lru_list);
-int ip_frag_nqueues = 0;
+static struct inet_frags ip4_frags;
-static __inline__ void __ipq_unlink(struct ipq *qp)
+int ip_frag_nqueues(void)
{
- hlist_del(&qp->list);
- list_del(&qp->lru_list);
- ip_frag_nqueues--;
+ return ip4_frags.nqueues;
}
-static __inline__ void ipq_unlink(struct ipq *ipq)
+int ip_frag_mem(void)
{
- write_lock(&ipfrag_lock);
- __ipq_unlink(ipq);
- write_unlock(&ipfrag_lock);
+ return atomic_read(&ip4_frags.mem);
}
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+ struct net_device *dev);
+
static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
{
return jhash_3words((__force u32)id << 16 | prot,
(__force u32)saddr, (__force u32)daddr,
- ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
+ ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
}
-static struct timer_list ipfrag_secret_timer;
-int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
-
-static void ipfrag_secret_rebuild(unsigned long dummy)
+static unsigned int ip4_hashfn(struct inet_frag_queue *q)
{
- unsigned long now = jiffies;
- int i;
-
- write_lock(&ipfrag_lock);
- get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
- for (i = 0; i < IPQ_HASHSZ; i++) {
- struct ipq *q;
- struct hlist_node *p, *n;
-
- hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) {
- unsigned int hval = ipqhashfn(q->id, q->saddr,
- q->daddr, q->protocol);
-
- if (hval != i) {
- hlist_del(&q->list);
+ struct ipq *ipq;
- /* Relink to new hash chain. */
- hlist_add_head(&q->list, &ipq_hash[hval]);
- }
- }
- }
- write_unlock(&ipfrag_lock);
-
- mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval);
+ ipq = container_of(q, struct ipq, q);
+ return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
}
-atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */
-
/* Memory Tracking Functions. */
static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
{
if (work)
*work -= skb->truesize;
- atomic_sub(skb->truesize, &ip_frag_mem);
+ atomic_sub(skb->truesize, &ip4_frags.mem);
kfree_skb(skb);
}
-static __inline__ void frag_free_queue(struct ipq *qp, int *work)
+static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
{
- if (work)
- *work -= sizeof(struct ipq);
- atomic_sub(sizeof(struct ipq), &ip_frag_mem);
+ struct ipq *qp;
+
+ qp = container_of(q, struct ipq, q);
+ if (qp->peer)
+ inet_putpeer(qp->peer);
kfree(qp);
}
static __inline__ struct ipq *frag_alloc_queue(void)
{
- struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
+ struct ipq *qp = kzalloc(sizeof(struct ipq), GFP_ATOMIC);
if (!qp)
return NULL;
- atomic_add(sizeof(struct ipq), &ip_frag_mem);
+ atomic_add(sizeof(struct ipq), &ip4_frags.mem);
return qp;
}
/* Destruction primitives. */
-/* Complete destruction of ipq. */
-static void ip_frag_destroy(struct ipq *qp, int *work)
-{
- struct sk_buff *fp;
-
- BUG_TRAP(qp->last_in&COMPLETE);
- BUG_TRAP(del_timer(&qp->timer) == 0);
-
- if (qp->peer)
- inet_putpeer(qp->peer);
-
- /* Release all fragment data. */
- fp = qp->fragments;
- while (fp) {
- struct sk_buff *xp = fp->next;
-
- frag_kfree_skb(fp, work);
- fp = xp;
- }
-
- /* Finally, release the queue descriptor itself. */
- frag_free_queue(qp, work);
-}
-
-static __inline__ void ipq_put(struct ipq *ipq, int *work)
+static __inline__ void ipq_put(struct ipq *ipq)
{
- if (atomic_dec_and_test(&ipq->refcnt))
- ip_frag_destroy(ipq, work);
+ inet_frag_put(&ipq->q, &ip4_frags);
}
/* Kill ipq entry. It is not destroyed immediately,
@@ -228,14 +165,7 @@ static __inline__ void ipq_put(struct ipq *ipq, int *work)
*/
static void ipq_kill(struct ipq *ipq)
{
- if (del_timer(&ipq->timer))
- atomic_dec(&ipq->refcnt);
-
- if (!(ipq->last_in & COMPLETE)) {
- ipq_unlink(ipq);
- atomic_dec(&ipq->refcnt);
- ipq->last_in |= COMPLETE;
- }
+ inet_frag_kill(&ipq->q, &ip4_frags);
}
/* Memory limiting on fragments. Evictor trashes the oldest
@@ -243,33 +173,11 @@ static void ipq_kill(struct ipq *ipq)
*/
static void ip_evictor(void)
{
- struct ipq *qp;
- struct list_head *tmp;
- int work;
-
- work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh;
- if (work <= 0)
- return;
-
- while (work > 0) {
- read_lock(&ipfrag_lock);
- if (list_empty(&ipq_lru_list)) {
- read_unlock(&ipfrag_lock);
- return;
- }
- tmp = ipq_lru_list.next;
- qp = list_entry(tmp, struct ipq, lru_list);
- atomic_inc(&qp->refcnt);
- read_unlock(&ipfrag_lock);
+ int evicted;
- spin_lock(&qp->lock);
- if (!(qp->last_in&COMPLETE))
- ipq_kill(qp);
- spin_unlock(&qp->lock);
-
- ipq_put(qp, &work);
- IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
- }
+ evicted = inet_frag_evictor(&ip4_frags);
+ if (evicted)
+ IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted);
}
/*
@@ -279,9 +187,9 @@ static void ip_expire(unsigned long arg)
{
struct ipq *qp = (struct ipq *) arg;
- spin_lock(&qp->lock);
+ spin_lock(&qp->q.lock);
- if (qp->last_in & COMPLETE)
+ if (qp->q.last_in & COMPLETE)
goto out;
ipq_kill(qp);
@@ -289,8 +197,8 @@ static void ip_expire(unsigned long arg)
IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
- if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) {
- struct sk_buff *head = qp->fragments;
+ if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) {
+ struct sk_buff *head = qp->q.fragments;
/* Send an ICMP "Fragment Reassembly Timeout" message. */
if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) {
icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
@@ -298,8 +206,8 @@ static void ip_expire(unsigned long arg)
}
}
out:
- spin_unlock(&qp->lock);
- ipq_put(qp, NULL);
+ spin_unlock(&qp->q.lock);
+ ipq_put(qp);
}
/* Creation primitives. */
@@ -312,7 +220,7 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
#endif
unsigned int hash;
- write_lock(&ipfrag_lock);
+ write_lock(&ip4_frags.lock);
hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
qp_in->protocol);
#ifdef CONFIG_SMP
@@ -320,31 +228,31 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
* such entry could be created on other cpu, while we
* promoted read lock to write lock.
*/
- hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
+ hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
if (qp->id == qp_in->id &&
qp->saddr == qp_in->saddr &&
qp->daddr == qp_in->daddr &&
qp->protocol == qp_in->protocol &&
qp->user == qp_in->user) {
- atomic_inc(&qp->refcnt);
- write_unlock(&ipfrag_lock);
- qp_in->last_in |= COMPLETE;
- ipq_put(qp_in, NULL);
+ atomic_inc(&qp->q.refcnt);
+ write_unlock(&ip4_frags.lock);
+ qp_in->q.last_in |= COMPLETE;
+ ipq_put(qp_in);
return qp;
}
}
#endif
qp = qp_in;
- if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time))
- atomic_inc(&qp->refcnt);
+ if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout))
+ atomic_inc(&qp->q.refcnt);
- atomic_inc(&qp->refcnt);
- hlist_add_head(&qp->list, &ipq_hash[hash]);
- INIT_LIST_HEAD(&qp->lru_list);
- list_add_tail(&qp->lru_list, &ipq_lru_list);
- ip_frag_nqueues++;
- write_unlock(&ipfrag_lock);
+ atomic_inc(&qp->q.refcnt);
+ hlist_add_head(&qp->q.list, &ip4_frags.hash[hash]);
+ INIT_LIST_HEAD(&qp->q.lru_list);
+ list_add_tail(&qp->q.lru_list, &ip4_frags.lru_list);
+ ip4_frags.nqueues++;
+ write_unlock(&ip4_frags.lock);
return qp;
}
@@ -357,23 +265,18 @@ static struct ipq *ip_frag_create(struct iphdr *iph, u32 user)
goto out_nomem;
qp->protocol = iph->protocol;
- qp->last_in = 0;
qp->id = iph->id;
qp->saddr = iph->saddr;
qp->daddr = iph->daddr;
qp->user = user;
- qp->len = 0;
- qp->meat = 0;
- qp->fragments = NULL;
- qp->iif = 0;
qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
/* Initialize a timer for this entry. */
- init_timer(&qp->timer);
- qp->timer.data = (unsigned long) qp; /* pointer to queue */
- qp->timer.function = ip_expire; /* expire function */
- spin_lock_init(&qp->lock);
- atomic_set(&qp->refcnt, 1);
+ init_timer(&qp->q.timer);
+ qp->q.timer.data = (unsigned long) qp; /* pointer to queue */
+ qp->q.timer.function = ip_expire; /* expire function */
+ spin_lock_init(&qp->q.lock);
+ atomic_set(&qp->q.refcnt, 1);
return ip_frag_intern(qp);
@@ -395,20 +298,20 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
struct ipq *qp;
struct hlist_node *n;
- read_lock(&ipfrag_lock);
+ read_lock(&ip4_frags.lock);
hash = ipqhashfn(id, saddr, daddr, protocol);
- hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
+ hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
if (qp->id == id &&
qp->saddr == saddr &&
qp->daddr == daddr &&
qp->protocol == protocol &&
qp->user == user) {
- atomic_inc(&qp->refcnt);
- read_unlock(&ipfrag_lock);
+ atomic_inc(&qp->q.refcnt);
+ read_unlock(&ip4_frags.lock);
return qp;
}
}
- read_unlock(&ipfrag_lock);
+ read_unlock(&ip4_frags.lock);
return ip_frag_create(iph, user);
}
@@ -429,7 +332,7 @@ static inline int ip_frag_too_far(struct ipq *qp)
end = atomic_inc_return(&peer->rid);
qp->rid = end;
- rc = qp->fragments && (end - start) > max;
+ rc = qp->q.fragments && (end - start) > max;
if (rc) {
IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
@@ -442,39 +345,42 @@ static int ip_frag_reinit(struct ipq *qp)
{
struct sk_buff *fp;
- if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {
- atomic_inc(&qp->refcnt);
+ if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) {
+ atomic_inc(&qp->q.refcnt);
return -ETIMEDOUT;
}
- fp = qp->fragments;
+ fp = qp->q.fragments;
do {
struct sk_buff *xp = fp->next;
frag_kfree_skb(fp, NULL);
fp = xp;
} while (fp);
- qp->last_in = 0;
- qp->len = 0;
- qp->meat = 0;
- qp->fragments = NULL;
+ qp->q.last_in = 0;
+ qp->q.len = 0;
+ qp->q.meat = 0;
+ qp->q.fragments = NULL;
qp->iif = 0;
return 0;
}
/* Add new segment to existing queue. */
-static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
struct sk_buff *prev, *next;
+ struct net_device *dev;
int flags, offset;
int ihl, end;
+ int err = -ENOENT;
- if (qp->last_in & COMPLETE)
+ if (qp->q.last_in & COMPLETE)
goto err;
if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
- unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) {
+ unlikely(ip_frag_too_far(qp)) &&
+ unlikely(err = ip_frag_reinit(qp))) {
ipq_kill(qp);
goto err;
}
@@ -487,36 +393,40 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
/* Determine the position of this fragment. */
end = offset + skb->len - ihl;
+ err = -EINVAL;
/* Is this the final fragment? */
if ((flags & IP_MF) == 0) {
/* If we already have some bits beyond end
* or have different end, the segment is corrrupted.
*/
- if (end < qp->len ||
- ((qp->last_in & LAST_IN) && end != qp->len))
+ if (end < qp->q.len ||
+ ((qp->q.last_in & LAST_IN) && end != qp->q.len))
goto err;
- qp->last_in |= LAST_IN;
- qp->len = end;
+ qp->q.last_in |= LAST_IN;
+ qp->q.len = end;
} else {
if (end&7) {
end &= ~7;
if (skb->ip_summed != CHECKSUM_UNNECESSARY)
skb->ip_summed = CHECKSUM_NONE;
}
- if (end > qp->len) {
+ if (end > qp->q.len) {
/* Some bits beyond end -> corruption. */
- if (qp->last_in & LAST_IN)
+ if (qp->q.last_in & LAST_IN)
goto err;
- qp->len = end;
+ qp->q.len = end;
}
}
if (end == offset)
goto err;
+ err = -ENOMEM;
if (pskb_pull(skb, ihl) == NULL)
goto err;
- if (pskb_trim_rcsum(skb, end-offset))
+
+ err = pskb_trim_rcsum(skb, end - offset);
+ if (err)
goto err;
/* Find out which fragments are in front and at the back of us
@@ -524,7 +434,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
* this fragment, right?
*/
prev = NULL;
- for (next = qp->fragments; next != NULL; next = next->next) {
+ for (next = qp->q.fragments; next != NULL; next = next->next) {
if (FRAG_CB(next)->offset >= offset)
break; /* bingo! */
prev = next;
@@ -539,8 +449,10 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
if (i > 0) {
offset += i;
+ err = -EINVAL;
if (end <= offset)
goto err;
+ err = -ENOMEM;
if (!pskb_pull(skb, i))
goto err;
if (skb->ip_summed != CHECKSUM_UNNECESSARY)
@@ -548,6 +460,8 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
}
}
+ err = -ENOMEM;
+
while (next && FRAG_CB(next)->offset < end) {
int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
@@ -558,7 +472,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
if (!pskb_pull(next, i))
goto err;
FRAG_CB(next)->offset += i;
- qp->meat -= i;
+ qp->q.meat -= i;
if (next->ip_summed != CHECKSUM_UNNECESSARY)
next->ip_summed = CHECKSUM_NONE;
break;
@@ -573,9 +487,9 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
if (prev)
prev->next = next;
else
- qp->fragments = next;
+ qp->q.fragments = next;
- qp->meat -= free_it->len;
+ qp->q.meat -= free_it->len;
frag_kfree_skb(free_it, NULL);
}
}
@@ -587,50 +501,77 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
if (prev)
prev->next = skb;
else
- qp->fragments = skb;
-
- if (skb->dev)
- qp->iif = skb->dev->ifindex;
- skb->dev = NULL;
- qp->stamp = skb->tstamp;
- qp->meat += skb->len;
- atomic_add(skb->truesize, &ip_frag_mem);
+ qp->q.fragments = skb;
+
+ dev = skb->dev;
+ if (dev) {
+ qp->iif = dev->ifindex;
+ skb->dev = NULL;
+ }
+ qp->q.stamp = skb->tstamp;
+ qp->q.meat += skb->len;
+ atomic_add(skb->truesize, &ip4_frags.mem);
if (offset == 0)
- qp->last_in |= FIRST_IN;
+ qp->q.last_in |= FIRST_IN;
- write_lock(&ipfrag_lock);
- list_move_tail(&qp->lru_list, &ipq_lru_list);
- write_unlock(&ipfrag_lock);
+ if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len)
+ return ip_frag_reasm(qp, prev, dev);
- return;
+ write_lock(&ip4_frags.lock);
+ list_move_tail(&qp->q.lru_list, &ip4_frags.lru_list);
+ write_unlock(&ip4_frags.lock);
+ return -EINPROGRESS;
err:
kfree_skb(skb);
+ return err;
}
/* Build a new IP datagram from all its fragments. */
-static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+ struct net_device *dev)
{
struct iphdr *iph;
- struct sk_buff *fp, *head = qp->fragments;
+ struct sk_buff *fp, *head = qp->q.fragments;
int len;
int ihlen;
+ int err;
ipq_kill(qp);
+ /* Make the one we just received the head. */
+ if (prev) {
+ head = prev->next;
+ fp = skb_clone(head, GFP_ATOMIC);
+
+ if (!fp)
+ goto out_nomem;
+
+ fp->next = head->next;
+ prev->next = fp;
+
+ skb_morph(head, qp->q.fragments);
+ head->next = qp->q.fragments->next;
+
+ kfree_skb(qp->q.fragments);
+ qp->q.fragments = head;
+ }
+
BUG_TRAP(head != NULL);
BUG_TRAP(FRAG_CB(head)->offset == 0);
/* Allocate a new buffer for the datagram. */
ihlen = ip_hdrlen(head);
- len = ihlen + qp->len;
+ len = ihlen + qp->q.len;
+ err = -E2BIG;
if (len > 65535)
goto out_oversize;
/* Head of list must not be cloned. */
+ err = -ENOMEM;
if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
goto out_nomem;
@@ -654,12 +595,12 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- atomic_add(clone->truesize, &ip_frag_mem);
+ atomic_add(clone->truesize, &ip4_frags.mem);
}
skb_shinfo(head)->frag_list = head->next;
skb_push(head, head->data - skb_network_header(head));
- atomic_sub(head->truesize, &ip_frag_mem);
+ atomic_sub(head->truesize, &ip4_frags.mem);
for (fp=head->next; fp; fp = fp->next) {
head->data_len += fp->len;
@@ -669,19 +610,19 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
- atomic_sub(fp->truesize, &ip_frag_mem);
+ atomic_sub(fp->truesize, &ip4_frags.mem);
}
head->next = NULL;
head->dev = dev;
- head->tstamp = qp->stamp;
+ head->tstamp = qp->q.stamp;
iph = ip_hdr(head);
iph->frag_off = 0;
iph->tot_len = htons(len);
IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
- qp->fragments = NULL;
- return head;
+ qp->q.fragments = NULL;
+ return 0;
out_nomem:
LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
@@ -694,54 +635,46 @@ out_oversize:
NIPQUAD(qp->saddr));
out_fail:
IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
- return NULL;
+ return err;
}
/* Process an incoming IP datagram fragment. */
-struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
+int ip_defrag(struct sk_buff *skb, u32 user)
{
struct ipq *qp;
- struct net_device *dev;
IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
/* Start by cleaning up the memory. */
- if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh)
+ if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh)
ip_evictor();
- dev = skb->dev;
-
/* Lookup (or create) queue header */
if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
- struct sk_buff *ret = NULL;
-
- spin_lock(&qp->lock);
+ int ret;
- ip_frag_queue(qp, skb);
+ spin_lock(&qp->q.lock);
- if (qp->last_in == (FIRST_IN|LAST_IN) &&
- qp->meat == qp->len)
- ret = ip_frag_reasm(qp, dev);
+ ret = ip_frag_queue(qp, skb);
- spin_unlock(&qp->lock);
- ipq_put(qp, NULL);
+ spin_unlock(&qp->q.lock);
+ ipq_put(qp);
return ret;
}
IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
- return NULL;
+ return -ENOMEM;
}
void __init ipfrag_init(void)
{
- ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
- (jiffies ^ (jiffies >> 6)));
-
- init_timer(&ipfrag_secret_timer);
- ipfrag_secret_timer.function = ipfrag_secret_rebuild;
- ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
- add_timer(&ipfrag_secret_timer);
+ ip4_frags.ctl = &ip4_frags_ctl;
+ ip4_frags.hashfn = ip4_hashfn;
+ ip4_frags.destructor = ip4_frag_free;
+ ip4_frags.skb_free = NULL;
+ ip4_frags.qsize = sizeof(struct ipq);
+ inet_frags_init(&ip4_frags);
}
EXPORT_SYMBOL(ip_defrag);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 41d8964591e..168c871fcd7 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -172,8 +172,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
(!sk->sk_bound_dev_if ||
sk->sk_bound_dev_if == skb->dev->ifindex)) {
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN);
- if (skb == NULL) {
+ if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) {
read_unlock(&ip_ra_lock);
return 1;
}
@@ -196,7 +195,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
return 0;
}
-static inline int ip_local_deliver_finish(struct sk_buff *skb)
+static int ip_local_deliver_finish(struct sk_buff *skb)
{
__skb_pull(skb, ip_hdrlen(skb));
@@ -265,8 +264,7 @@ int ip_local_deliver(struct sk_buff *skb)
*/
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER);
- if (!skb)
+ if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER))
return 0;
}
@@ -326,7 +324,7 @@ drop:
return -1;
}
-static inline int ip_rcv_finish(struct sk_buff *skb)
+static int ip_rcv_finish(struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 699f06781fd..f508835ba71 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -202,7 +202,7 @@ static inline int ip_skb_dst_mtu(struct sk_buff *skb)
skb->dst->dev->mtu : dst_mtu(skb->dst);
}
-static inline int ip_finish_output(struct sk_buff *skb)
+static int ip_finish_output(struct sk_buff *skb)
{
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 341474eefa5..664cb8e97c1 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -25,6 +25,7 @@
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
+#include <linux/netfilter.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/tcp.h>
@@ -328,18 +329,18 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
spin_unlock(&cp->lock);
}
-static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
+static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
struct ip_vs_app *app)
{
int diff;
- const unsigned int tcp_offset = ip_hdrlen(*pskb);
+ const unsigned int tcp_offset = ip_hdrlen(skb);
struct tcphdr *th;
__u32 seq;
- if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
+ if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
return 0;
- th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
+ th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
/*
* Remember seq number in case this pkt gets resized
@@ -360,7 +361,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
if (app->pkt_out == NULL)
return 1;
- if (!app->pkt_out(app, cp, pskb, &diff))
+ if (!app->pkt_out(app, cp, skb, &diff))
return 0;
/*
@@ -378,7 +379,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
* called by ipvs packet handler, assumes previously checked cp!=NULL
* returns false if it can't handle packet (oom)
*/
-int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb)
+int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_app *app;
@@ -391,7 +392,7 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb)
/* TCP is complicated */
if (cp->protocol == IPPROTO_TCP)
- return app_tcp_pkt_out(cp, pskb, app);
+ return app_tcp_pkt_out(cp, skb, app);
/*
* Call private output hook function
@@ -399,22 +400,22 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb)
if (app->pkt_out == NULL)
return 1;
- return app->pkt_out(app, cp, pskb, NULL);
+ return app->pkt_out(app, cp, skb, NULL);
}
-static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
+static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
struct ip_vs_app *app)
{
int diff;
- const unsigned int tcp_offset = ip_hdrlen(*pskb);
+ const unsigned int tcp_offset = ip_hdrlen(skb);
struct tcphdr *th;
__u32 seq;
- if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
+ if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
return 0;
- th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
+ th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
/*
* Remember seq number in case this pkt gets resized
@@ -435,7 +436,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
if (app->pkt_in == NULL)
return 1;
- if (!app->pkt_in(app, cp, pskb, &diff))
+ if (!app->pkt_in(app, cp, skb, &diff))
return 0;
/*
@@ -453,7 +454,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
* called by ipvs packet handler, assumes previously checked cp!=NULL.
* returns false if can't handle packet (oom).
*/
-int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb)
+int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_app *app;
@@ -466,7 +467,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb)
/* TCP is complicated */
if (cp->protocol == IPPROTO_TCP)
- return app_tcp_pkt_in(cp, pskb, app);
+ return app_tcp_pkt_in(cp, skb, app);
/*
* Call private input hook function
@@ -474,7 +475,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb)
if (app->pkt_in == NULL)
return 1;
- return app->pkt_in(app, cp, pskb, NULL);
+ return app->pkt_in(app, cp, skb, NULL);
}
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index fbca2a2ff29..c6ed7654e83 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -58,7 +58,6 @@ EXPORT_SYMBOL(ip_vs_conn_put);
#ifdef CONFIG_IP_VS_DEBUG
EXPORT_SYMBOL(ip_vs_get_debug_level);
#endif
-EXPORT_SYMBOL(ip_vs_make_skb_writable);
/* ID used in ICMP lookups */
@@ -163,42 +162,6 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction,
}
-int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len)
-{
- struct sk_buff *skb = *pskb;
-
- /* skb is already used, better copy skb and its payload */
- if (unlikely(skb_shared(skb) || skb->sk))
- goto copy_skb;
-
- /* skb data is already used, copy it */
- if (unlikely(skb_cloned(skb)))
- goto copy_data;
-
- return pskb_may_pull(skb, writable_len);
-
- copy_data:
- if (unlikely(writable_len > skb->len))
- return 0;
- return !pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-
- copy_skb:
- if (unlikely(writable_len > skb->len))
- return 0;
- skb = skb_copy(skb, GFP_ATOMIC);
- if (!skb)
- return 0;
- BUG_ON(skb_is_nonlinear(skb));
-
- /* Rest of kernel will get very unhappy if we pass it a
- suddenly-orphaned skbuff */
- if ((*pskb)->sk)
- skb_set_owner_w(skb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = skb;
- return 1;
-}
-
/*
* IPVS persistent scheduling function
* It creates a connection entry according to its template if exists,
@@ -525,12 +488,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
* for VS/NAT.
*/
static unsigned int ip_vs_post_routing(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- if (!((*pskb)->ipvs_property))
+ if (!skb->ipvs_property)
return NF_ACCEPT;
/* The packet was sent from IPVS, exit this chain */
return NF_STOP;
@@ -541,13 +504,14 @@ __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
}
-static inline struct sk_buff *
-ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
+static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
{
- skb = ip_defrag(skb, user);
- if (skb)
+ int err = ip_defrag(skb, user);
+
+ if (!err)
ip_send_check(ip_hdr(skb));
- return skb;
+
+ return err;
}
/*
@@ -605,9 +569,8 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
* Currently handles error types - unreachable, quench, ttl exceeded.
* (Only used in VS/NAT)
*/
-static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
+static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
{
- struct sk_buff *skb = *pskb;
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
@@ -619,10 +582,8 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
/* reassemble IP fragments */
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
- if (!skb)
+ if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
return NF_STOLEN;
- *pskb = skb;
}
iph = ip_hdr(skb);
@@ -690,9 +651,8 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
offset += 2 * sizeof(__u16);
- if (!ip_vs_make_skb_writable(pskb, offset))
+ if (!skb_make_writable(skb, offset))
goto out;
- skb = *pskb;
ip_vs_nat_icmp(skb, pp, cp, 1);
@@ -724,11 +684,10 @@ static inline int is_tcp_reset(const struct sk_buff *skb)
* rewrite addresses of the packet and send it on its way...
*/
static unsigned int
-ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct sk_buff *skb = *pskb;
struct iphdr *iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
@@ -741,11 +700,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
iph = ip_hdr(skb);
if (unlikely(iph->protocol == IPPROTO_ICMP)) {
- int related, verdict = ip_vs_out_icmp(pskb, &related);
+ int related, verdict = ip_vs_out_icmp(skb, &related);
if (related)
return verdict;
- skb = *pskb;
iph = ip_hdr(skb);
}
@@ -756,11 +714,9 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
/* reassemble IP fragments */
if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
!pp->dont_defrag)) {
- skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
- if (!skb)
+ if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
return NF_STOLEN;
iph = ip_hdr(skb);
- *pskb = skb;
}
ihl = iph->ihl << 2;
@@ -802,13 +758,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
- if (!ip_vs_make_skb_writable(pskb, ihl))
+ if (!skb_make_writable(skb, ihl))
goto drop;
/* mangle the packet */
- if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
+ if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
goto drop;
- skb = *pskb;
ip_hdr(skb)->saddr = cp->vaddr;
ip_send_check(ip_hdr(skb));
@@ -818,9 +773,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
* if it came from this machine itself. So re-compute
* the routing information.
*/
- if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
+ if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto drop;
- skb = *pskb;
IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
@@ -835,7 +789,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
drop:
ip_vs_conn_put(cp);
- kfree_skb(*pskb);
+ kfree_skb(skb);
return NF_STOLEN;
}
@@ -847,9 +801,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
* Currently handles error types - unreachable, quench, ttl exceeded.
*/
static int
-ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
+ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
{
- struct sk_buff *skb = *pskb;
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
@@ -861,12 +814,9 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
/* reassemble IP fragments */
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- skb = ip_vs_gather_frags(skb,
- hooknum == NF_IP_LOCAL_IN ?
- IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
- if (!skb)
+ if (ip_vs_gather_frags(skb, hooknum == NF_IP_LOCAL_IN ?
+ IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
return NF_STOLEN;
- *pskb = skb;
}
iph = ip_hdr(skb);
@@ -945,11 +895,10 @@ ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
* and send it on its way...
*/
static unsigned int
-ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct sk_buff *skb = *pskb;
struct iphdr *iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
@@ -971,11 +920,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
iph = ip_hdr(skb);
if (unlikely(iph->protocol == IPPROTO_ICMP)) {
- int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
+ int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
if (related)
return verdict;
- skb = *pskb;
iph = ip_hdr(skb);
}
@@ -1056,16 +1004,16 @@ ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
* and send them to ip_vs_in_icmp.
*/
static unsigned int
-ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb,
+ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
int r;
- if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP)
+ if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
return NF_ACCEPT;
- return ip_vs_in_icmp(pskb, &r, hooknum);
+ return ip_vs_in_icmp(skb, &r, hooknum);
}
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index 344ddbbdc75..59aa166b767 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -30,6 +30,7 @@
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
+#include <linux/netfilter.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <asm/unaligned.h>
@@ -135,7 +136,7 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
* xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
*/
static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
- struct sk_buff **pskb, int *diff)
+ struct sk_buff *skb, int *diff)
{
struct iphdr *iph;
struct tcphdr *th;
@@ -155,14 +156,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
return 1;
/* Linear packets are much easier to deal with. */
- if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return 0;
if (cp->app_data == &ip_vs_ftp_pasv) {
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
data = (char *)th + (th->doff << 2);
- data_limit = skb_tail_pointer(*pskb);
+ data_limit = skb_tail_pointer(skb);
if (ip_vs_ftp_get_addrport(data, data_limit,
SERVER_STRING,
@@ -213,7 +214,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
memcpy(start, buf, buf_len);
ret = 1;
} else {
- ret = !ip_vs_skb_replace(*pskb, GFP_ATOMIC, start,
+ ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start,
end-start, buf, buf_len);
}
@@ -238,7 +239,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* the client.
*/
static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
- struct sk_buff **pskb, int *diff)
+ struct sk_buff *skb, int *diff)
{
struct iphdr *iph;
struct tcphdr *th;
@@ -256,20 +257,20 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
return 1;
/* Linear packets are much easier to deal with. */
- if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return 0;
/*
* Detecting whether it is passive
*/
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
/* Since there may be OPTIONS in the TCP packet and the HLEN is
the length of the header in 32-bit multiples, it is accurate
to calculate data address by th+HLEN*4 */
data = data_start = (char *)th + (th->doff << 2);
- data_limit = skb_tail_pointer(*pskb);
+ data_limit = skb_tail_pointer(skb);
while (data <= data_limit - 6) {
if (strnicmp(data, "PASV\r\n", 6) == 0) {
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index e65577a7700..12dc0d640b6 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -20,6 +20,7 @@
#include <linux/tcp.h> /* for tcphdr */
#include <net/ip.h>
#include <net/tcp.h> /* for csum_tcpudp_magic */
+#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <net/ip_vs.h>
@@ -122,27 +123,27 @@ tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip,
static int
-tcp_snat_handler(struct sk_buff **pskb,
+tcp_snat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct tcphdr *tcph;
- const unsigned int tcphoff = ip_hdrlen(*pskb);
+ const unsigned int tcphoff = ip_hdrlen(skb);
/* csum_check requires unshared skb */
- if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
+ if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
return 0;
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(*pskb, pp))
+ if (pp->csum_check && !pp->csum_check(skb, pp))
return 0;
/* Call application helper if needed */
- if (!ip_vs_app_pkt_out(cp, pskb))
+ if (!ip_vs_app_pkt_out(cp, skb))
return 0;
}
- tcph = (void *)ip_hdr(*pskb) + tcphoff;
+ tcph = (void *)ip_hdr(skb) + tcphoff;
tcph->source = cp->vport;
/* Adjust TCP checksums */
@@ -150,17 +151,15 @@ tcp_snat_handler(struct sk_buff **pskb,
/* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
cp->dport, cp->vport);
- if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
- (*pskb)->ip_summed = CHECKSUM_NONE;
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
tcph->check = 0;
- (*pskb)->csum = skb_checksum(*pskb, tcphoff,
- (*pskb)->len - tcphoff, 0);
+ skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
- (*pskb)->len - tcphoff,
- cp->protocol,
- (*pskb)->csum);
+ skb->len - tcphoff,
+ cp->protocol, skb->csum);
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
pp->name, tcph->check,
(char*)&(tcph->check) - (char*)tcph);
@@ -170,30 +169,30 @@ tcp_snat_handler(struct sk_buff **pskb,
static int
-tcp_dnat_handler(struct sk_buff **pskb,
+tcp_dnat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct tcphdr *tcph;
- const unsigned int tcphoff = ip_hdrlen(*pskb);
+ const unsigned int tcphoff = ip_hdrlen(skb);
/* csum_check requires unshared skb */
- if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
+ if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
return 0;
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(*pskb, pp))
+ if (pp->csum_check && !pp->csum_check(skb, pp))
return 0;
/*
* Attempt ip_vs_app call.
* It will fix ip_vs_conn and iph ack_seq stuff
*/
- if (!ip_vs_app_pkt_in(cp, pskb))
+ if (!ip_vs_app_pkt_in(cp, skb))
return 0;
}
- tcph = (void *)ip_hdr(*pskb) + tcphoff;
+ tcph = (void *)ip_hdr(skb) + tcphoff;
tcph->dest = cp->dport;
/*
@@ -203,18 +202,16 @@ tcp_dnat_handler(struct sk_buff **pskb,
/* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
cp->vport, cp->dport);
- if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
- (*pskb)->ip_summed = CHECKSUM_NONE;
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
tcph->check = 0;
- (*pskb)->csum = skb_checksum(*pskb, tcphoff,
- (*pskb)->len - tcphoff, 0);
+ skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
- (*pskb)->len - tcphoff,
- cp->protocol,
- (*pskb)->csum);
- (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->len - tcphoff,
+ cp->protocol, skb->csum);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
}
return 1;
}
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 8ee5fe6a101..1fa7b330b9a 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -18,6 +18,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/kernel.h>
+#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/udp.h>
@@ -129,29 +130,29 @@ udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip,
}
static int
-udp_snat_handler(struct sk_buff **pskb,
+udp_snat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct udphdr *udph;
- const unsigned int udphoff = ip_hdrlen(*pskb);
+ const unsigned int udphoff = ip_hdrlen(skb);
/* csum_check requires unshared skb */
- if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
+ if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
return 0;
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(*pskb, pp))
+ if (pp->csum_check && !pp->csum_check(skb, pp))
return 0;
/*
* Call application helper if needed
*/
- if (!ip_vs_app_pkt_out(cp, pskb))
+ if (!ip_vs_app_pkt_out(cp, skb))
return 0;
}
- udph = (void *)ip_hdr(*pskb) + udphoff;
+ udph = (void *)ip_hdr(skb) + udphoff;
udph->source = cp->vport;
/*
@@ -161,17 +162,15 @@ udp_snat_handler(struct sk_buff **pskb,
/* Only port and addr are changed, do fast csum update */
udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
cp->dport, cp->vport);
- if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
- (*pskb)->ip_summed = CHECKSUM_NONE;
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
udph->check = 0;
- (*pskb)->csum = skb_checksum(*pskb, udphoff,
- (*pskb)->len - udphoff, 0);
+ skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
- (*pskb)->len - udphoff,
- cp->protocol,
- (*pskb)->csum);
+ skb->len - udphoff,
+ cp->protocol, skb->csum);
if (udph->check == 0)
udph->check = CSUM_MANGLED_0;
IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
@@ -183,30 +182,30 @@ udp_snat_handler(struct sk_buff **pskb,
static int
-udp_dnat_handler(struct sk_buff **pskb,
+udp_dnat_handler(struct sk_buff *skb,
struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
{
struct udphdr *udph;
- unsigned int udphoff = ip_hdrlen(*pskb);
+ unsigned int udphoff = ip_hdrlen(skb);
/* csum_check requires unshared skb */
- if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
+ if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
return 0;
if (unlikely(cp->app != NULL)) {
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(*pskb, pp))
+ if (pp->csum_check && !pp->csum_check(skb, pp))
return 0;
/*
* Attempt ip_vs_app call.
* It will fix ip_vs_conn
*/
- if (!ip_vs_app_pkt_in(cp, pskb))
+ if (!ip_vs_app_pkt_in(cp, skb))
return 0;
}
- udph = (void *)ip_hdr(*pskb) + udphoff;
+ udph = (void *)ip_hdr(skb) + udphoff;
udph->dest = cp->dport;
/*
@@ -216,20 +215,18 @@ udp_dnat_handler(struct sk_buff **pskb,
/* Only port and addr are changed, do fast csum update */
udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
cp->vport, cp->dport);
- if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
- (*pskb)->ip_summed = CHECKSUM_NONE;
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_NONE;
} else {
/* full checksum calculation */
udph->check = 0;
- (*pskb)->csum = skb_checksum(*pskb, udphoff,
- (*pskb)->len - udphoff, 0);
+ skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
- (*pskb)->len - udphoff,
- cp->protocol,
- (*pskb)->csum);
+ skb->len - udphoff,
+ cp->protocol, skb->csum);
if (udph->check == 0)
udph->check = CSUM_MANGLED_0;
- (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
}
return 1;
}
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 666e080a74a..d0a92dec105 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -253,7 +253,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* copy-on-write the packet before mangling it */
- if (!ip_vs_make_skb_writable(&skb, sizeof(struct iphdr)))
+ if (!skb_make_writable(skb, sizeof(struct iphdr)))
goto tx_error_put;
if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
@@ -264,7 +264,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->dst = &rt->u.dst;
/* mangle the packet */
- if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp))
+ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
goto tx_error;
ip_hdr(skb)->daddr = cp->daddr;
ip_send_check(ip_hdr(skb));
@@ -529,7 +529,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* copy-on-write the packet before mangling it */
- if (!ip_vs_make_skb_writable(&skb, offset))
+ if (!skb_make_writable(skb, offset))
goto tx_error_put;
if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index b44192924f9..5539debf497 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -3,14 +3,15 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/ip.h>
+#include <linux/skbuff.h>
#include <net/route.h>
#include <net/xfrm.h>
#include <net/ip.h>
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
-int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
+int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
{
- const struct iphdr *iph = ip_hdr(*pskb);
+ const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
struct flowi fl = {};
struct dst_entry *odst;
@@ -29,14 +30,14 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
if (type == RTN_LOCAL)
fl.nl_u.ip4_u.saddr = iph->saddr;
fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
- fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
- fl.mark = (*pskb)->mark;
+ fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
+ fl.mark = skb->mark;
if (ip_route_output_key(&rt, &fl) != 0)
return -1;
/* Drop old route. */
- dst_release((*pskb)->dst);
- (*pskb)->dst = &rt->u.dst;
+ dst_release(skb->dst);
+ skb->dst = &rt->u.dst;
} else {
/* non-local src, find valid iif to satisfy
* rp-filter when calling ip_route_input. */
@@ -44,8 +45,8 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
if (ip_route_output_key(&rt, &fl) != 0)
return -1;
- odst = (*pskb)->dst;
- if (ip_route_input(*pskb, iph->daddr, iph->saddr,
+ odst = skb->dst;
+ if (ip_route_input(skb, iph->daddr, iph->saddr,
RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
dst_release(&rt->u.dst);
return -1;
@@ -54,70 +55,54 @@ int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
dst_release(odst);
}
- if ((*pskb)->dst->error)
+ if (skb->dst->error)
return -1;
#ifdef CONFIG_XFRM
- if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) &&
- xfrm_decode_session(*pskb, &fl, AF_INET) == 0)
- if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0))
+ if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+ xfrm_decode_session(skb, &fl, AF_INET) == 0)
+ if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0))
return -1;
#endif
/* Change in oif may mean change in hh_len. */
- hh_len = (*pskb)->dst->dev->hard_header_len;
- if (skb_headroom(*pskb) < hh_len) {
- struct sk_buff *nskb;
-
- nskb = skb_realloc_headroom(*pskb, hh_len);
- if (!nskb)
- return -1;
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- }
+ hh_len = skb->dst->dev->hard_header_len;
+ if (skb_headroom(skb) < hh_len &&
+ pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+ return -1;
return 0;
}
EXPORT_SYMBOL(ip_route_me_harder);
#ifdef CONFIG_XFRM
-int ip_xfrm_me_harder(struct sk_buff **pskb)
+int ip_xfrm_me_harder(struct sk_buff *skb)
{
struct flowi fl;
unsigned int hh_len;
struct dst_entry *dst;
- if (IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED)
+ if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
return 0;
- if (xfrm_decode_session(*pskb, &fl, AF_INET) < 0)
+ if (xfrm_decode_session(skb, &fl, AF_INET) < 0)
return -1;
- dst = (*pskb)->dst;
+ dst = skb->dst;
if (dst->xfrm)
dst = ((struct xfrm_dst *)dst)->route;
dst_hold(dst);
- if (xfrm_lookup(&dst, &fl, (*pskb)->sk, 0) < 0)
+ if (xfrm_lookup(&dst, &fl, skb->sk, 0) < 0)
return -1;
- dst_release((*pskb)->dst);
- (*pskb)->dst = dst;
+ dst_release(skb->dst);
+ skb->dst = dst;
/* Change in oif may mean change in hh_len. */
- hh_len = (*pskb)->dst->dev->hard_header_len;
- if (skb_headroom(*pskb) < hh_len) {
- struct sk_buff *nskb;
-
- nskb = skb_realloc_headroom(*pskb, hh_len);
- if (!nskb)
- return -1;
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- }
+ hh_len = skb->dst->dev->hard_header_len;
+ if (skb_headroom(skb) < hh_len &&
+ pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
+ return -1;
return 0;
}
EXPORT_SYMBOL(ip_xfrm_me_harder);
@@ -150,17 +135,17 @@ static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info)
}
}
-static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
+static int nf_ip_reroute(struct sk_buff *skb, const struct nf_info *info)
{
const struct ip_rt_info *rt_info = nf_info_reroute(info);
if (info->hook == NF_IP_LOCAL_OUT) {
- const struct iphdr *iph = ip_hdr(*pskb);
+ const struct iphdr *iph = ip_hdr(skb);
if (!(iph->tos == rt_info->tos
&& iph->daddr == rt_info->daddr
&& iph->saddr == rt_info->saddr))
- return ip_route_me_harder(pskb, RTN_UNSPEC);
+ return ip_route_me_harder(skb, RTN_UNSPEC);
}
return 0;
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 29114a9ccd1..2909c92ecd9 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -197,7 +197,7 @@ static inline int arp_checkentry(const struct arpt_arp *arp)
return 1;
}
-static unsigned int arpt_error(struct sk_buff **pskb,
+static unsigned int arpt_error(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -215,7 +215,7 @@ static inline struct arpt_entry *get_entry(void *base, unsigned int offset)
return (struct arpt_entry *)(base + offset);
}
-unsigned int arpt_do_table(struct sk_buff **pskb,
+unsigned int arpt_do_table(struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
@@ -231,9 +231,9 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
struct xt_table_info *private;
/* ARP header, plus 2 device addresses, plus 2 IP addresses. */
- if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
- (2 * (*pskb)->dev->addr_len) +
- (2 * sizeof(u32)))))
+ if (!pskb_may_pull(skb, (sizeof(struct arphdr) +
+ (2 * skb->dev->addr_len) +
+ (2 * sizeof(u32)))))
return NF_DROP;
indev = in ? in->name : nulldevname;
@@ -245,14 +245,14 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
e = get_entry(table_base, private->hook_entry[hook]);
back = get_entry(table_base, private->underflow[hook]);
- arp = arp_hdr(*pskb);
+ arp = arp_hdr(skb);
do {
- if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) {
+ if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
struct arpt_entry_target *t;
int hdr_len;
hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
- (2 * (*pskb)->dev->addr_len);
+ (2 * skb->dev->addr_len);
ADD_COUNTER(e->counters, hdr_len, 1);
t = arpt_get_target(e);
@@ -290,14 +290,14 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
/* Targets which reenter must return
* abs. verdicts
*/
- verdict = t->u.kernel.target->target(pskb,
+ verdict = t->u.kernel.target->target(skb,
in, out,
hook,
t->u.kernel.target,
t->data);
/* Target might have changed stuff. */
- arp = arp_hdr(*pskb);
+ arp = arp_hdr(skb);
if (verdict == ARPT_CONTINUE)
e = (void *)e + e->next_offset;
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index c4bdab47597..45fa4e20094 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -1,5 +1,6 @@
/* module that allows mangling of the arp payload */
#include <linux/module.h>
+#include <linux/netfilter.h>
#include <linux/netfilter_arp/arpt_mangle.h>
#include <net/sock.h>
@@ -8,7 +9,7 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
MODULE_DESCRIPTION("arptables arp payload mangle target");
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
unsigned int hooknum, const struct xt_target *target,
const void *targinfo)
@@ -18,47 +19,38 @@ target(struct sk_buff **pskb,
unsigned char *arpptr;
int pln, hln;
- if (skb_shared(*pskb) || skb_cloned(*pskb)) {
- struct sk_buff *nskb;
+ if (skb_make_writable(skb, skb->len))
+ return NF_DROP;
- nskb = skb_copy(*pskb, GFP_ATOMIC);
- if (!nskb)
- return NF_DROP;
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- }
-
- arp = arp_hdr(*pskb);
- arpptr = skb_network_header(*pskb) + sizeof(*arp);
+ arp = arp_hdr(skb);
+ arpptr = skb_network_header(skb) + sizeof(*arp);
pln = arp->ar_pln;
hln = arp->ar_hln;
/* We assume that pln and hln were checked in the match */
if (mangle->flags & ARPT_MANGLE_SDEV) {
if (ARPT_DEV_ADDR_LEN_MAX < hln ||
- (arpptr + hln > skb_tail_pointer(*pskb)))
+ (arpptr + hln > skb_tail_pointer(skb)))
return NF_DROP;
memcpy(arpptr, mangle->src_devaddr, hln);
}
arpptr += hln;
if (mangle->flags & ARPT_MANGLE_SIP) {
if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
- (arpptr + pln > skb_tail_pointer(*pskb)))
+ (arpptr + pln > skb_tail_pointer(skb)))
return NF_DROP;
memcpy(arpptr, &mangle->u_s.src_ip, pln);
}
arpptr += pln;
if (mangle->flags & ARPT_MANGLE_TDEV) {
if (ARPT_DEV_ADDR_LEN_MAX < hln ||
- (arpptr + hln > skb_tail_pointer(*pskb)))
+ (arpptr + hln > skb_tail_pointer(skb)))
return NF_DROP;
memcpy(arpptr, mangle->tgt_devaddr, hln);
}
arpptr += hln;
if (mangle->flags & ARPT_MANGLE_TIP) {
if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
- (arpptr + pln > skb_tail_pointer(*pskb)))
+ (arpptr + pln > skb_tail_pointer(skb)))
return NF_DROP;
memcpy(arpptr, &mangle->u_t.tgt_ip, pln);
}
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 75c02306253..302d3da5f69 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -56,12 +56,12 @@ static struct arpt_table packet_filter = {
/* The work comes in here from netfilter.c */
static unsigned int arpt_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return arpt_do_table(pskb, hook, in, out, &packet_filter);
+ return arpt_do_table(skb, hook, in, out, &packet_filter);
}
static struct nf_hook_ops arpt_ops[] = {
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 23cbfc7c80f..10a2ce09fd8 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -335,6 +335,7 @@ static int
ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
{
int diff;
+ int err;
struct iphdr *user_iph = (struct iphdr *)v->payload;
if (v->data_len < sizeof(*user_iph))
@@ -347,25 +348,18 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
if (v->data_len > 0xFFFF)
return -EINVAL;
if (diff > skb_tailroom(e->skb)) {
- struct sk_buff *newskb;
-
- newskb = skb_copy_expand(e->skb,
- skb_headroom(e->skb),
- diff,
- GFP_ATOMIC);
- if (newskb == NULL) {
- printk(KERN_WARNING "ip_queue: OOM "
- "in mangle, dropping packet\n");
- return -ENOMEM;
+ err = pskb_expand_head(e->skb, 0,
+ diff - skb_tailroom(e->skb),
+ GFP_ATOMIC);
+ if (err) {
+ printk(KERN_WARNING "ip_queue: error "
+ "in mangle, dropping packet: %d\n", -err);
+ return err;
}
- if (e->skb->sk)
- skb_set_owner_w(newskb, e->skb->sk);
- kfree_skb(e->skb);
- e->skb = newskb;
}
skb_put(e->skb, diff);
}
- if (!skb_make_writable(&e->skb, v->data_len))
+ if (!skb_make_writable(e->skb, v->data_len))
return -ENOMEM;
skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
e->skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 6486894f450..4b10b98640a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -169,7 +169,7 @@ ip_checkentry(const struct ipt_ip *ip)
}
static unsigned int
-ipt_error(struct sk_buff **pskb,
+ipt_error(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -312,7 +312,7 @@ static void trace_packet(struct sk_buff *skb,
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
-ipt_do_table(struct sk_buff **pskb,
+ipt_do_table(struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
@@ -331,8 +331,8 @@ ipt_do_table(struct sk_buff **pskb,
struct xt_table_info *private;
/* Initialization */
- ip = ip_hdr(*pskb);
- datalen = (*pskb)->len - ip->ihl * 4;
+ ip = ip_hdr(skb);
+ datalen = skb->len - ip->ihl * 4;
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
/* We handle fragments by dealing with the first fragment as
@@ -359,7 +359,7 @@ ipt_do_table(struct sk_buff **pskb,
struct ipt_entry_target *t;
if (IPT_MATCH_ITERATE(e, do_match,
- *pskb, in, out,
+ skb, in, out,
offset, &hotdrop) != 0)
goto no_match;
@@ -371,8 +371,8 @@ ipt_do_table(struct sk_buff **pskb,
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
/* The packet is traced: log it */
- if (unlikely((*pskb)->nf_trace))
- trace_packet(*pskb, hook, in, out,
+ if (unlikely(skb->nf_trace))
+ trace_packet(skb, hook, in, out,
table->name, private, e);
#endif
/* Standard target? */
@@ -410,7 +410,7 @@ ipt_do_table(struct sk_buff **pskb,
((struct ipt_entry *)table_base)->comefrom
= 0xeeeeeeec;
#endif
- verdict = t->u.kernel.target->target(pskb,
+ verdict = t->u.kernel.target->target(skb,
in, out,
hook,
t->u.kernel.target,
@@ -428,8 +428,8 @@ ipt_do_table(struct sk_buff **pskb,
= 0x57acc001;
#endif
/* Target might have changed stuff. */
- ip = ip_hdr(*pskb);
- datalen = (*pskb)->len - ip->ihl * 4;
+ ip = ip_hdr(skb);
+ datalen = skb->len - ip->ihl * 4;
if (verdict == IPT_CONTINUE)
e = (void *)e + e->next_offset;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 27f14e1ebd8..2f544dac72d 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -289,7 +289,7 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
***********************************************************************/
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -305,7 +305,7 @@ target(struct sk_buff **pskb,
* is only decremented by destroy() - and ip_tables guarantees
* that the ->target() function isn't called after ->destroy() */
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
if (ct == NULL) {
printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
/* FIXME: need to drop invalid ones, since replies
@@ -316,7 +316,7 @@ target(struct sk_buff **pskb,
/* special case: ICMP error handling. conntrack distinguishes between
* error messages (RELATED) and information requests (see below) */
- if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP
&& (ctinfo == IP_CT_RELATED
|| ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY))
return XT_CONTINUE;
@@ -325,7 +325,7 @@ target(struct sk_buff **pskb,
* TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
* on, which all have an ID field [relevant for hashing]. */
- hash = clusterip_hashfn(*pskb, cipinfo->config);
+ hash = clusterip_hashfn(skb, cipinfo->config);
switch (ctinfo) {
case IP_CT_NEW:
@@ -355,7 +355,7 @@ target(struct sk_buff **pskb,
/* despite being received via linklayer multicast, this is
* actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
- (*pskb)->pkt_type = PACKET_HOST;
+ skb->pkt_type = PACKET_HOST;
return XT_CONTINUE;
}
@@ -505,12 +505,12 @@ static void arp_print(struct arp_payload *payload)
static unsigned int
arp_mangle(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct arphdr *arp = arp_hdr(*pskb);
+ struct arphdr *arp = arp_hdr(skb);
struct arp_payload *payload;
struct clusterip_config *c;
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index f1253bd3837..add110060a2 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -26,15 +26,15 @@ MODULE_DESCRIPTION("iptables ECN modification module");
/* set ECT codepoint from IP header.
* return false if there was an error. */
static inline bool
-set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
+set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
{
- struct iphdr *iph = ip_hdr(*pskb);
+ struct iphdr *iph = ip_hdr(skb);
if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
__u8 oldtos;
- if (!skb_make_writable(pskb, sizeof(struct iphdr)))
+ if (!skb_make_writable(skb, sizeof(struct iphdr)))
return false;
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
oldtos = iph->tos;
iph->tos &= ~IPT_ECN_IP_MASK;
iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
@@ -45,14 +45,13 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
/* Return false if there was an error. */
static inline bool
-set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
+set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
{
struct tcphdr _tcph, *tcph;
__be16 oldval;
/* Not enought header? */
- tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
- sizeof(_tcph), &_tcph);
+ tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
if (!tcph)
return false;
@@ -62,9 +61,9 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
tcph->cwr == einfo->proto.tcp.cwr))
return true;
- if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
+ if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
return false;
- tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb);
+ tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb);
oldval = ((__be16 *)tcph)[6];
if (einfo->operation & IPT_ECN_OP_SET_ECE)
@@ -72,13 +71,13 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
if (einfo->operation & IPT_ECN_OP_SET_CWR)
tcph->cwr = einfo->proto.tcp.cwr;
- nf_proto_csum_replace2(&tcph->check, *pskb,
+ nf_proto_csum_replace2(&tcph->check, skb,
oldval, ((__be16 *)tcph)[6], 0);
return true;
}
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -88,12 +87,12 @@ target(struct sk_buff **pskb,
const struct ipt_ECN_info *einfo = targinfo;
if (einfo->operation & IPT_ECN_OP_SET_IP)
- if (!set_ect_ip(pskb, einfo))
+ if (!set_ect_ip(skb, einfo))
return NF_DROP;
if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
- && ip_hdr(*pskb)->protocol == IPPROTO_TCP)
- if (!set_ect_tcp(pskb, einfo))
+ && ip_hdr(skb)->protocol == IPPROTO_TCP)
+ if (!set_ect_tcp(skb, einfo))
return NF_DROP;
return XT_CONTINUE;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 127a5e89bf1..4b5e8216a4e 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -418,7 +418,7 @@ ipt_log_packet(unsigned int pf,
}
static unsigned int
-ipt_log_target(struct sk_buff **pskb,
+ipt_log_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -432,7 +432,7 @@ ipt_log_target(struct sk_buff **pskb,
li.u.log.level = loginfo->level;
li.u.log.logflags = loginfo->logflags;
- ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li,
+ ipt_log_packet(PF_INET, hooknum, skb, in, out, &li,
loginfo->prefix);
return XT_CONTINUE;
}
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 3e0b562b2db..44b516e7cb7 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -52,7 +52,7 @@ masquerade_check(const char *tablename,
}
static unsigned int
-masquerade_target(struct sk_buff **pskb,
+masquerade_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -69,7 +69,7 @@ masquerade_target(struct sk_buff **pskb,
NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
nat = nfct_nat(ct);
NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
@@ -82,7 +82,7 @@ masquerade_target(struct sk_buff **pskb,
return NF_ACCEPT;
mr = targinfo;
- rt = (struct rtable *)(*pskb)->dst;
+ rt = (struct rtable *)skb->dst;
newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
if (!newsrc) {
printk("MASQUERADE: %s ate my IP address\n", out->name);
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 41a011d5a06..f8699291e33 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -43,7 +43,7 @@ check(const char *tablename,
}
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -59,14 +59,14 @@ target(struct sk_buff **pskb,
NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
|| hooknum == NF_IP_POST_ROUTING
|| hooknum == NF_IP_LOCAL_OUT);
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT)
- new_ip = ip_hdr(*pskb)->daddr & ~netmask;
+ new_ip = ip_hdr(skb)->daddr & ~netmask;
else
- new_ip = ip_hdr(*pskb)->saddr & ~netmask;
+ new_ip = ip_hdr(skb)->saddr & ~netmask;
new_ip |= mr->range[0].min_ip & netmask;
newrange = ((struct nf_nat_range)
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 6ac7a237331..f7cf7d61a2d 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -47,7 +47,7 @@ redirect_check(const char *tablename,
}
static unsigned int
-redirect_target(struct sk_buff **pskb,
+redirect_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -63,7 +63,7 @@ redirect_target(struct sk_buff **pskb,
NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING
|| hooknum == NF_IP_LOCAL_OUT);
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
/* Local packets: make them go to loopback */
@@ -76,7 +76,7 @@ redirect_target(struct sk_buff **pskb,
newdst = 0;
rcu_read_lock();
- indev = __in_dev_get_rcu((*pskb)->dev);
+ indev = __in_dev_get_rcu(skb->dev);
if (indev && (ifa = indev->ifa_list))
newdst = ifa->ifa_local;
rcu_read_unlock();
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index cb038c8fbc9..dcf4d21d511 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -131,7 +131,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
)
addr_type = RTN_LOCAL;
- if (ip_route_me_harder(&nskb, addr_type))
+ if (ip_route_me_harder(nskb, addr_type))
goto free_nskb;
nskb->ip_summed = CHECKSUM_NONE;
@@ -162,7 +162,7 @@ static inline void send_unreach(struct sk_buff *skb_in, int code)
icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
}
-static unsigned int reject(struct sk_buff **pskb,
+static unsigned int reject(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -173,7 +173,7 @@ static unsigned int reject(struct sk_buff **pskb,
/* Our naive response construction doesn't deal with IP
options, and probably shouldn't try. */
- if (ip_hdrlen(*pskb) != sizeof(struct iphdr))
+ if (ip_hdrlen(skb) != sizeof(struct iphdr))
return NF_DROP;
/* WARNING: This code causes reentry within iptables.
@@ -181,28 +181,28 @@ static unsigned int reject(struct sk_buff **pskb,
must return an absolute verdict. --RR */
switch (reject->with) {
case IPT_ICMP_NET_UNREACHABLE:
- send_unreach(*pskb, ICMP_NET_UNREACH);
+ send_unreach(skb, ICMP_NET_UNREACH);
break;
case IPT_ICMP_HOST_UNREACHABLE:
- send_unreach(*pskb, ICMP_HOST_UNREACH);
+ send_unreach(skb, ICMP_HOST_UNREACH);
break;
case IPT_ICMP_PROT_UNREACHABLE:
- send_unreach(*pskb, ICMP_PROT_UNREACH);
+ send_unreach(skb, ICMP_PROT_UNREACH);
break;
case IPT_ICMP_PORT_UNREACHABLE:
- send_unreach(*pskb, ICMP_PORT_UNREACH);
+ send_unreach(skb, ICMP_PORT_UNREACH);
break;
case IPT_ICMP_NET_PROHIBITED:
- send_unreach(*pskb, ICMP_NET_ANO);
+ send_unreach(skb, ICMP_NET_ANO);
break;
case IPT_ICMP_HOST_PROHIBITED:
- send_unreach(*pskb, ICMP_HOST_ANO);
+ send_unreach(skb, ICMP_HOST_ANO);
break;
case IPT_ICMP_ADMIN_PROHIBITED:
- send_unreach(*pskb, ICMP_PKT_FILTERED);
+ send_unreach(skb, ICMP_PKT_FILTERED);
break;
case IPT_TCP_RESET:
- send_reset(*pskb, hooknum);
+ send_reset(skb, hooknum);
case IPT_ICMP_ECHOREPLY:
/* Doesn't happen. */
break;
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
index 97641f1a97f..8988571436b 100644
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ b/net/ipv4/netfilter/ipt_SAME.c
@@ -104,7 +104,7 @@ same_destroy(const struct xt_target *target, void *targinfo)
}
static unsigned int
-same_target(struct sk_buff **pskb,
+same_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -121,7 +121,7 @@ same_target(struct sk_buff **pskb,
NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
hooknum == NF_IP_POST_ROUTING);
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
index 25f5d0b3906..d4573baa7f2 100644
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ b/net/ipv4/netfilter/ipt_TOS.c
@@ -21,7 +21,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("iptables TOS mangling module");
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -29,13 +29,13 @@ target(struct sk_buff **pskb,
const void *targinfo)
{
const struct ipt_tos_target_info *tosinfo = targinfo;
- struct iphdr *iph = ip_hdr(*pskb);
+ struct iphdr *iph = ip_hdr(skb);
if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
__u8 oldtos;
- if (!skb_make_writable(pskb, sizeof(struct iphdr)))
+ if (!skb_make_writable(skb, sizeof(struct iphdr)))
return NF_DROP;
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
oldtos = iph->tos;
iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos;
nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
index 2b54e7b0cfe..c620a052766 100644
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ b/net/ipv4/netfilter/ipt_TTL.c
@@ -20,7 +20,7 @@ MODULE_DESCRIPTION("IP tables TTL modification module");
MODULE_LICENSE("GPL");
static unsigned int
-ipt_ttl_target(struct sk_buff **pskb,
+ipt_ttl_target(struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
unsigned int hooknum, const struct xt_target *target,
const void *targinfo)
@@ -29,10 +29,10 @@ ipt_ttl_target(struct sk_buff **pskb,
const struct ipt_TTL_info *info = targinfo;
int new_ttl;
- if (!skb_make_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return NF_DROP;
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
switch (info->mode) {
case IPT_TTL_SET:
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index c636d6d6357..212b830765a 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -279,7 +279,7 @@ alloc_failure:
spin_unlock_bh(&ulog_lock);
}
-static unsigned int ipt_ulog_target(struct sk_buff **pskb,
+static unsigned int ipt_ulog_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -288,7 +288,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb,
{
struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
- ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL);
+ ipt_ulog_packet(hooknum, skb, in, out, loginfo, NULL);
return XT_CONTINUE;
}
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 4f51c1d7d2d..ba3262c6043 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -62,31 +62,31 @@ static struct xt_table packet_filter = {
/* The work comes in here from netfilter.c. */
static unsigned int
ipt_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(pskb, hook, in, out, &packet_filter);
+ return ipt_do_table(skb, hook, in, out, &packet_filter);
}
static unsigned int
ipt_local_out_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("iptable_filter: ignoring short SOCK_RAW "
"packet.\n");
return NF_ACCEPT;
}
- return ipt_do_table(pskb, hook, in, out, &packet_filter);
+ return ipt_do_table(skb, hook, in, out, &packet_filter);
}
static struct nf_hook_ops ipt_ops[] = {
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 902446f7cbc..b4360a69d5c 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -75,17 +75,17 @@ static struct xt_table packet_mangler = {
/* The work comes in here from netfilter.c. */
static unsigned int
ipt_route_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(pskb, hook, in, out, &packet_mangler);
+ return ipt_do_table(skb, hook, in, out, &packet_mangler);
}
static unsigned int
ipt_local_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -97,8 +97,8 @@ ipt_local_hook(unsigned int hook,
u_int32_t mark;
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+ if (skb->len < sizeof(struct iphdr)
+ || ip_hdrlen(skb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("iptable_mangle: ignoring short SOCK_RAW "
"packet.\n");
@@ -106,22 +106,22 @@ ipt_local_hook(unsigned int hook,
}
/* Save things which could affect route */
- mark = (*pskb)->mark;
- iph = ip_hdr(*pskb);
+ mark = skb->mark;
+ iph = ip_hdr(skb);
saddr = iph->saddr;
daddr = iph->daddr;
tos = iph->tos;
- ret = ipt_do_table(pskb, hook, in, out, &packet_mangler);
+ ret = ipt_do_table(skb, hook, in, out, &packet_mangler);
/* Reroute for ANY change. */
if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
if (iph->saddr != saddr ||
iph->daddr != daddr ||
- (*pskb)->mark != mark ||
+ skb->mark != mark ||
iph->tos != tos)
- if (ip_route_me_harder(pskb, RTN_UNSPEC))
+ if (ip_route_me_harder(skb, RTN_UNSPEC))
ret = NF_DROP;
}
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index d6e50339568..5de6e57ac55 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -47,30 +47,30 @@ static struct xt_table packet_raw = {
/* The work comes in here from netfilter.c. */
static unsigned int
ipt_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(pskb, hook, in, out, &packet_raw);
+ return ipt_do_table(skb, hook, in, out, &packet_raw);
}
static unsigned int
ipt_local_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr) ||
- ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("iptable_raw: ignoring short SOCK_RAW"
"packet.\n");
return NF_ACCEPT;
}
- return ipt_do_table(pskb, hook, in, out, &packet_raw);
+ return ipt_do_table(skb, hook, in, out, &packet_raw);
}
/* 'raw' is the very first table. */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2fcb9249a8d..831e9b29806 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -63,19 +63,20 @@ static int ipv4_print_conntrack(struct seq_file *s,
}
/* Returns new sk_buff, or NULL */
-static struct sk_buff *
-nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
+static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
{
+ int err;
+
skb_orphan(skb);
local_bh_disable();
- skb = ip_defrag(skb, user);
+ err = ip_defrag(skb, user);
local_bh_enable();
- if (skb)
+ if (!err)
ip_send_check(ip_hdr(skb));
- return skb;
+ return err;
}
static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
@@ -99,17 +100,17 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
}
static unsigned int ipv4_confirm(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
/* We've seen it coming out the other side: confirm it */
- return nf_conntrack_confirm(pskb);
+ return nf_conntrack_confirm(skb);
}
static unsigned int ipv4_conntrack_help(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -120,7 +121,7 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
struct nf_conntrack_helper *helper;
/* This is where we call the helper: as the packet goes out. */
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
return NF_ACCEPT;
@@ -131,56 +132,55 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum,
helper = rcu_dereference(help->helper);
if (!helper)
return NF_ACCEPT;
- return helper->help(pskb, skb_network_offset(*pskb) + ip_hdrlen(*pskb),
+ return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
ct, ctinfo);
}
static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
/* Previously seen (loopback)? Ignore. Do this before
fragment check. */
- if ((*pskb)->nfct)
+ if (skb->nfct)
return NF_ACCEPT;
/* Gather fragments. */
- if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- *pskb = nf_ct_ipv4_gather_frags(*pskb,
- hooknum == NF_IP_PRE_ROUTING ?
- IP_DEFRAG_CONNTRACK_IN :
- IP_DEFRAG_CONNTRACK_OUT);
- if (!*pskb)
+ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
+ if (nf_ct_ipv4_gather_frags(skb,
+ hooknum == NF_IP_PRE_ROUTING ?
+ IP_DEFRAG_CONNTRACK_IN :
+ IP_DEFRAG_CONNTRACK_OUT))
return NF_STOLEN;
}
return NF_ACCEPT;
}
static unsigned int ipv4_conntrack_in(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return nf_conntrack_in(PF_INET, hooknum, pskb);
+ return nf_conntrack_in(PF_INET, hooknum, skb);
}
static unsigned int ipv4_conntrack_local(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ipt_hook: happy cracking.\n");
return NF_ACCEPT;
}
- return nf_conntrack_in(PF_INET, hooknum, pskb);
+ return nf_conntrack_in(PF_INET, hooknum, skb);
}
/* Connection tracking may drop packets, but never alters them, so
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index bd93a1d7105..35a5aa69cd9 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -24,7 +24,7 @@ MODULE_DESCRIPTION("Amanda NAT helper");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ip_nat_amanda");
-static unsigned int help(struct sk_buff **pskb,
+static unsigned int help(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
@@ -53,7 +53,7 @@ static unsigned int help(struct sk_buff **pskb,
return NF_DROP;
sprintf(buffer, "%u", port);
- ret = nf_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
+ ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo,
matchoff, matchlen,
buffer, strlen(buffer));
if (ret != NF_ACCEPT)
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 7221aa20e6f..56e93f692e8 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -349,7 +349,7 @@ EXPORT_SYMBOL(nf_nat_setup_info);
/* Returns true if succeeded. */
static int
manip_pkt(u_int16_t proto,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *target,
enum nf_nat_manip_type maniptype)
@@ -357,19 +357,19 @@ manip_pkt(u_int16_t proto,
struct iphdr *iph;
struct nf_nat_protocol *p;
- if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
+ if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
return 0;
- iph = (void *)(*pskb)->data + iphdroff;
+ iph = (void *)skb->data + iphdroff;
/* Manipulate protcol part. */
/* rcu_read_lock()ed by nf_hook_slow */
p = __nf_nat_proto_find(proto);
- if (!p->manip_pkt(pskb, iphdroff, target, maniptype))
+ if (!p->manip_pkt(skb, iphdroff, target, maniptype))
return 0;
- iph = (void *)(*pskb)->data + iphdroff;
+ iph = (void *)skb->data + iphdroff;
if (maniptype == IP_NAT_MANIP_SRC) {
nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
@@ -385,7 +385,7 @@ manip_pkt(u_int16_t proto,
unsigned int nf_nat_packet(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int hooknum,
- struct sk_buff **pskb)
+ struct sk_buff *skb)
{
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
unsigned long statusbit;
@@ -407,7 +407,7 @@ unsigned int nf_nat_packet(struct nf_conn *ct,
/* We are aiming to look like inverse of other direction. */
nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
- if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
+ if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype))
return NF_DROP;
}
return NF_ACCEPT;
@@ -418,7 +418,7 @@ EXPORT_SYMBOL_GPL(nf_nat_packet);
int nf_nat_icmp_reply_translation(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int hooknum,
- struct sk_buff **pskb)
+ struct sk_buff *skb)
{
struct {
struct icmphdr icmp;
@@ -426,24 +426,24 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
} *inside;
struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple inner, target;
- int hdrlen = ip_hdrlen(*pskb);
+ int hdrlen = ip_hdrlen(skb);
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
unsigned long statusbit;
enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
- if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
+ if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
return 0;
- inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
+ inside = (void *)skb->data + ip_hdrlen(skb);
/* We're actually going to mangle it beyond trivial checksum
adjustment, so make sure the current checksum is correct. */
- if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0))
+ if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
return 0;
/* Must be RELATED */
- NF_CT_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
- (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
+ NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED ||
+ skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
/* Redirects on non-null nats must be dropped, else they'll
start talking to each other without our translation, and be
@@ -458,15 +458,15 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
}
pr_debug("icmp_reply_translation: translating error %p manip %u "
- "dir %s\n", *pskb, manip,
+ "dir %s\n", skb, manip,
dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
/* rcu_read_lock()ed by nf_hook_slow */
l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
- if (!nf_ct_get_tuple(*pskb,
- ip_hdrlen(*pskb) + sizeof(struct icmphdr),
- (ip_hdrlen(*pskb) +
+ if (!nf_ct_get_tuple(skb,
+ ip_hdrlen(skb) + sizeof(struct icmphdr),
+ (ip_hdrlen(skb) +
sizeof(struct icmphdr) + inside->ip.ihl * 4),
(u_int16_t)AF_INET,
inside->ip.protocol,
@@ -478,19 +478,19 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
pass all hooks (locally-generated ICMP). Consider incoming
packet: PREROUTING (DST manip), routing produces ICMP, goes
through POSTROUTING (which must correct the DST manip). */
- if (!manip_pkt(inside->ip.protocol, pskb,
- ip_hdrlen(*pskb) + sizeof(inside->icmp),
+ if (!manip_pkt(inside->ip.protocol, skb,
+ ip_hdrlen(skb) + sizeof(inside->icmp),
&ct->tuplehash[!dir].tuple,
!manip))
return 0;
- if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
/* Reloading "inside" here since manip_pkt inner. */
- inside = (void *)(*pskb)->data + ip_hdrlen(*pskb);
+ inside = (void *)skb->data + ip_hdrlen(skb);
inside->icmp.checksum = 0;
inside->icmp.checksum =
- csum_fold(skb_checksum(*pskb, hdrlen,
- (*pskb)->len - hdrlen, 0));
+ csum_fold(skb_checksum(skb, hdrlen,
+ skb->len - hdrlen, 0));
}
/* Change outer to look the reply to an incoming packet
@@ -506,7 +506,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
if (ct->status & statusbit) {
nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
- if (!manip_pkt(0, pskb, 0, &target, manip))
+ if (!manip_pkt(0, skb, 0, &target, manip))
return 0;
}
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index 3663bd879c3..e1a16d3ea4c 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -28,7 +28,7 @@ MODULE_ALIAS("ip_nat_ftp");
/* FIXME: Time out? --RR */
static int
-mangle_rfc959_packet(struct sk_buff **pskb,
+mangle_rfc959_packet(struct sk_buff *skb,
__be32 newip,
u_int16_t port,
unsigned int matchoff,
@@ -43,13 +43,13 @@ mangle_rfc959_packet(struct sk_buff **pskb,
pr_debug("calling nf_nat_mangle_tcp_packet\n");
- return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+ return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
matchlen, buffer, strlen(buffer));
}
/* |1|132.235.1.2|6275| */
static int
-mangle_eprt_packet(struct sk_buff **pskb,
+mangle_eprt_packet(struct sk_buff *skb,
__be32 newip,
u_int16_t port,
unsigned int matchoff,
@@ -63,13 +63,13 @@ mangle_eprt_packet(struct sk_buff **pskb,
pr_debug("calling nf_nat_mangle_tcp_packet\n");
- return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+ return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
matchlen, buffer, strlen(buffer));
}
/* |1|132.235.1.2|6275| */
static int
-mangle_epsv_packet(struct sk_buff **pskb,
+mangle_epsv_packet(struct sk_buff *skb,
__be32 newip,
u_int16_t port,
unsigned int matchoff,
@@ -83,11 +83,11 @@ mangle_epsv_packet(struct sk_buff **pskb,
pr_debug("calling nf_nat_mangle_tcp_packet\n");
- return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
+ return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
matchlen, buffer, strlen(buffer));
}
-static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
+static int (*mangle[])(struct sk_buff *, __be32, u_int16_t,
unsigned int, unsigned int, struct nf_conn *,
enum ip_conntrack_info)
= {
@@ -99,7 +99,7 @@ static int (*mangle[])(struct sk_buff **, __be32, u_int16_t,
/* So, this packet has hit the connection tracking matching code.
Mangle it, and change the expectation to match the new version. */
-static unsigned int nf_nat_ftp(struct sk_buff **pskb,
+static unsigned int nf_nat_ftp(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
enum nf_ct_ftp_type type,
unsigned int matchoff,
@@ -132,7 +132,7 @@ static unsigned int nf_nat_ftp(struct sk_buff **pskb,
if (port == 0)
return NF_DROP;
- if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo)) {
+ if (!mangle[type](skb, newip, port, matchoff, matchlen, ct, ctinfo)) {
nf_ct_unexpect_related(exp);
return NF_DROP;
}
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index c1b059a7370..a868c8c4132 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -22,12 +22,12 @@
#include <linux/netfilter/nf_conntrack_h323.h>
/****************************************************************************/
-static int set_addr(struct sk_buff **pskb,
+static int set_addr(struct sk_buff *skb,
unsigned char **data, int dataoff,
unsigned int addroff, __be32 ip, __be16 port)
{
enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo);
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
struct {
__be32 ip;
__be16 port;
@@ -38,8 +38,8 @@ static int set_addr(struct sk_buff **pskb,
buf.port = port;
addroff += dataoff;
- if (ip_hdr(*pskb)->protocol == IPPROTO_TCP) {
- if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+ if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
+ if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
addroff, sizeof(buf),
(char *) &buf, sizeof(buf))) {
if (net_ratelimit())
@@ -49,14 +49,13 @@ static int set_addr(struct sk_buff **pskb,
}
/* Relocate data pointer */
- th = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
+ th = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_tcph), &_tcph);
if (th == NULL)
return -1;
- *data = (*pskb)->data + ip_hdrlen(*pskb) +
- th->doff * 4 + dataoff;
+ *data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff;
} else {
- if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+ if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
addroff, sizeof(buf),
(char *) &buf, sizeof(buf))) {
if (net_ratelimit())
@@ -67,36 +66,35 @@ static int set_addr(struct sk_buff **pskb,
/* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
* or pull everything in a linear buffer, so we can safely
* use the skb pointers now */
- *data = ((*pskb)->data + ip_hdrlen(*pskb) +
- sizeof(struct udphdr));
+ *data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
}
return 0;
}
/****************************************************************************/
-static int set_h225_addr(struct sk_buff **pskb,
+static int set_h225_addr(struct sk_buff *skb,
unsigned char **data, int dataoff,
TransportAddress *taddr,
union nf_conntrack_address *addr, __be16 port)
{
- return set_addr(pskb, data, dataoff, taddr->ipAddress.ip,
+ return set_addr(skb, data, dataoff, taddr->ipAddress.ip,
addr->ip, port);
}
/****************************************************************************/
-static int set_h245_addr(struct sk_buff **pskb,
+static int set_h245_addr(struct sk_buff *skb,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr,
union nf_conntrack_address *addr, __be16 port)
{
- return set_addr(pskb, data, dataoff,
+ return set_addr(skb, data, dataoff,
taddr->unicastAddress.iPAddress.network,
addr->ip, port);
}
/****************************************************************************/
-static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct,
+static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data,
TransportAddress *taddr, int count)
@@ -125,7 +123,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct,
NIPQUAD(addr.ip), port,
NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip),
info->sig_port[!dir]);
- return set_h225_addr(pskb, data, 0, &taddr[i],
+ return set_h225_addr(skb, data, 0, &taddr[i],
&ct->tuplehash[!dir].
tuple.dst.u3,
info->sig_port[!dir]);
@@ -137,7 +135,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct,
NIPQUAD(addr.ip), port,
NIPQUAD(ct->tuplehash[!dir].tuple.src.u3.ip),
info->sig_port[!dir]);
- return set_h225_addr(pskb, data, 0, &taddr[i],
+ return set_h225_addr(skb, data, 0, &taddr[i],
&ct->tuplehash[!dir].
tuple.src.u3,
info->sig_port[!dir]);
@@ -149,7 +147,7 @@ static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct,
+static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data,
TransportAddress *taddr, int count)
@@ -168,7 +166,7 @@ static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct,
NIPQUAD(addr.ip), ntohs(port),
NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip),
ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port));
- return set_h225_addr(pskb, data, 0, &taddr[i],
+ return set_h225_addr(skb, data, 0, &taddr[i],
&ct->tuplehash[!dir].tuple.dst.u3,
ct->tuplehash[!dir].tuple.
dst.u.udp.port);
@@ -179,7 +177,7 @@ static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr,
@@ -244,7 +242,7 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
}
/* Modify signal */
- if (set_h245_addr(pskb, data, dataoff, taddr,
+ if (set_h245_addr(skb, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons((port & htons(1)) ? nated_port + 1 :
nated_port)) == 0) {
@@ -273,7 +271,7 @@ static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr, __be16 port,
@@ -301,7 +299,7 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct,
}
/* Modify signal */
- if (set_h245_addr(pskb, data, dataoff, taddr,
+ if (set_h245_addr(skb, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons(nated_port)) < 0) {
nf_ct_unexpect_related(exp);
@@ -318,7 +316,7 @@ static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
TransportAddress *taddr, __be16 port,
@@ -351,7 +349,7 @@ static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct,
}
/* Modify signal */
- if (set_h225_addr(pskb, data, dataoff, taddr,
+ if (set_h225_addr(skb, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons(nated_port)) == 0) {
/* Save ports */
@@ -406,7 +404,7 @@ static void ip_nat_q931_expect(struct nf_conn *new,
}
/****************************************************************************/
-static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, TransportAddress *taddr, int idx,
__be16 port, struct nf_conntrack_expect *exp)
@@ -439,7 +437,7 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct,
}
/* Modify signal */
- if (set_h225_addr(pskb, data, 0, &taddr[idx],
+ if (set_h225_addr(skb, data, 0, &taddr[idx],
&ct->tuplehash[!dir].tuple.dst.u3,
htons(nated_port)) == 0) {
/* Save ports */
@@ -450,7 +448,7 @@ static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct,
if (idx > 0 &&
get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
(ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
- set_h225_addr(pskb, data, 0, &taddr[0],
+ set_h225_addr(skb, data, 0, &taddr[0],
&ct->tuplehash[!dir].tuple.dst.u3,
info->sig_port[!dir]);
}
@@ -495,7 +493,7 @@ static void ip_nat_callforwarding_expect(struct nf_conn *new,
}
/****************************************************************************/
-static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct,
+static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
TransportAddress *taddr, __be16 port,
@@ -525,7 +523,7 @@ static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct,
}
/* Modify signal */
- if (!set_h225_addr(pskb, data, dataoff, taddr,
+ if (!set_h225_addr(skb, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons(nated_port)) == 0) {
nf_ct_unexpect_related(exp);
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 93d8a0a8f03..8718da00ef2 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -111,22 +111,14 @@ static void mangle_contents(struct sk_buff *skb,
}
/* Unusual, but possible case. */
-static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
+static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
{
- struct sk_buff *nskb;
-
- if ((*pskb)->len + extra > 65535)
+ if (skb->len + extra > 65535)
return 0;
- nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC);
- if (!nskb)
+ if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC))
return 0;
- /* Transfer socket to new skb. */
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
return 1;
}
@@ -139,7 +131,7 @@ static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
*
* */
int
-nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
+nf_nat_mangle_tcp_packet(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int match_offset,
@@ -147,37 +139,37 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
const char *rep_buffer,
unsigned int rep_len)
{
- struct rtable *rt = (struct rtable *)(*pskb)->dst;
+ struct rtable *rt = (struct rtable *)skb->dst;
struct iphdr *iph;
struct tcphdr *tcph;
int oldlen, datalen;
- if (!skb_make_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return 0;
if (rep_len > match_len &&
- rep_len - match_len > skb_tailroom(*pskb) &&
- !enlarge_skb(pskb, rep_len - match_len))
+ rep_len - match_len > skb_tailroom(skb) &&
+ !enlarge_skb(skb, rep_len - match_len))
return 0;
- SKB_LINEAR_ASSERT(*pskb);
+ SKB_LINEAR_ASSERT(skb);
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
tcph = (void *)iph + iph->ihl*4;
- oldlen = (*pskb)->len - iph->ihl*4;
- mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
+ oldlen = skb->len - iph->ihl*4;
+ mangle_contents(skb, iph->ihl*4 + tcph->doff*4,
match_offset, match_len, rep_buffer, rep_len);
- datalen = (*pskb)->len - iph->ihl*4;
- if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ datalen = skb->len - iph->ihl*4;
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
if (!(rt->rt_flags & RTCF_LOCAL) &&
- (*pskb)->dev->features & NETIF_F_V4_CSUM) {
- (*pskb)->ip_summed = CHECKSUM_PARTIAL;
- (*pskb)->csum_start = skb_headroom(*pskb) +
- skb_network_offset(*pskb) +
- iph->ihl * 4;
- (*pskb)->csum_offset = offsetof(struct tcphdr, check);
+ skb->dev->features & NETIF_F_V4_CSUM) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_headroom(skb) +
+ skb_network_offset(skb) +
+ iph->ihl * 4;
+ skb->csum_offset = offsetof(struct tcphdr, check);
tcph->check = ~tcp_v4_check(datalen,
iph->saddr, iph->daddr, 0);
} else {
@@ -188,7 +180,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
datalen, 0));
}
} else
- nf_proto_csum_replace2(&tcph->check, *pskb,
+ nf_proto_csum_replace2(&tcph->check, skb,
htons(oldlen), htons(datalen), 1);
if (rep_len != match_len) {
@@ -197,7 +189,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff **pskb,
(int)rep_len - (int)match_len,
ct, ctinfo);
/* Tell TCP window tracking about seq change */
- nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb),
+ nf_conntrack_tcp_update(skb, ip_hdrlen(skb),
ct, CTINFO2DIR(ctinfo));
}
return 1;
@@ -215,7 +207,7 @@ EXPORT_SYMBOL(nf_nat_mangle_tcp_packet);
* should be fairly easy to do.
*/
int
-nf_nat_mangle_udp_packet(struct sk_buff **pskb,
+nf_nat_mangle_udp_packet(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int match_offset,
@@ -223,48 +215,48 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
const char *rep_buffer,
unsigned int rep_len)
{
- struct rtable *rt = (struct rtable *)(*pskb)->dst;
+ struct rtable *rt = (struct rtable *)skb->dst;
struct iphdr *iph;
struct udphdr *udph;
int datalen, oldlen;
/* UDP helpers might accidentally mangle the wrong packet */
- iph = ip_hdr(*pskb);
- if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
+ iph = ip_hdr(skb);
+ if (skb->len < iph->ihl*4 + sizeof(*udph) +
match_offset + match_len)
return 0;
- if (!skb_make_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return 0;
if (rep_len > match_len &&
- rep_len - match_len > skb_tailroom(*pskb) &&
- !enlarge_skb(pskb, rep_len - match_len))
+ rep_len - match_len > skb_tailroom(skb) &&
+ !enlarge_skb(skb, rep_len - match_len))
return 0;
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
udph = (void *)iph + iph->ihl*4;
- oldlen = (*pskb)->len - iph->ihl*4;
- mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
+ oldlen = skb->len - iph->ihl*4;
+ mangle_contents(skb, iph->ihl*4 + sizeof(*udph),
match_offset, match_len, rep_buffer, rep_len);
/* update the length of the UDP packet */
- datalen = (*pskb)->len - iph->ihl*4;
+ datalen = skb->len - iph->ihl*4;
udph->len = htons(datalen);
/* fix udp checksum if udp checksum was previously calculated */
- if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL)
+ if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
return 1;
- if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
if (!(rt->rt_flags & RTCF_LOCAL) &&
- (*pskb)->dev->features & NETIF_F_V4_CSUM) {
- (*pskb)->ip_summed = CHECKSUM_PARTIAL;
- (*pskb)->csum_start = skb_headroom(*pskb) +
- skb_network_offset(*pskb) +
- iph->ihl * 4;
- (*pskb)->csum_offset = offsetof(struct udphdr, check);
+ skb->dev->features & NETIF_F_V4_CSUM) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_headroom(skb) +
+ skb_network_offset(skb) +
+ iph->ihl * 4;
+ skb->csum_offset = offsetof(struct udphdr, check);
udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
datalen, IPPROTO_UDP,
0);
@@ -278,7 +270,7 @@ nf_nat_mangle_udp_packet(struct sk_buff **pskb,
udph->check = CSUM_MANGLED_0;
}
} else
- nf_proto_csum_replace2(&udph->check, *pskb,
+ nf_proto_csum_replace2(&udph->check, skb,
htons(oldlen), htons(datalen), 1);
return 1;
@@ -330,7 +322,7 @@ sack_adjust(struct sk_buff *skb,
/* TCP SACK sequence number adjustment */
static inline unsigned int
-nf_nat_sack_adjust(struct sk_buff **pskb,
+nf_nat_sack_adjust(struct sk_buff *skb,
struct tcphdr *tcph,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
@@ -338,17 +330,17 @@ nf_nat_sack_adjust(struct sk_buff **pskb,
unsigned int dir, optoff, optend;
struct nf_conn_nat *nat = nfct_nat(ct);
- optoff = ip_hdrlen(*pskb) + sizeof(struct tcphdr);
- optend = ip_hdrlen(*pskb) + tcph->doff * 4;
+ optoff = ip_hdrlen(skb) + sizeof(struct tcphdr);
+ optend = ip_hdrlen(skb) + tcph->doff * 4;
- if (!skb_make_writable(pskb, optend))
+ if (!skb_make_writable(skb, optend))
return 0;
dir = CTINFO2DIR(ctinfo);
while (optoff < optend) {
/* Usually: option, length. */
- unsigned char *op = (*pskb)->data + optoff;
+ unsigned char *op = skb->data + optoff;
switch (op[0]) {
case TCPOPT_EOL:
@@ -365,7 +357,7 @@ nf_nat_sack_adjust(struct sk_buff **pskb,
if (op[0] == TCPOPT_SACK &&
op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
- sack_adjust(*pskb, tcph, optoff+2,
+ sack_adjust(skb, tcph, optoff+2,
optoff+op[1], &nat->seq[!dir]);
optoff += op[1];
}
@@ -375,7 +367,7 @@ nf_nat_sack_adjust(struct sk_buff **pskb,
/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
int
-nf_nat_seq_adjust(struct sk_buff **pskb,
+nf_nat_seq_adjust(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
@@ -390,10 +382,10 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
this_way = &nat->seq[dir];
other_way = &nat->seq[!dir];
- if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph)))
+ if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
return 0;
- tcph = (void *)(*pskb)->data + ip_hdrlen(*pskb);
+ tcph = (void *)skb->data + ip_hdrlen(skb);
if (after(ntohl(tcph->seq), this_way->correction_pos))
newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
else
@@ -405,8 +397,8 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
else
newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
- nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0);
- nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0);
+ nf_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
+ nf_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
@@ -415,10 +407,10 @@ nf_nat_seq_adjust(struct sk_buff **pskb,
tcph->seq = newseq;
tcph->ack_seq = newack;
- if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo))
+ if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo))
return 0;
- nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), ct, dir);
+ nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir);
return 1;
}
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
index bcf274bba60..766e2c16c6b 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -27,7 +27,7 @@ MODULE_DESCRIPTION("IRC (DCC) NAT helper");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ip_nat_irc");
-static unsigned int help(struct sk_buff **pskb,
+static unsigned int help(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
@@ -58,7 +58,7 @@ static unsigned int help(struct sk_buff **pskb,
pr_debug("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n",
buffer, NIPQUAD(ip), port);
- ret = nf_nat_mangle_tcp_packet(pskb, exp->master, ctinfo,
+ ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo,
matchoff, matchlen, buffer,
strlen(buffer));
if (ret != NF_ACCEPT)
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 984ec8308b2..e1385a09907 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -110,7 +110,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
/* outbound packets == from PNS to PAC */
static int
-pptp_outbound_pkt(struct sk_buff **pskb,
+pptp_outbound_pkt(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
struct PptpControlHeader *ctlh,
@@ -175,7 +175,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
/* mangle packet */
- if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+ if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
cid_off + sizeof(struct pptp_pkt_hdr) +
sizeof(struct PptpControlHeader),
sizeof(new_callid), (char *)&new_callid,
@@ -213,7 +213,7 @@ pptp_exp_gre(struct nf_conntrack_expect *expect_orig,
/* inbound packets == from PAC to PNS */
static int
-pptp_inbound_pkt(struct sk_buff **pskb,
+pptp_inbound_pkt(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
struct PptpControlHeader *ctlh,
@@ -268,7 +268,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
pr_debug("altering peer call id from 0x%04x to 0x%04x\n",
ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
- if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+ if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
pcid_off + sizeof(struct pptp_pkt_hdr) +
sizeof(struct PptpControlHeader),
sizeof(new_pcid), (char *)&new_pcid,
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index d562290b182..b820f996035 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -98,21 +98,21 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
/* manipulate a GRE packet according to maniptype */
static int
-gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff,
+gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
struct gre_hdr *greh;
struct gre_hdr_pptp *pgreh;
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
unsigned int hdroff = iphdroff + iph->ihl * 4;
/* pgreh includes two optional 32bit fields which are not required
* to be there. That's where the magic '8' comes from */
- if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh) - 8))
+ if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
return 0;
- greh = (void *)(*pskb)->data + hdroff;
+ greh = (void *)skb->data + hdroff;
pgreh = (struct gre_hdr_pptp *)greh;
/* we only have destination manip of a packet, since 'source key'
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 898d7377115..b9fc724388f 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -52,20 +52,20 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
}
static int
-icmp_manip_pkt(struct sk_buff **pskb,
+icmp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
struct icmphdr *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
- if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
+ if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return 0;
- hdr = (struct icmphdr *)((*pskb)->data + hdroff);
- nf_proto_csum_replace2(&hdr->checksum, *pskb,
+ hdr = (struct icmphdr *)(skb->data + hdroff);
+ nf_proto_csum_replace2(&hdr->checksum, skb,
hdr->un.echo.id, tuple->src.u.icmp.id, 0);
hdr->un.echo.id = tuple->src.u.icmp.id;
return 1;
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 5bbbb2acdc7..6bab2e18445 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -88,12 +88,12 @@ tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
}
static int
-tcp_manip_pkt(struct sk_buff **pskb,
+tcp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
struct tcphdr *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
__be32 oldip, newip;
@@ -103,14 +103,14 @@ tcp_manip_pkt(struct sk_buff **pskb,
/* this could be a inner header returned in icmp packet; in such
cases we cannot update the checksum field since it is outside of
the 8 bytes of transport layer headers we are guaranteed */
- if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
+ if (skb->len >= hdroff + sizeof(struct tcphdr))
hdrsize = sizeof(struct tcphdr);
- if (!skb_make_writable(pskb, hdroff + hdrsize))
+ if (!skb_make_writable(skb, hdroff + hdrsize))
return 0;
- iph = (struct iphdr *)((*pskb)->data + iphdroff);
- hdr = (struct tcphdr *)((*pskb)->data + hdroff);
+ iph = (struct iphdr *)(skb->data + iphdroff);
+ hdr = (struct tcphdr *)(skb->data + hdroff);
if (maniptype == IP_NAT_MANIP_SRC) {
/* Get rid of src ip and src pt */
@@ -132,8 +132,8 @@ tcp_manip_pkt(struct sk_buff **pskb,
if (hdrsize < sizeof(*hdr))
return 1;
- nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
- nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0);
+ nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+ nf_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
return 1;
}
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index a0af4fd9558..cbf1a61e290 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -86,22 +86,22 @@ udp_unique_tuple(struct nf_conntrack_tuple *tuple,
}
static int
-udp_manip_pkt(struct sk_buff **pskb,
+udp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
struct udphdr *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
__be32 oldip, newip;
__be16 *portptr, newport;
- if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
+ if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return 0;
- iph = (struct iphdr *)((*pskb)->data + iphdroff);
- hdr = (struct udphdr *)((*pskb)->data + hdroff);
+ iph = (struct iphdr *)(skb->data + iphdroff);
+ hdr = (struct udphdr *)(skb->data + hdroff);
if (maniptype == IP_NAT_MANIP_SRC) {
/* Get rid of src ip and src pt */
@@ -116,9 +116,9 @@ udp_manip_pkt(struct sk_buff **pskb,
newport = tuple->dst.u.udp.port;
portptr = &hdr->dest;
}
- if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) {
- nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1);
- nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport,
+ if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
+ nf_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
0);
if (!hdr->check)
hdr->check = CSUM_MANGLED_0;
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index f50d0203f9c..cfd2742e970 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -37,7 +37,7 @@ static int unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
}
static int
-unknown_manip_pkt(struct sk_buff **pskb,
+unknown_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 76ec59ae524..46b25ab5f78 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -65,7 +65,7 @@ static struct xt_table nat_table = {
};
/* Source NAT */
-static unsigned int ipt_snat_target(struct sk_buff **pskb,
+static unsigned int ipt_snat_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -78,7 +78,7 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb,
NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING);
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
/* Connection must be valid and new. */
NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
@@ -107,7 +107,7 @@ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
ip_rt_put(rt);
}
-static unsigned int ipt_dnat_target(struct sk_buff **pskb,
+static unsigned int ipt_dnat_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -121,14 +121,14 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
hooknum == NF_IP_LOCAL_OUT);
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
/* Connection must be valid and new. */
NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
if (hooknum == NF_IP_LOCAL_OUT &&
mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
- warn_if_extra_mangle(ip_hdr(*pskb)->daddr,
+ warn_if_extra_mangle(ip_hdr(skb)->daddr,
mr->range[0].min_ip);
return nf_nat_setup_info(ct, &mr->range[0], hooknum);
@@ -204,7 +204,7 @@ alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum)
return nf_nat_setup_info(ct, &range, hooknum);
}
-int nf_nat_rule_find(struct sk_buff **pskb,
+int nf_nat_rule_find(struct sk_buff *skb,
unsigned int hooknum,
const struct net_device *in,
const struct net_device *out,
@@ -212,7 +212,7 @@ int nf_nat_rule_find(struct sk_buff **pskb,
{
int ret;
- ret = ipt_do_table(pskb, hooknum, in, out, &nat_table);
+ ret = ipt_do_table(skb, hooknum, in, out, &nat_table);
if (ret == NF_ACCEPT) {
if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index e14d41976c2..ce9edbcc01e 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -60,7 +60,7 @@ static void addr_map_init(struct nf_conn *ct, struct addr_map *map)
}
}
-static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
+static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo,
struct nf_conn *ct, const char **dptr, size_t dlen,
enum sip_header_pos pos, struct addr_map *map)
{
@@ -84,15 +84,15 @@ static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo,
} else
return 1;
- if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+ if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
matchoff, matchlen, addr, addrlen))
return 0;
- *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
+ *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
return 1;
}
-static unsigned int ip_nat_sip(struct sk_buff **pskb,
+static unsigned int ip_nat_sip(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conn *ct,
const char **dptr)
@@ -101,8 +101,8 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb,
struct addr_map map;
int dataoff, datalen;
- dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
- datalen = (*pskb)->len - dataoff;
+ dataoff = ip_hdrlen(skb) + sizeof(struct udphdr);
+ datalen = skb->len - dataoff;
if (datalen < sizeof("SIP/2.0") - 1)
return NF_ACCEPT;
@@ -121,19 +121,19 @@ static unsigned int ip_nat_sip(struct sk_buff **pskb,
else
pos = POS_REQ_URI;
- if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map))
+ if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, pos, &map))
return NF_DROP;
}
- if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) ||
- !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) ||
- !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) ||
- !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map))
+ if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_FROM, &map) ||
+ !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_TO, &map) ||
+ !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_VIA, &map) ||
+ !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map))
return NF_DROP;
return NF_ACCEPT;
}
-static unsigned int mangle_sip_packet(struct sk_buff **pskb,
+static unsigned int mangle_sip_packet(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conn *ct,
const char **dptr, size_t dlen,
@@ -145,16 +145,16 @@ static unsigned int mangle_sip_packet(struct sk_buff **pskb,
if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0)
return 0;
- if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+ if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
matchoff, matchlen, buffer, bufflen))
return 0;
/* We need to reload this. Thanks Patrick. */
- *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr);
+ *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
return 1;
}
-static int mangle_content_len(struct sk_buff **pskb,
+static int mangle_content_len(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conn *ct,
const char *dptr)
@@ -163,22 +163,22 @@ static int mangle_content_len(struct sk_buff **pskb,
char buffer[sizeof("65536")];
int bufflen;
- dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
+ dataoff = ip_hdrlen(skb) + sizeof(struct udphdr);
/* Get actual SDP lenght */
- if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff,
+ if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff,
&matchlen, POS_SDP_HEADER) > 0) {
/* since ct_sip_get_info() give us a pointer passing 'v='
we need to add 2 bytes in this count. */
- int c_len = (*pskb)->len - dataoff - matchoff + 2;
+ int c_len = skb->len - dataoff - matchoff + 2;
/* Now, update SDP length */
- if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff,
+ if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff,
&matchlen, POS_CONTENT) > 0) {
bufflen = sprintf(buffer, "%u", c_len);
- return nf_nat_mangle_udp_packet(pskb, ct, ctinfo,
+ return nf_nat_mangle_udp_packet(skb, ct, ctinfo,
matchoff, matchlen,
buffer, bufflen);
}
@@ -186,7 +186,7 @@ static int mangle_content_len(struct sk_buff **pskb,
return 0;
}
-static unsigned int mangle_sdp(struct sk_buff **pskb,
+static unsigned int mangle_sdp(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conn *ct,
__be32 newip, u_int16_t port,
@@ -195,25 +195,25 @@ static unsigned int mangle_sdp(struct sk_buff **pskb,
char buffer[sizeof("nnn.nnn.nnn.nnn")];
unsigned int dataoff, bufflen;
- dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr);
+ dataoff = ip_hdrlen(skb) + sizeof(struct udphdr);
/* Mangle owner and contact info. */
bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip));
- if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
+ if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff,
buffer, bufflen, POS_OWNER_IP4))
return 0;
- if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
+ if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff,
buffer, bufflen, POS_CONNECTION_IP4))
return 0;
/* Mangle media port. */
bufflen = sprintf(buffer, "%u", port);
- if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff,
+ if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff,
buffer, bufflen, POS_MEDIA))
return 0;
- return mangle_content_len(pskb, ctinfo, ct, dptr);
+ return mangle_content_len(skb, ctinfo, ct, dptr);
}
static void ip_nat_sdp_expect(struct nf_conn *ct,
@@ -241,7 +241,7 @@ static void ip_nat_sdp_expect(struct nf_conn *ct,
/* So, this packet has hit the connection tracking matching code.
Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_sdp(struct sk_buff **pskb,
+static unsigned int ip_nat_sdp(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conntrack_expect *exp,
const char *dptr)
@@ -277,7 +277,7 @@ static unsigned int ip_nat_sdp(struct sk_buff **pskb,
if (port == 0)
return NF_DROP;
- if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) {
+ if (!mangle_sdp(skb, ctinfo, ct, newip, port, dptr)) {
nf_ct_unexpect_related(exp);
return NF_DROP;
}
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 6bfcd3a90f0..03709d6b4b0 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1188,9 +1188,9 @@ static int snmp_parse_mangle(unsigned char *msg,
*/
static int snmp_translate(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- struct sk_buff **pskb)
+ struct sk_buff *skb)
{
- struct iphdr *iph = ip_hdr(*pskb);
+ struct iphdr *iph = ip_hdr(skb);
struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
u_int16_t udplen = ntohs(udph->len);
u_int16_t paylen = udplen - sizeof(struct udphdr);
@@ -1225,13 +1225,13 @@ static int snmp_translate(struct nf_conn *ct,
/* We don't actually set up expectations, just adjust internal IP
* addresses if this is being NATted */
-static int help(struct sk_buff **pskb, unsigned int protoff,
+static int help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
int dir = CTINFO2DIR(ctinfo);
unsigned int ret;
- struct iphdr *iph = ip_hdr(*pskb);
+ struct iphdr *iph = ip_hdr(skb);
struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
/* SNMP replies and originating SNMP traps get mangled */
@@ -1250,7 +1250,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
* enough room for a UDP header. Just verify the UDP length field so we
* can mess around with the payload.
*/
- if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) {
+ if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) {
if (net_ratelimit())
printk(KERN_WARNING "SNMP: dropping malformed packet "
"src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
@@ -1258,11 +1258,11 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
return NF_DROP;
}
- if (!skb_make_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return NF_DROP;
spin_lock_bh(&snmp_lock);
- ret = snmp_translate(ct, ctinfo, pskb);
+ ret = snmp_translate(ct, ctinfo, skb);
spin_unlock_bh(&snmp_lock);
return ret;
}
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 46cc99def16..7db76ea9af9 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -67,7 +67,7 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
static unsigned int
nf_nat_fn(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -80,9 +80,9 @@ nf_nat_fn(unsigned int hooknum,
/* We never see fragments: conntrack defrags on pre-routing
and local-out, and nf_nat_out protects post-routing. */
- NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)));
+ NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
/* Can't track? It's not due to stress, or conntrack would
have dropped it. Hence it's the user's responsibilty to
packet filter it out, or implement conntrack/NAT for that
@@ -91,10 +91,10 @@ nf_nat_fn(unsigned int hooknum,
/* Exception: ICMP redirect to new connection (not in
hash table yet). We must not let this through, in
case we're doing NAT to the same network. */
- if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
struct icmphdr _hdr, *hp;
- hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb),
+ hp = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_hdr), &_hdr);
if (hp != NULL &&
hp->type == ICMP_REDIRECT)
@@ -119,9 +119,9 @@ nf_nat_fn(unsigned int hooknum,
switch (ctinfo) {
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
- if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) {
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
if (!nf_nat_icmp_reply_translation(ct, ctinfo,
- hooknum, pskb))
+ hooknum, skb))
return NF_DROP;
else
return NF_ACCEPT;
@@ -141,7 +141,7 @@ nf_nat_fn(unsigned int hooknum,
/* LOCAL_IN hook doesn't have a chain! */
ret = alloc_null_binding(ct, hooknum);
else
- ret = nf_nat_rule_find(pskb, hooknum, in, out,
+ ret = nf_nat_rule_find(skb, hooknum, in, out,
ct);
if (ret != NF_ACCEPT) {
@@ -159,31 +159,31 @@ nf_nat_fn(unsigned int hooknum,
ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
}
- return nf_nat_packet(ct, ctinfo, hooknum, pskb);
+ return nf_nat_packet(ct, ctinfo, hooknum, skb);
}
static unsigned int
nf_nat_in(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
unsigned int ret;
- __be32 daddr = ip_hdr(*pskb)->daddr;
+ __be32 daddr = ip_hdr(skb)->daddr;
- ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
+ ret = nf_nat_fn(hooknum, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
- daddr != ip_hdr(*pskb)->daddr) {
- dst_release((*pskb)->dst);
- (*pskb)->dst = NULL;
+ daddr != ip_hdr(skb)->daddr) {
+ dst_release(skb->dst);
+ skb->dst = NULL;
}
return ret;
}
static unsigned int
nf_nat_out(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -195,14 +195,14 @@ nf_nat_out(unsigned int hooknum,
unsigned int ret;
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr) ||
- ip_hdrlen(*pskb) < sizeof(struct iphdr))
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
+ ret = nf_nat_fn(hooknum, skb, in, out, okfn);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
- (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) {
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
if (ct->tuplehash[dir].tuple.src.u3.ip !=
@@ -210,7 +210,7 @@ nf_nat_out(unsigned int hooknum,
|| ct->tuplehash[dir].tuple.src.u.all !=
ct->tuplehash[!dir].tuple.dst.u.all
)
- return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP;
+ return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP;
}
#endif
return ret;
@@ -218,7 +218,7 @@ nf_nat_out(unsigned int hooknum,
static unsigned int
nf_nat_local_fn(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -228,24 +228,24 @@ nf_nat_local_fn(unsigned int hooknum,
unsigned int ret;
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr) ||
- ip_hdrlen(*pskb) < sizeof(struct iphdr))
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
+ ret = nf_nat_fn(hooknum, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
- (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) {
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
if (ct->tuplehash[dir].tuple.dst.u3.ip !=
ct->tuplehash[!dir].tuple.src.u3.ip) {
- if (ip_route_me_harder(pskb, RTN_UNSPEC))
+ if (ip_route_me_harder(skb, RTN_UNSPEC))
ret = NF_DROP;
}
#ifdef CONFIG_XFRM
else if (ct->tuplehash[dir].tuple.dst.u.all !=
ct->tuplehash[!dir].tuple.src.u.all)
- if (ip_xfrm_me_harder(pskb))
+ if (ip_xfrm_me_harder(skb))
ret = NF_DROP;
#endif
}
@@ -254,7 +254,7 @@ nf_nat_local_fn(unsigned int hooknum,
static unsigned int
nf_nat_adjust(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -262,10 +262,10 @@ nf_nat_adjust(unsigned int hooknum,
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
pr_debug("nf_nat_standalone: adjusting sequence number\n");
- if (!nf_nat_seq_adjust(pskb, ct, ctinfo))
+ if (!nf_nat_seq_adjust(skb, ct, ctinfo))
return NF_DROP;
}
return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
index 04dfeaefec0..0ecec701cb4 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -20,7 +20,7 @@ MODULE_DESCRIPTION("TFTP NAT helper");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ip_nat_tftp");
-static unsigned int help(struct sk_buff **pskb,
+static unsigned int help(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conntrack_expect *exp)
{
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index e5b05b03910..fd16cb8f8ab 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -70,8 +70,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot));
seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
- seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues,
- atomic_read(&ip_frag_mem));
+ seq_printf(seq, "FRAG: inuse %d memory %d\n",
+ ip_frag_nqueues(), ip_frag_mem());
return 0;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index eb286abcf5d..c98ef16effd 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -19,6 +19,7 @@
#include <net/route.h>
#include <net/tcp.h>
#include <net/cipso_ipv4.h>
+#include <net/inet_frag.h>
/* From af_inet.c */
extern int sysctl_ip_nonlocal_bind;
@@ -357,7 +358,7 @@ ctl_table ipv4_table[] = {
{
.ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH,
.procname = "ipfrag_high_thresh",
- .data = &sysctl_ipfrag_high_thresh,
+ .data = &ip4_frags_ctl.high_thresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
@@ -365,7 +366,7 @@ ctl_table ipv4_table[] = {
{
.ctl_name = NET_IPV4_IPFRAG_LOW_THRESH,
.procname = "ipfrag_low_thresh",
- .data = &sysctl_ipfrag_low_thresh,
+ .data = &ip4_frags_ctl.low_thresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
@@ -381,7 +382,7 @@ ctl_table ipv4_table[] = {
{
.ctl_name = NET_IPV4_IPFRAG_TIME,
.procname = "ipfrag_time",
- .data = &sysctl_ipfrag_time,
+ .data = &ip4_frags_ctl.timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
@@ -732,7 +733,7 @@ ctl_table ipv4_table[] = {
{
.ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
.procname = "ipfrag_secret_interval",
- .data = &sysctl_ipfrag_secret_interval,
+ .data = &ip4_frags_ctl.secret_interval,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0a42e934034..0f00966b178 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1995,8 +1995,7 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp,
}
/* Mark head of queue up as lost. */
-static void tcp_mark_head_lost(struct sock *sk,
- int packets, u32 high_seq)
+static void tcp_mark_head_lost(struct sock *sk, int packets)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
@@ -2019,7 +2018,7 @@ static void tcp_mark_head_lost(struct sock *sk,
tp->lost_skb_hint = skb;
tp->lost_cnt_hint = cnt;
cnt += tcp_skb_pcount(skb);
- if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq))
+ if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
break;
if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -2040,9 +2039,9 @@ static void tcp_update_scoreboard(struct sock *sk)
int lost = tp->fackets_out - tp->reordering;
if (lost <= 0)
lost = 1;
- tcp_mark_head_lost(sk, lost, tp->high_seq);
+ tcp_mark_head_lost(sk, lost);
} else {
- tcp_mark_head_lost(sk, 1, tp->high_seq);
+ tcp_mark_head_lost(sk, 1);
}
/* New heuristics: it is possible only after we switched
@@ -2381,7 +2380,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
before(tp->snd_una, tp->high_seq) &&
icsk->icsk_ca_state != TCP_CA_Open &&
tp->fackets_out > tp->reordering) {
- tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, tp->high_seq);
+ tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
}
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 434ef302ba8..a4edd666318 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -78,7 +78,7 @@ static int xfrm4_output_finish2(struct sk_buff *skb)
while (likely((err = xfrm4_output_one(skb)) == 0)) {
nf_reset(skb);
- err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL,
+ err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, skb, NULL,
skb->dst->dev, dst_output);
if (unlikely(err != 1))
break;
@@ -86,7 +86,7 @@ static int xfrm4_output_finish2(struct sk_buff *skb)
if (!skb->dst->xfrm)
return dst_output(skb);
- err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
+ err = nf_hook(PF_INET, NF_IP_POST_ROUTING, skb, NULL,
skb->dst->dev, xfrm4_output_finish2);
if (unlikely(err != 1))
break;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index c82d4d49f71..1e89efd38a0 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -102,7 +102,7 @@ EXPORT_SYMBOL_GPL(ipv6_find_tlv);
struct tlvtype_proc {
int type;
- int (*func)(struct sk_buff **skbp, int offset);
+ int (*func)(struct sk_buff *skb, int offset);
};
/*********************
@@ -111,10 +111,8 @@ struct tlvtype_proc {
/* An unknown option is detected, decide what to do */
-static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
+static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
{
- struct sk_buff *skb = *skbp;
-
switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) {
case 0: /* ignore */
return 1;
@@ -139,9 +137,8 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff)
/* Parse tlv encoded option header (hop-by-hop or destination) */
-static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
+static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
{
- struct sk_buff *skb = *skbp;
struct tlvtype_proc *curr;
const unsigned char *nh = skb_network_header(skb);
int off = skb_network_header_len(skb);
@@ -172,13 +169,13 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp)
/* type specific length/alignment
checks will be performed in the
func(). */
- if (curr->func(skbp, off) == 0)
+ if (curr->func(skb, off) == 0)
return 0;
break;
}
}
if (curr->type < 0) {
- if (ip6_tlvopt_unknown(skbp, off) == 0)
+ if (ip6_tlvopt_unknown(skb, off) == 0)
return 0;
}
break;
@@ -198,9 +195,8 @@ bad:
*****************************/
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
-static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
+static int ipv6_dest_hao(struct sk_buff *skb, int optoff)
{
- struct sk_buff *skb = *skbp;
struct ipv6_destopt_hao *hao;
struct inet6_skb_parm *opt = IP6CB(skb);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
@@ -234,22 +230,13 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
goto discard;
if (skb_cloned(skb)) {
- struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
- struct inet6_skb_parm *opt2;
-
- if (skb2 == NULL)
+ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
goto discard;
- opt2 = IP6CB(skb2);
- memcpy(opt2, opt, sizeof(*opt2));
-
- kfree_skb(skb);
-
/* update all variable using below by copied skbuff */
- *skbp = skb = skb2;
- hao = (struct ipv6_destopt_hao *)(skb_network_header(skb2) +
+ hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) +
optoff);
- ipv6h = ipv6_hdr(skb2);
+ ipv6h = ipv6_hdr(skb);
}
if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -280,9 +267,8 @@ static struct tlvtype_proc tlvprocdestopt_lst[] = {
{-1, NULL}
};
-static int ipv6_destopt_rcv(struct sk_buff **skbp)
+static int ipv6_destopt_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb = *skbp;
struct inet6_skb_parm *opt = IP6CB(skb);
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
__u16 dstbuf;
@@ -304,9 +290,8 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp)
#endif
dst = dst_clone(skb->dst);
- if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) {
+ if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
dst_release(dst);
- skb = *skbp;
skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
opt = IP6CB(skb);
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
@@ -337,10 +322,8 @@ void __init ipv6_destopt_init(void)
NONE header. No data in packet.
********************************/
-static int ipv6_nodata_rcv(struct sk_buff **skbp)
+static int ipv6_nodata_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb = *skbp;
-
kfree_skb(skb);
return 0;
}
@@ -360,9 +343,8 @@ void __init ipv6_nodata_init(void)
Routing header.
********************************/
-static int ipv6_rthdr_rcv(struct sk_buff **skbp)
+static int ipv6_rthdr_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb = *skbp;
struct inet6_skb_parm *opt = IP6CB(skb);
struct in6_addr *addr = NULL;
struct in6_addr daddr;
@@ -464,18 +446,14 @@ looped_back:
Do not damage packets queued somewhere.
*/
if (skb_cloned(skb)) {
- struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
/* the copy is a forwarded packet */
- if (skb2 == NULL) {
+ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
return -1;
}
- kfree_skb(skb);
- *skbp = skb = skb2;
- opt = IP6CB(skb2);
- hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb2);
+ hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb);
}
if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -578,9 +556,8 @@ static inline struct inet6_dev *ipv6_skb_idev(struct sk_buff *skb)
/* Router Alert as of RFC 2711 */
-static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
+static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
{
- struct sk_buff *skb = *skbp;
const unsigned char *nh = skb_network_header(skb);
if (nh[optoff + 1] == 2) {
@@ -595,9 +572,8 @@ static int ipv6_hop_ra(struct sk_buff **skbp, int optoff)
/* Jumbo payload */
-static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff)
+static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
{
- struct sk_buff *skb = *skbp;
const unsigned char *nh = skb_network_header(skb);
u32 pkt_len;
@@ -648,9 +624,8 @@ static struct tlvtype_proc tlvprochopopt_lst[] = {
{ -1, }
};
-int ipv6_parse_hopopts(struct sk_buff **skbp)
+int ipv6_parse_hopopts(struct sk_buff *skb)
{
- struct sk_buff *skb = *skbp;
struct inet6_skb_parm *opt = IP6CB(skb);
/*
@@ -667,8 +642,7 @@ int ipv6_parse_hopopts(struct sk_buff **skbp)
}
opt->hop = sizeof(struct ipv6hdr);
- if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) {
- skb = *skbp;
+ if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
opt = IP6CB(skb);
opt->nhoff = sizeof(struct ipv6hdr);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 47b8ce232e8..9bb031fa1c2 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -82,7 +82,7 @@ EXPORT_SYMBOL(icmpv6msg_statistics);
static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
#define icmpv6_socket __get_cpu_var(__icmpv6_socket)
-static int icmpv6_rcv(struct sk_buff **pskb);
+static int icmpv6_rcv(struct sk_buff *skb);
static struct inet6_protocol icmpv6_protocol = {
.handler = icmpv6_rcv,
@@ -614,9 +614,8 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info)
* Handle icmp messages
*/
-static int icmpv6_rcv(struct sk_buff **pskb)
+static int icmpv6_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb = *pskb;
struct net_device *dev = skb->dev;
struct inet6_dev *idev = __in6_dev_get(dev);
struct in6_addr *saddr, *daddr;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 25b93170974..78de42ada84 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -146,7 +146,7 @@ void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst,
__ip6_dst_store(sk, dst, daddr, saddr);
#ifdef CONFIG_XFRM
- if (dst) {
+ {
struct rt6_info *rt = (struct rt6_info *)dst;
rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid);
}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 9149fc23975..fac6f7f9dd7 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -125,7 +125,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
}
if (hdr->nexthdr == NEXTHDR_HOP) {
- if (ipv6_parse_hopopts(&skb) < 0) {
+ if (ipv6_parse_hopopts(skb) < 0) {
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS);
rcu_read_unlock();
return 0;
@@ -149,7 +149,7 @@ out:
*/
-static inline int ip6_input_finish(struct sk_buff *skb)
+static int ip6_input_finish(struct sk_buff *skb)
{
struct inet6_protocol *ipprot;
struct sock *raw_sk;
@@ -199,7 +199,7 @@ resubmit:
!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard;
- ret = ipprot->handler(&skb);
+ ret = ipprot->handler(skb);
if (ret > 0)
goto resubmit;
else if (ret == 0)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 011082ed921..13565dfb1b4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -70,7 +70,7 @@ static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *f
spin_unlock_bh(&ip6_id_lock);
}
-static inline int ip6_output_finish(struct sk_buff *skb)
+static int ip6_output_finish(struct sk_buff *skb)
{
struct dst_entry *dst = skb->dst;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 38b14961391..b1326c2bf8a 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -68,15 +68,15 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info)
}
}
-static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info)
+static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info)
{
struct ip6_rt_info *rt_info = nf_info_reroute(info);
if (info->hook == NF_IP6_LOCAL_OUT) {
- struct ipv6hdr *iph = ipv6_hdr(*pskb);
+ struct ipv6hdr *iph = ipv6_hdr(skb);
if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
!ipv6_addr_equal(&iph->saddr, &rt_info->saddr))
- return ip6_route_me_harder(*pskb);
+ return ip6_route_me_harder(skb);
}
return 0;
}
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 0473145ac53..6413a30d9f6 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -332,6 +332,7 @@ static int
ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
{
int diff;
+ int err;
struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
if (v->data_len < sizeof(*user_iph))
@@ -344,25 +345,18 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
if (v->data_len > 0xFFFF)
return -EINVAL;
if (diff > skb_tailroom(e->skb)) {
- struct sk_buff *newskb;
-
- newskb = skb_copy_expand(e->skb,
- skb_headroom(e->skb),
- diff,
- GFP_ATOMIC);
- if (newskb == NULL) {
+ err = pskb_expand_head(e->skb, 0,
+ diff - skb_tailroom(e->skb),
+ GFP_ATOMIC);
+ if (err) {
printk(KERN_WARNING "ip6_queue: OOM "
"in mangle, dropping packet\n");
- return -ENOMEM;
+ return err;
}
- if (e->skb->sk)
- skb_set_owner_w(newskb, e->skb->sk);
- kfree_skb(e->skb);
- e->skb = newskb;
}
skb_put(e->skb, diff);
}
- if (!skb_make_writable(&e->skb, v->data_len))
+ if (!skb_make_writable(e->skb, v->data_len))
return -ENOMEM;
skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
e->skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index cd9df02bb85..acaba153793 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -205,7 +205,7 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6)
}
static unsigned int
-ip6t_error(struct sk_buff **pskb,
+ip6t_error(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -350,7 +350,7 @@ static void trace_packet(struct sk_buff *skb,
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
-ip6t_do_table(struct sk_buff **pskb,
+ip6t_do_table(struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
@@ -389,17 +389,17 @@ ip6t_do_table(struct sk_buff **pskb,
do {
IP_NF_ASSERT(e);
IP_NF_ASSERT(back);
- if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6,
+ if (ip6_packet_match(skb, indev, outdev, &e->ipv6,
&protoff, &offset, &hotdrop)) {
struct ip6t_entry_target *t;
if (IP6T_MATCH_ITERATE(e, do_match,
- *pskb, in, out,
+ skb, in, out,
offset, protoff, &hotdrop) != 0)
goto no_match;
ADD_COUNTER(e->counters,
- ntohs(ipv6_hdr(*pskb)->payload_len)
+ ntohs(ipv6_hdr(skb)->payload_len)
+ IPV6_HDR_LEN,
1);
@@ -409,8 +409,8 @@ ip6t_do_table(struct sk_buff **pskb,
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
/* The packet is traced: log it */
- if (unlikely((*pskb)->nf_trace))
- trace_packet(*pskb, hook, in, out,
+ if (unlikely(skb->nf_trace))
+ trace_packet(skb, hook, in, out,
table->name, private, e);
#endif
/* Standard target? */
@@ -448,7 +448,7 @@ ip6t_do_table(struct sk_buff **pskb,
((struct ip6t_entry *)table_base)->comefrom
= 0xeeeeeeec;
#endif
- verdict = t->u.kernel.target->target(pskb,
+ verdict = t->u.kernel.target->target(skb,
in, out,
hook,
t->u.kernel.target,
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
index ad4d94310b8..9afc836fd45 100644
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ b/net/ipv6/netfilter/ip6t_HL.c
@@ -18,7 +18,7 @@ MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
MODULE_DESCRIPTION("IP6 tables Hop Limit modification module");
MODULE_LICENSE("GPL");
-static unsigned int ip6t_hl_target(struct sk_buff **pskb,
+static unsigned int ip6t_hl_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -29,10 +29,10 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb,
const struct ip6t_HL_info *info = targinfo;
int new_hl;
- if (!skb_make_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return NF_DROP;
- ip6h = ipv6_hdr(*pskb);
+ ip6h = ipv6_hdr(skb);
switch (info->mode) {
case IP6T_HL_SET:
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 6ab99001dcc..7a48c342df4 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -431,7 +431,7 @@ ip6t_log_packet(unsigned int pf,
}
static unsigned int
-ip6t_log_target(struct sk_buff **pskb,
+ip6t_log_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -445,8 +445,7 @@ ip6t_log_target(struct sk_buff **pskb,
li.u.log.level = loginfo->level;
li.u.log.logflags = loginfo->logflags;
- ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li,
- loginfo->prefix);
+ ip6t_log_packet(PF_INET6, hooknum, skb, in, out, &li, loginfo->prefix);
return XT_CONTINUE;
}
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 3fd08d5567a..1a7d2917545 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -172,7 +172,7 @@ send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum)
icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL);
}
-static unsigned int reject6_target(struct sk_buff **pskb,
+static unsigned int reject6_target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -187,25 +187,25 @@ static unsigned int reject6_target(struct sk_buff **pskb,
must return an absolute verdict. --RR */
switch (reject->with) {
case IP6T_ICMP6_NO_ROUTE:
- send_unreach(*pskb, ICMPV6_NOROUTE, hooknum);
+ send_unreach(skb, ICMPV6_NOROUTE, hooknum);
break;
case IP6T_ICMP6_ADM_PROHIBITED:
- send_unreach(*pskb, ICMPV6_ADM_PROHIBITED, hooknum);
+ send_unreach(skb, ICMPV6_ADM_PROHIBITED, hooknum);
break;
case IP6T_ICMP6_NOT_NEIGHBOUR:
- send_unreach(*pskb, ICMPV6_NOT_NEIGHBOUR, hooknum);
+ send_unreach(skb, ICMPV6_NOT_NEIGHBOUR, hooknum);
break;
case IP6T_ICMP6_ADDR_UNREACH:
- send_unreach(*pskb, ICMPV6_ADDR_UNREACH, hooknum);
+ send_unreach(skb, ICMPV6_ADDR_UNREACH, hooknum);
break;
case IP6T_ICMP6_PORT_UNREACH:
- send_unreach(*pskb, ICMPV6_PORT_UNREACH, hooknum);
+ send_unreach(skb, ICMPV6_PORT_UNREACH, hooknum);
break;
case IP6T_ICMP6_ECHOREPLY:
/* Do nothing */
break;
case IP6T_TCP_RESET:
- send_reset(*pskb);
+ send_reset(skb);
break;
default:
if (net_ratelimit())
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 7e32e2aaf7f..1d26b202bf3 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -60,32 +60,32 @@ static struct xt_table packet_filter = {
/* The work comes in here from netfilter.c. */
static unsigned int
ip6t_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip6t_do_table(pskb, hook, in, out, &packet_filter);
+ return ip6t_do_table(skb, hook, in, out, &packet_filter);
}
static unsigned int
ip6t_local_out_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
#if 0
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+ if (skb->len < sizeof(struct iphdr)
+ || ip_hdrlen(skb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ip6t_hook: happy cracking.\n");
return NF_ACCEPT;
}
#endif
- return ip6t_do_table(pskb, hook, in, out, &packet_filter);
+ return ip6t_do_table(skb, hook, in, out, &packet_filter);
}
static struct nf_hook_ops ip6t_ops[] = {
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index f0a9efa67fb..a0b6381f1e8 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -68,17 +68,17 @@ static struct xt_table packet_mangler = {
/* The work comes in here from netfilter.c. */
static unsigned int
ip6t_route_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip6t_do_table(pskb, hook, in, out, &packet_mangler);
+ return ip6t_do_table(skb, hook, in, out, &packet_mangler);
}
static unsigned int
ip6t_local_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -91,8 +91,8 @@ ip6t_local_hook(unsigned int hook,
#if 0
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+ if (skb->len < sizeof(struct iphdr)
+ || ip_hdrlen(skb) < sizeof(struct iphdr)) {
if (net_ratelimit())
printk("ip6t_hook: happy cracking.\n");
return NF_ACCEPT;
@@ -100,22 +100,22 @@ ip6t_local_hook(unsigned int hook,
#endif
/* save source/dest address, mark, hoplimit, flowlabel, priority, */
- memcpy(&saddr, &ipv6_hdr(*pskb)->saddr, sizeof(saddr));
- memcpy(&daddr, &ipv6_hdr(*pskb)->daddr, sizeof(daddr));
- mark = (*pskb)->mark;
- hop_limit = ipv6_hdr(*pskb)->hop_limit;
+ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+ memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
+ mark = skb->mark;
+ hop_limit = ipv6_hdr(skb)->hop_limit;
/* flowlabel and prio (includes version, which shouldn't change either */
- flowlabel = *((u_int32_t *)ipv6_hdr(*pskb));
+ flowlabel = *((u_int32_t *)ipv6_hdr(skb));
- ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler);
+ ret = ip6t_do_table(skb, hook, in, out, &packet_mangler);
if (ret != NF_DROP && ret != NF_STOLEN
- && (memcmp(&ipv6_hdr(*pskb)->saddr, &saddr, sizeof(saddr))
- || memcmp(&ipv6_hdr(*pskb)->daddr, &daddr, sizeof(daddr))
- || (*pskb)->mark != mark
- || ipv6_hdr(*pskb)->hop_limit != hop_limit))
- return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP;
+ && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr))
+ || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr))
+ || skb->mark != mark
+ || ipv6_hdr(skb)->hop_limit != hop_limit))
+ return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
return ret;
}
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index ec290e4ebdd..8f7109f991e 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -46,12 +46,12 @@ static struct xt_table packet_raw = {
/* The work comes in here from netfilter.c. */
static unsigned int
ip6t_hook(unsigned int hook,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return ip6t_do_table(pskb, hook, in, out, &packet_raw);
+ return ip6t_do_table(skb, hook, in, out, &packet_raw);
}
static struct nf_hook_ops ip6t_ops[] = {
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 37a3db92695..0e40948f4fc 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -18,6 +18,7 @@
#include <linux/icmp.h>
#include <linux/sysctl.h>
#include <net/ipv6.h>
+#include <net/inet_frag.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack.h>
@@ -145,7 +146,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
}
static unsigned int ipv6_confirm(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -155,12 +156,12 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
struct nf_conntrack_helper *helper;
enum ip_conntrack_info ctinfo;
unsigned int ret, protoff;
- unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data;
- unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
+ unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
+ unsigned char pnum = ipv6_hdr(skb)->nexthdr;
/* This is where we call the helper: as the packet goes out. */
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
goto out;
@@ -172,23 +173,23 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
if (!helper)
goto out;
- protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum,
- (*pskb)->len - extoff);
- if (protoff > (*pskb)->len || pnum == NEXTHDR_FRAGMENT) {
+ protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum,
+ skb->len - extoff);
+ if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) {
pr_debug("proto header not found\n");
return NF_ACCEPT;
}
- ret = helper->help(pskb, protoff, ct, ctinfo);
+ ret = helper->help(skb, protoff, ct, ctinfo);
if (ret != NF_ACCEPT)
return ret;
out:
/* We've seen it coming out the other side: confirm it */
- return nf_conntrack_confirm(pskb);
+ return nf_conntrack_confirm(skb);
}
static unsigned int ipv6_defrag(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
@@ -196,17 +197,17 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
struct sk_buff *reasm;
/* Previously seen (loopback)? */
- if ((*pskb)->nfct)
+ if (skb->nfct)
return NF_ACCEPT;
- reasm = nf_ct_frag6_gather(*pskb);
+ reasm = nf_ct_frag6_gather(skb);
/* queued */
if (reasm == NULL)
return NF_STOLEN;
/* error occured or not fragmented */
- if (reasm == *pskb)
+ if (reasm == skb)
return NF_ACCEPT;
nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
@@ -216,12 +217,12 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
}
static unsigned int ipv6_conntrack_in(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct sk_buff *reasm = (*pskb)->nfct_reasm;
+ struct sk_buff *reasm = skb->nfct_reasm;
/* This packet is fragmented and has reassembled packet. */
if (reasm) {
@@ -229,32 +230,32 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum,
if (!reasm->nfct) {
unsigned int ret;
- ret = nf_conntrack_in(PF_INET6, hooknum, &reasm);
+ ret = nf_conntrack_in(PF_INET6, hooknum, reasm);
if (ret != NF_ACCEPT)
return ret;
}
nf_conntrack_get(reasm->nfct);
- (*pskb)->nfct = reasm->nfct;
- (*pskb)->nfctinfo = reasm->nfctinfo;
+ skb->nfct = reasm->nfct;
+ skb->nfctinfo = reasm->nfctinfo;
return NF_ACCEPT;
}
- return nf_conntrack_in(PF_INET6, hooknum, pskb);
+ return nf_conntrack_in(PF_INET6, hooknum, skb);
}
static unsigned int ipv6_conntrack_local(unsigned int hooknum,
- struct sk_buff **pskb,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct ipv6hdr)) {
+ if (skb->len < sizeof(struct ipv6hdr)) {
if (net_ratelimit())
printk("ipv6_conntrack_local: packet too short\n");
return NF_ACCEPT;
}
- return ipv6_conntrack_in(hooknum, pskb, in, out, okfn);
+ return ipv6_conntrack_in(hooknum, skb, in, out, okfn);
}
static struct nf_hook_ops ipv6_conntrack_ops[] = {
@@ -307,7 +308,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = {
{
.ctl_name = NET_NF_CONNTRACK_FRAG6_TIMEOUT,
.procname = "nf_conntrack_frag6_timeout",
- .data = &nf_ct_frag6_timeout,
+ .data = &nf_frags_ctl.timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
@@ -315,7 +316,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = {
{
.ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH,
.procname = "nf_conntrack_frag6_low_thresh",
- .data = &nf_ct_frag6_low_thresh,
+ .data = &nf_frags_ctl.low_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
@@ -323,7 +324,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = {
{
.ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH,
.procname = "nf_conntrack_frag6_high_thresh",
- .data = &nf_ct_frag6_high_thresh,
+ .data = &nf_frags_ctl.high_thresh,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 25442a8c1ba..726fafd4196 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -31,6 +31,7 @@
#include <net/sock.h>
#include <net/snmp.h>
+#include <net/inet_frag.h>
#include <net/ipv6.h>
#include <net/protocol.h>
@@ -48,10 +49,6 @@
#define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */
#define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT
-unsigned int nf_ct_frag6_high_thresh __read_mostly = 256*1024;
-unsigned int nf_ct_frag6_low_thresh __read_mostly = 192*1024;
-unsigned long nf_ct_frag6_timeout __read_mostly = IPV6_FRAG_TIMEOUT;
-
struct nf_ct_frag6_skb_cb
{
struct inet6_skb_parm h;
@@ -63,51 +60,24 @@ struct nf_ct_frag6_skb_cb
struct nf_ct_frag6_queue
{
- struct hlist_node list;
- struct list_head lru_list; /* lru list member */
+ struct inet_frag_queue q;
__be32 id; /* fragment id */
struct in6_addr saddr;
struct in6_addr daddr;
- spinlock_t lock;
- atomic_t refcnt;
- struct timer_list timer; /* expire timer */
- struct sk_buff *fragments;
- int len;
- int meat;
- ktime_t stamp;
unsigned int csum;
- __u8 last_in; /* has first/last segment arrived? */
-#define COMPLETE 4
-#define FIRST_IN 2
-#define LAST_IN 1
__u16 nhoffset;
};
-/* Hash table. */
-
-#define FRAG6Q_HASHSZ 64
-
-static struct hlist_head nf_ct_frag6_hash[FRAG6Q_HASHSZ];
-static DEFINE_RWLOCK(nf_ct_frag6_lock);
-static u32 nf_ct_frag6_hash_rnd;
-static LIST_HEAD(nf_ct_frag6_lru_list);
-int nf_ct_frag6_nqueues = 0;
-
-static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq)
-{
- hlist_del(&fq->list);
- list_del(&fq->lru_list);
- nf_ct_frag6_nqueues--;
-}
+struct inet_frags_ctl nf_frags_ctl __read_mostly = {
+ .high_thresh = 256 * 1024,
+ .low_thresh = 192 * 1024,
+ .timeout = IPV6_FRAG_TIMEOUT,
+ .secret_interval = 10 * 60 * HZ,
+};
-static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq)
-{
- write_lock(&nf_ct_frag6_lock);
- __fq_unlink(fq);
- write_unlock(&nf_ct_frag6_lock);
-}
+static struct inet_frags nf_frags;
static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
struct in6_addr *daddr)
@@ -120,7 +90,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
a += JHASH_GOLDEN_RATIO;
b += JHASH_GOLDEN_RATIO;
- c += nf_ct_frag6_hash_rnd;
+ c += nf_frags.rnd;
__jhash_mix(a, b, c);
a += (__force u32)saddr->s6_addr32[3];
@@ -133,100 +103,54 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
c += (__force u32)id;
__jhash_mix(a, b, c);
- return c & (FRAG6Q_HASHSZ - 1);
+ return c & (INETFRAGS_HASHSZ - 1);
}
-static struct timer_list nf_ct_frag6_secret_timer;
-int nf_ct_frag6_secret_interval = 10 * 60 * HZ;
-
-static void nf_ct_frag6_secret_rebuild(unsigned long dummy)
+static unsigned int nf_hashfn(struct inet_frag_queue *q)
{
- unsigned long now = jiffies;
- int i;
-
- write_lock(&nf_ct_frag6_lock);
- get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32));
- for (i = 0; i < FRAG6Q_HASHSZ; i++) {
- struct nf_ct_frag6_queue *q;
- struct hlist_node *p, *n;
-
- hlist_for_each_entry_safe(q, p, n, &nf_ct_frag6_hash[i], list) {
- unsigned int hval = ip6qhashfn(q->id,
- &q->saddr,
- &q->daddr);
- if (hval != i) {
- hlist_del(&q->list);
- /* Relink to new hash chain. */
- hlist_add_head(&q->list,
- &nf_ct_frag6_hash[hval]);
- }
- }
- }
- write_unlock(&nf_ct_frag6_lock);
+ struct nf_ct_frag6_queue *nq;
- mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval);
+ nq = container_of(q, struct nf_ct_frag6_queue, q);
+ return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr);
}
-atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0);
+static void nf_skb_free(struct sk_buff *skb)
+{
+ if (NFCT_FRAG6_CB(skb)->orig)
+ kfree_skb(NFCT_FRAG6_CB(skb)->orig);
+}
/* Memory Tracking Functions. */
static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work)
{
if (work)
*work -= skb->truesize;
- atomic_sub(skb->truesize, &nf_ct_frag6_mem);
- if (NFCT_FRAG6_CB(skb)->orig)
- kfree_skb(NFCT_FRAG6_CB(skb)->orig);
-
+ atomic_sub(skb->truesize, &nf_frags.mem);
+ nf_skb_free(skb);
kfree_skb(skb);
}
-static inline void frag_free_queue(struct nf_ct_frag6_queue *fq,
- unsigned int *work)
+static void nf_frag_free(struct inet_frag_queue *q)
{
- if (work)
- *work -= sizeof(struct nf_ct_frag6_queue);
- atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
- kfree(fq);
+ kfree(container_of(q, struct nf_ct_frag6_queue, q));
}
static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
{
- struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);
+ struct nf_ct_frag6_queue *fq;
- if (!fq)
+ fq = kzalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);
+ if (fq == NULL)
return NULL;
- atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
+ atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_frags.mem);
return fq;
}
/* Destruction primitives. */
-/* Complete destruction of fq. */
-static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq,
- unsigned int *work)
+static __inline__ void fq_put(struct nf_ct_frag6_queue *fq)
{
- struct sk_buff *fp;
-
- BUG_TRAP(fq->last_in&COMPLETE);
- BUG_TRAP(del_timer(&fq->timer) == 0);
-
- /* Release all fragment data. */
- fp = fq->fragments;
- while (fp) {
- struct sk_buff *xp = fp->next;
-
- frag_kfree_skb(fp, work);
- fp = xp;
- }
-
- frag_free_queue(fq, work);
-}
-
-static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work)
-{
- if (atomic_dec_and_test(&fq->refcnt))
- nf_ct_frag6_destroy(fq, work);
+ inet_frag_put(&fq->q, &nf_frags);
}
/* Kill fq entry. It is not destroyed immediately,
@@ -234,62 +158,28 @@ static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work)
*/
static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
{
- if (del_timer(&fq->timer))
- atomic_dec(&fq->refcnt);
-
- if (!(fq->last_in & COMPLETE)) {
- fq_unlink(fq);
- atomic_dec(&fq->refcnt);
- fq->last_in |= COMPLETE;
- }
+ inet_frag_kill(&fq->q, &nf_frags);
}
static void nf_ct_frag6_evictor(void)
{
- struct nf_ct_frag6_queue *fq;
- struct list_head *tmp;
- unsigned int work;
-
- work = atomic_read(&nf_ct_frag6_mem);
- if (work <= nf_ct_frag6_low_thresh)
- return;
-
- work -= nf_ct_frag6_low_thresh;
- while (work > 0) {
- read_lock(&nf_ct_frag6_lock);
- if (list_empty(&nf_ct_frag6_lru_list)) {
- read_unlock(&nf_ct_frag6_lock);
- return;
- }
- tmp = nf_ct_frag6_lru_list.next;
- BUG_ON(tmp == NULL);
- fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list);
- atomic_inc(&fq->refcnt);
- read_unlock(&nf_ct_frag6_lock);
-
- spin_lock(&fq->lock);
- if (!(fq->last_in&COMPLETE))
- fq_kill(fq);
- spin_unlock(&fq->lock);
-
- fq_put(fq, &work);
- }
+ inet_frag_evictor(&nf_frags);
}
static void nf_ct_frag6_expire(unsigned long data)
{
struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data;
- spin_lock(&fq->lock);
+ spin_lock(&fq->q.lock);
- if (fq->last_in & COMPLETE)
+ if (fq->q.last_in & COMPLETE)
goto out;
fq_kill(fq);
out:
- spin_unlock(&fq->lock);
- fq_put(fq, NULL);
+ spin_unlock(&fq->q.lock);
+ fq_put(fq);
}
/* Creation primitives. */
@@ -302,31 +192,31 @@ static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
struct hlist_node *n;
#endif
- write_lock(&nf_ct_frag6_lock);
+ write_lock(&nf_frags.lock);
#ifdef CONFIG_SMP
- hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) {
+ hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) {
if (fq->id == fq_in->id &&
ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
- atomic_inc(&fq->refcnt);
- write_unlock(&nf_ct_frag6_lock);
- fq_in->last_in |= COMPLETE;
- fq_put(fq_in, NULL);
+ atomic_inc(&fq->q.refcnt);
+ write_unlock(&nf_frags.lock);
+ fq_in->q.last_in |= COMPLETE;
+ fq_put(fq_in);
return fq;
}
}
#endif
fq = fq_in;
- if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout))
- atomic_inc(&fq->refcnt);
+ if (!mod_timer(&fq->q.timer, jiffies + nf_frags_ctl.timeout))
+ atomic_inc(&fq->q.refcnt);
- atomic_inc(&fq->refcnt);
- hlist_add_head(&fq->list, &nf_ct_frag6_hash[hash]);
- INIT_LIST_HEAD(&fq->lru_list);
- list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
- nf_ct_frag6_nqueues++;
- write_unlock(&nf_ct_frag6_lock);
+ atomic_inc(&fq->q.refcnt);
+ hlist_add_head(&fq->q.list, &nf_frags.hash[hash]);
+ INIT_LIST_HEAD(&fq->q.lru_list);
+ list_add_tail(&fq->q.lru_list, &nf_frags.lru_list);
+ nf_frags.nqueues++;
+ write_unlock(&nf_frags.lock);
return fq;
}
@@ -341,15 +231,13 @@ nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, str
goto oom;
}
- memset(fq, 0, sizeof(struct nf_ct_frag6_queue));
-
fq->id = id;
ipv6_addr_copy(&fq->saddr, src);
ipv6_addr_copy(&fq->daddr, dst);
- setup_timer(&fq->timer, nf_ct_frag6_expire, (unsigned long)fq);
- spin_lock_init(&fq->lock);
- atomic_set(&fq->refcnt, 1);
+ setup_timer(&fq->q.timer, nf_ct_frag6_expire, (unsigned long)fq);
+ spin_lock_init(&fq->q.lock);
+ atomic_set(&fq->q.refcnt, 1);
return nf_ct_frag6_intern(hash, fq);
@@ -364,17 +252,17 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
struct hlist_node *n;
unsigned int hash = ip6qhashfn(id, src, dst);
- read_lock(&nf_ct_frag6_lock);
- hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) {
+ read_lock(&nf_frags.lock);
+ hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) {
if (fq->id == id &&
ipv6_addr_equal(src, &fq->saddr) &&
ipv6_addr_equal(dst, &fq->daddr)) {
- atomic_inc(&fq->refcnt);
- read_unlock(&nf_ct_frag6_lock);
+ atomic_inc(&fq->q.refcnt);
+ read_unlock(&nf_frags.lock);
return fq;
}
}
- read_unlock(&nf_ct_frag6_lock);
+ read_unlock(&nf_frags.lock);
return nf_ct_frag6_create(hash, id, src, dst);
}
@@ -386,7 +274,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
struct sk_buff *prev, *next;
int offset, end;
- if (fq->last_in & COMPLETE) {
+ if (fq->q.last_in & COMPLETE) {
pr_debug("Allready completed\n");
goto err;
}
@@ -412,13 +300,13 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
/* If we already have some bits beyond end
* or have different end, the segment is corrupted.
*/
- if (end < fq->len ||
- ((fq->last_in & LAST_IN) && end != fq->len)) {
+ if (end < fq->q.len ||
+ ((fq->q.last_in & LAST_IN) && end != fq->q.len)) {
pr_debug("already received last fragment\n");
goto err;
}
- fq->last_in |= LAST_IN;
- fq->len = end;
+ fq->q.last_in |= LAST_IN;
+ fq->q.len = end;
} else {
/* Check if the fragment is rounded to 8 bytes.
* Required by the RFC.
@@ -430,13 +318,13 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
pr_debug("end of fragment not rounded to 8 bytes.\n");
return -1;
}
- if (end > fq->len) {
+ if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
- if (fq->last_in & LAST_IN) {
+ if (fq->q.last_in & LAST_IN) {
pr_debug("last packet already reached.\n");
goto err;
}
- fq->len = end;
+ fq->q.len = end;
}
}
@@ -458,7 +346,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
* this fragment, right?
*/
prev = NULL;
- for (next = fq->fragments; next != NULL; next = next->next) {
+ for (next = fq->q.fragments; next != NULL; next = next->next) {
if (NFCT_FRAG6_CB(next)->offset >= offset)
break; /* bingo! */
prev = next;
@@ -503,7 +391,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
/* next fragment */
NFCT_FRAG6_CB(next)->offset += i;
- fq->meat -= i;
+ fq->q.meat -= i;
if (next->ip_summed != CHECKSUM_UNNECESSARY)
next->ip_summed = CHECKSUM_NONE;
break;
@@ -518,9 +406,9 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
if (prev)
prev->next = next;
else
- fq->fragments = next;
+ fq->q.fragments = next;
- fq->meat -= free_it->len;
+ fq->q.meat -= free_it->len;
frag_kfree_skb(free_it, NULL);
}
}
@@ -532,23 +420,23 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
if (prev)
prev->next = skb;
else
- fq->fragments = skb;
+ fq->q.fragments = skb;
skb->dev = NULL;
- fq->stamp = skb->tstamp;
- fq->meat += skb->len;
- atomic_add(skb->truesize, &nf_ct_frag6_mem);
+ fq->q.stamp = skb->tstamp;
+ fq->q.meat += skb->len;
+ atomic_add(skb->truesize, &nf_frags.mem);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
*/
if (offset == 0) {
fq->nhoffset = nhoff;
- fq->last_in |= FIRST_IN;
+ fq->q.last_in |= FIRST_IN;
}
- write_lock(&nf_ct_frag6_lock);
- list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
- write_unlock(&nf_ct_frag6_lock);
+ write_lock(&nf_frags.lock);
+ list_move_tail(&fq->q.lru_list, &nf_frags.lru_list);
+ write_unlock(&nf_frags.lock);
return 0;
err:
@@ -567,7 +455,7 @@ err:
static struct sk_buff *
nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
{
- struct sk_buff *fp, *op, *head = fq->fragments;
+ struct sk_buff *fp, *op, *head = fq->q.fragments;
int payload_len;
fq_kill(fq);
@@ -577,7 +465,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
/* Unfragmented part is taken from the first segment. */
payload_len = ((head->data - skb_network_header(head)) -
- sizeof(struct ipv6hdr) + fq->len -
+ sizeof(struct ipv6hdr) + fq->q.len -
sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN) {
pr_debug("payload len is too large.\n");
@@ -614,7 +502,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
clone->ip_summed = head->ip_summed;
NFCT_FRAG6_CB(clone)->orig = NULL;
- atomic_add(clone->truesize, &nf_ct_frag6_mem);
+ atomic_add(clone->truesize, &nf_frags.mem);
}
/* We have to remove fragment header from datagram and to relocate
@@ -628,7 +516,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
skb_shinfo(head)->frag_list = head->next;
skb_reset_transport_header(head);
skb_push(head, head->data - skb_network_header(head));
- atomic_sub(head->truesize, &nf_ct_frag6_mem);
+ atomic_sub(head->truesize, &nf_frags.mem);
for (fp=head->next; fp; fp = fp->next) {
head->data_len += fp->len;
@@ -638,12 +526,12 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
- atomic_sub(fp->truesize, &nf_ct_frag6_mem);
+ atomic_sub(fp->truesize, &nf_frags.mem);
}
head->next = NULL;
head->dev = dev;
- head->tstamp = fq->stamp;
+ head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len);
/* Yes, and fold redundant checksum back. 8) */
@@ -652,7 +540,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
skb_network_header_len(head),
head->csum);
- fq->fragments = NULL;
+ fq->q.fragments = NULL;
/* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
fp = skb_shinfo(head)->frag_list;
@@ -788,7 +676,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
goto ret_orig;
}
- if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh)
+ if (atomic_read(&nf_frags.mem) > nf_frags_ctl.high_thresh)
nf_ct_frag6_evictor();
fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
@@ -797,23 +685,23 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
goto ret_orig;
}
- spin_lock(&fq->lock);
+ spin_lock(&fq->q.lock);
if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
- spin_unlock(&fq->lock);
+ spin_unlock(&fq->q.lock);
pr_debug("Can't insert skb to queue\n");
- fq_put(fq, NULL);
+ fq_put(fq);
goto ret_orig;
}
- if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) {
+ if (fq->q.last_in == (FIRST_IN|LAST_IN) && fq->q.meat == fq->q.len) {
ret_skb = nf_ct_frag6_reasm(fq, dev);
if (ret_skb == NULL)
pr_debug("Can't reassemble fragmented packets\n");
}
- spin_unlock(&fq->lock);
+ spin_unlock(&fq->q.lock);
- fq_put(fq, NULL);
+ fq_put(fq);
return ret_skb;
ret_orig:
@@ -859,20 +747,20 @@ int nf_ct_frag6_kfree_frags(struct sk_buff *skb)
int nf_ct_frag6_init(void)
{
- nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
- (jiffies ^ (jiffies >> 6)));
-
- setup_timer(&nf_ct_frag6_secret_timer, nf_ct_frag6_secret_rebuild, 0);
- nf_ct_frag6_secret_timer.expires = jiffies
- + nf_ct_frag6_secret_interval;
- add_timer(&nf_ct_frag6_secret_timer);
+ nf_frags.ctl = &nf_frags_ctl;
+ nf_frags.hashfn = nf_hashfn;
+ nf_frags.destructor = nf_frag_free;
+ nf_frags.skb_free = nf_skb_free;
+ nf_frags.qsize = sizeof(struct nf_ct_frag6_queue);
+ inet_frags_init(&nf_frags);
return 0;
}
void nf_ct_frag6_cleanup(void)
{
- del_timer(&nf_ct_frag6_secret_timer);
- nf_ct_frag6_low_thresh = 0;
+ inet_frags_fini(&nf_frags);
+
+ nf_frags_ctl.low_thresh = 0;
nf_ct_frag6_evictor();
}
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index db945018579..be526ad9254 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -54,7 +54,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "RAW6: inuse %d\n",
fold_prot_inuse(&rawv6_prot));
seq_printf(seq, "FRAG6: inuse %d memory %d\n",
- ip6_frag_nqueues, atomic_read(&ip6_frag_mem));
+ ip6_frag_nqueues(), ip6_frag_mem());
return 0;
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 31601c99354..6ad19cfc202 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -42,6 +42,7 @@
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/jhash.h>
+#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/snmp.h>
@@ -53,11 +54,7 @@
#include <net/rawv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
-
-int sysctl_ip6frag_high_thresh __read_mostly = 256*1024;
-int sysctl_ip6frag_low_thresh __read_mostly = 192*1024;
-
-int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT;
+#include <net/inet_frag.h>
struct ip6frag_skb_cb
{
@@ -74,53 +71,39 @@ struct ip6frag_skb_cb
struct frag_queue
{
- struct hlist_node list;
- struct list_head lru_list; /* lru list member */
+ struct inet_frag_queue q;
__be32 id; /* fragment id */
struct in6_addr saddr;
struct in6_addr daddr;
- spinlock_t lock;
- atomic_t refcnt;
- struct timer_list timer; /* expire timer */
- struct sk_buff *fragments;
- int len;
- int meat;
int iif;
- ktime_t stamp;
unsigned int csum;
- __u8 last_in; /* has first/last segment arrived? */
-#define COMPLETE 4
-#define FIRST_IN 2
-#define LAST_IN 1
__u16 nhoffset;
};
-/* Hash table. */
-
-#define IP6Q_HASHSZ 64
+struct inet_frags_ctl ip6_frags_ctl __read_mostly = {
+ .high_thresh = 256 * 1024,
+ .low_thresh = 192 * 1024,
+ .timeout = IPV6_FRAG_TIMEOUT,
+ .secret_interval = 10 * 60 * HZ,
+};
-static struct hlist_head ip6_frag_hash[IP6Q_HASHSZ];
-static DEFINE_RWLOCK(ip6_frag_lock);
-static u32 ip6_frag_hash_rnd;
-static LIST_HEAD(ip6_frag_lru_list);
-int ip6_frag_nqueues = 0;
+static struct inet_frags ip6_frags;
-static __inline__ void __fq_unlink(struct frag_queue *fq)
+int ip6_frag_nqueues(void)
{
- hlist_del(&fq->list);
- list_del(&fq->lru_list);
- ip6_frag_nqueues--;
+ return ip6_frags.nqueues;
}
-static __inline__ void fq_unlink(struct frag_queue *fq)
+int ip6_frag_mem(void)
{
- write_lock(&ip6_frag_lock);
- __fq_unlink(fq);
- write_unlock(&ip6_frag_lock);
+ return atomic_read(&ip6_frags.mem);
}
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
+ struct net_device *dev);
+
/*
* callers should be careful not to use the hash value outside the ipfrag_lock
* as doing so could race with ipfrag_hash_rnd being recalculated.
@@ -136,7 +119,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
a += JHASH_GOLDEN_RATIO;
b += JHASH_GOLDEN_RATIO;
- c += ip6_frag_hash_rnd;
+ c += ip6_frags.rnd;
__jhash_mix(a, b, c);
a += (__force u32)saddr->s6_addr32[3];
@@ -149,60 +132,29 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
c += (__force u32)id;
__jhash_mix(a, b, c);
- return c & (IP6Q_HASHSZ - 1);
+ return c & (INETFRAGS_HASHSZ - 1);
}
-static struct timer_list ip6_frag_secret_timer;
-int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ;
-
-static void ip6_frag_secret_rebuild(unsigned long dummy)
+static unsigned int ip6_hashfn(struct inet_frag_queue *q)
{
- unsigned long now = jiffies;
- int i;
-
- write_lock(&ip6_frag_lock);
- get_random_bytes(&ip6_frag_hash_rnd, sizeof(u32));
- for (i = 0; i < IP6Q_HASHSZ; i++) {
- struct frag_queue *q;
- struct hlist_node *p, *n;
-
- hlist_for_each_entry_safe(q, p, n, &ip6_frag_hash[i], list) {
- unsigned int hval = ip6qhashfn(q->id,
- &q->saddr,
- &q->daddr);
-
- if (hval != i) {
- hlist_del(&q->list);
-
- /* Relink to new hash chain. */
- hlist_add_head(&q->list,
- &ip6_frag_hash[hval]);
-
- }
- }
- }
- write_unlock(&ip6_frag_lock);
+ struct frag_queue *fq;
- mod_timer(&ip6_frag_secret_timer, now + sysctl_ip6frag_secret_interval);
+ fq = container_of(q, struct frag_queue, q);
+ return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr);
}
-atomic_t ip6_frag_mem = ATOMIC_INIT(0);
-
/* Memory Tracking Functions. */
static inline void frag_kfree_skb(struct sk_buff *skb, int *work)
{
if (work)
*work -= skb->truesize;
- atomic_sub(skb->truesize, &ip6_frag_mem);
+ atomic_sub(skb->truesize, &ip6_frags.mem);
kfree_skb(skb);
}
-static inline void frag_free_queue(struct frag_queue *fq, int *work)
+static void ip6_frag_free(struct inet_frag_queue *fq)
{
- if (work)
- *work -= sizeof(struct frag_queue);
- atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem);
- kfree(fq);
+ kfree(container_of(fq, struct frag_queue, q));
}
static inline struct frag_queue *frag_alloc_queue(void)
@@ -211,36 +163,15 @@ static inline struct frag_queue *frag_alloc_queue(void)
if(!fq)
return NULL;
- atomic_add(sizeof(struct frag_queue), &ip6_frag_mem);
+ atomic_add(sizeof(struct frag_queue), &ip6_frags.mem);
return fq;
}
/* Destruction primitives. */
-/* Complete destruction of fq. */
-static void ip6_frag_destroy(struct frag_queue *fq, int *work)
-{
- struct sk_buff *fp;
-
- BUG_TRAP(fq->last_in&COMPLETE);
- BUG_TRAP(del_timer(&fq->timer) == 0);
-
- /* Release all fragment data. */
- fp = fq->fragments;
- while (fp) {
- struct sk_buff *xp = fp->next;
-
- frag_kfree_skb(fp, work);
- fp = xp;
- }
-
- frag_free_queue(fq, work);
-}
-
-static __inline__ void fq_put(struct frag_queue *fq, int *work)
+static __inline__ void fq_put(struct frag_queue *fq)
{
- if (atomic_dec_and_test(&fq->refcnt))
- ip6_frag_destroy(fq, work);
+ inet_frag_put(&fq->q, &ip6_frags);
}
/* Kill fq entry. It is not destroyed immediately,
@@ -248,45 +179,16 @@ static __inline__ void fq_put(struct frag_queue *fq, int *work)
*/
static __inline__ void fq_kill(struct frag_queue *fq)
{
- if (del_timer(&fq->timer))
- atomic_dec(&fq->refcnt);
-
- if (!(fq->last_in & COMPLETE)) {
- fq_unlink(fq);
- atomic_dec(&fq->refcnt);
- fq->last_in |= COMPLETE;
- }
+ inet_frag_kill(&fq->q, &ip6_frags);
}
static void ip6_evictor(struct inet6_dev *idev)
{
- struct frag_queue *fq;
- struct list_head *tmp;
- int work;
-
- work = atomic_read(&ip6_frag_mem) - sysctl_ip6frag_low_thresh;
- if (work <= 0)
- return;
-
- while(work > 0) {
- read_lock(&ip6_frag_lock);
- if (list_empty(&ip6_frag_lru_list)) {
- read_unlock(&ip6_frag_lock);
- return;
- }
- tmp = ip6_frag_lru_list.next;
- fq = list_entry(tmp, struct frag_queue, lru_list);
- atomic_inc(&fq->refcnt);
- read_unlock(&ip6_frag_lock);
-
- spin_lock(&fq->lock);
- if (!(fq->last_in&COMPLETE))
- fq_kill(fq);
- spin_unlock(&fq->lock);
-
- fq_put(fq, &work);
- IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS);
- }
+ int evicted;
+
+ evicted = inet_frag_evictor(&ip6_frags);
+ if (evicted)
+ IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted);
}
static void ip6_frag_expire(unsigned long data)
@@ -294,9 +196,9 @@ static void ip6_frag_expire(unsigned long data)
struct frag_queue *fq = (struct frag_queue *) data;
struct net_device *dev = NULL;
- spin_lock(&fq->lock);
+ spin_lock(&fq->q.lock);
- if (fq->last_in & COMPLETE)
+ if (fq->q.last_in & COMPLETE)
goto out;
fq_kill(fq);
@@ -311,7 +213,7 @@ static void ip6_frag_expire(unsigned long data)
rcu_read_unlock();
/* Don't send error if the first segment did not arrive. */
- if (!(fq->last_in&FIRST_IN) || !fq->fragments)
+ if (!(fq->q.last_in&FIRST_IN) || !fq->q.fragments)
goto out;
/*
@@ -319,13 +221,13 @@ static void ip6_frag_expire(unsigned long data)
segment was received. And do not use fq->dev
pointer directly, device might already disappeared.
*/
- fq->fragments->dev = dev;
- icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev);
+ fq->q.fragments->dev = dev;
+ icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev);
out:
if (dev)
dev_put(dev);
- spin_unlock(&fq->lock);
- fq_put(fq, NULL);
+ spin_unlock(&fq->q.lock);
+ fq_put(fq);
}
/* Creation primitives. */
@@ -339,32 +241,32 @@ static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in)
struct hlist_node *n;
#endif
- write_lock(&ip6_frag_lock);
+ write_lock(&ip6_frags.lock);
hash = ip6qhashfn(fq_in->id, &fq_in->saddr, &fq_in->daddr);
#ifdef CONFIG_SMP
- hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) {
+ hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) {
if (fq->id == fq_in->id &&
ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
- atomic_inc(&fq->refcnt);
- write_unlock(&ip6_frag_lock);
- fq_in->last_in |= COMPLETE;
- fq_put(fq_in, NULL);
+ atomic_inc(&fq->q.refcnt);
+ write_unlock(&ip6_frags.lock);
+ fq_in->q.last_in |= COMPLETE;
+ fq_put(fq_in);
return fq;
}
}
#endif
fq = fq_in;
- if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time))
- atomic_inc(&fq->refcnt);
+ if (!mod_timer(&fq->q.timer, jiffies + ip6_frags_ctl.timeout))
+ atomic_inc(&fq->q.refcnt);
- atomic_inc(&fq->refcnt);
- hlist_add_head(&fq->list, &ip6_frag_hash[hash]);
- INIT_LIST_HEAD(&fq->lru_list);
- list_add_tail(&fq->lru_list, &ip6_frag_lru_list);
- ip6_frag_nqueues++;
- write_unlock(&ip6_frag_lock);
+ atomic_inc(&fq->q.refcnt);
+ hlist_add_head(&fq->q.list, &ip6_frags.hash[hash]);
+ INIT_LIST_HEAD(&fq->q.lru_list);
+ list_add_tail(&fq->q.lru_list, &ip6_frags.lru_list);
+ ip6_frags.nqueues++;
+ write_unlock(&ip6_frags.lock);
return fq;
}
@@ -382,11 +284,11 @@ ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst,
ipv6_addr_copy(&fq->saddr, src);
ipv6_addr_copy(&fq->daddr, dst);
- init_timer(&fq->timer);
- fq->timer.function = ip6_frag_expire;
- fq->timer.data = (long) fq;
- spin_lock_init(&fq->lock);
- atomic_set(&fq->refcnt, 1);
+ init_timer(&fq->q.timer);
+ fq->q.timer.function = ip6_frag_expire;
+ fq->q.timer.data = (long) fq;
+ spin_lock_init(&fq->q.lock);
+ atomic_set(&fq->q.refcnt, 1);
return ip6_frag_intern(fq);
@@ -403,30 +305,31 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst,
struct hlist_node *n;
unsigned int hash;
- read_lock(&ip6_frag_lock);
+ read_lock(&ip6_frags.lock);
hash = ip6qhashfn(id, src, dst);
- hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) {
+ hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) {
if (fq->id == id &&
ipv6_addr_equal(src, &fq->saddr) &&
ipv6_addr_equal(dst, &fq->daddr)) {
- atomic_inc(&fq->refcnt);
- read_unlock(&ip6_frag_lock);
+ atomic_inc(&fq->q.refcnt);
+ read_unlock(&ip6_frags.lock);
return fq;
}
}
- read_unlock(&ip6_frag_lock);
+ read_unlock(&ip6_frags.lock);
return ip6_frag_create(id, src, dst, idev);
}
-static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
+static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
struct frag_hdr *fhdr, int nhoff)
{
struct sk_buff *prev, *next;
+ struct net_device *dev;
int offset, end;
- if (fq->last_in & COMPLETE)
+ if (fq->q.last_in & COMPLETE)
goto err;
offset = ntohs(fhdr->frag_off) & ~0x7;
@@ -439,7 +342,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
((u8 *)&fhdr->frag_off -
skb_network_header(skb)));
- return;
+ return -1;
}
if (skb->ip_summed == CHECKSUM_COMPLETE) {
@@ -454,11 +357,11 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
/* If we already have some bits beyond end
* or have different end, the segment is corrupted.
*/
- if (end < fq->len ||
- ((fq->last_in & LAST_IN) && end != fq->len))
+ if (end < fq->q.len ||
+ ((fq->q.last_in & LAST_IN) && end != fq->q.len))
goto err;
- fq->last_in |= LAST_IN;
- fq->len = end;
+ fq->q.last_in |= LAST_IN;
+ fq->q.len = end;
} else {
/* Check if the fragment is rounded to 8 bytes.
* Required by the RFC.
@@ -471,13 +374,13 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
offsetof(struct ipv6hdr, payload_len));
- return;
+ return -1;
}
- if (end > fq->len) {
+ if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
- if (fq->last_in & LAST_IN)
+ if (fq->q.last_in & LAST_IN)
goto err;
- fq->len = end;
+ fq->q.len = end;
}
}
@@ -496,7 +399,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
* this fragment, right?
*/
prev = NULL;
- for(next = fq->fragments; next != NULL; next = next->next) {
+ for(next = fq->q.fragments; next != NULL; next = next->next) {
if (FRAG6_CB(next)->offset >= offset)
break; /* bingo! */
prev = next;
@@ -533,7 +436,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
if (!pskb_pull(next, i))
goto err;
FRAG6_CB(next)->offset += i; /* next fragment */
- fq->meat -= i;
+ fq->q.meat -= i;
if (next->ip_summed != CHECKSUM_UNNECESSARY)
next->ip_summed = CHECKSUM_NONE;
break;
@@ -548,9 +451,9 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
if (prev)
prev->next = next;
else
- fq->fragments = next;
+ fq->q.fragments = next;
- fq->meat -= free_it->len;
+ fq->q.meat -= free_it->len;
frag_kfree_skb(free_it, NULL);
}
}
@@ -562,30 +465,37 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
if (prev)
prev->next = skb;
else
- fq->fragments = skb;
+ fq->q.fragments = skb;
- if (skb->dev)
- fq->iif = skb->dev->ifindex;
- skb->dev = NULL;
- fq->stamp = skb->tstamp;
- fq->meat += skb->len;
- atomic_add(skb->truesize, &ip6_frag_mem);
+ dev = skb->dev;
+ if (dev) {
+ fq->iif = dev->ifindex;
+ skb->dev = NULL;
+ }
+ fq->q.stamp = skb->tstamp;
+ fq->q.meat += skb->len;
+ atomic_add(skb->truesize, &ip6_frags.mem);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
*/
if (offset == 0) {
fq->nhoffset = nhoff;
- fq->last_in |= FIRST_IN;
+ fq->q.last_in |= FIRST_IN;
}
- write_lock(&ip6_frag_lock);
- list_move_tail(&fq->lru_list, &ip6_frag_lru_list);
- write_unlock(&ip6_frag_lock);
- return;
+
+ if (fq->q.last_in == (FIRST_IN | LAST_IN) && fq->q.meat == fq->q.len)
+ return ip6_frag_reasm(fq, prev, dev);
+
+ write_lock(&ip6_frags.lock);
+ list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list);
+ write_unlock(&ip6_frags.lock);
+ return -1;
err:
IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
+ return -1;
}
/*
@@ -597,21 +507,39 @@ err:
* queue is eligible for reassembly i.e. it is not COMPLETE,
* the last and the first frames arrived and all the bits are here.
*/
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
struct net_device *dev)
{
- struct sk_buff *fp, *head = fq->fragments;
+ struct sk_buff *fp, *head = fq->q.fragments;
int payload_len;
unsigned int nhoff;
fq_kill(fq);
+ /* Make the one we just received the head. */
+ if (prev) {
+ head = prev->next;
+ fp = skb_clone(head, GFP_ATOMIC);
+
+ if (!fp)
+ goto out_oom;
+
+ fp->next = head->next;
+ prev->next = fp;
+
+ skb_morph(head, fq->q.fragments);
+ head->next = fq->q.fragments->next;
+
+ kfree_skb(fq->q.fragments);
+ fq->q.fragments = head;
+ }
+
BUG_TRAP(head != NULL);
BUG_TRAP(FRAG6_CB(head)->offset == 0);
/* Unfragmented part is taken from the first segment. */
payload_len = ((head->data - skb_network_header(head)) -
- sizeof(struct ipv6hdr) + fq->len -
+ sizeof(struct ipv6hdr) + fq->q.len -
sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN)
goto out_oversize;
@@ -640,7 +568,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- atomic_add(clone->truesize, &ip6_frag_mem);
+ atomic_add(clone->truesize, &ip6_frags.mem);
}
/* We have to remove fragment header from datagram and to relocate
@@ -655,7 +583,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
skb_shinfo(head)->frag_list = head->next;
skb_reset_transport_header(head);
skb_push(head, head->data - skb_network_header(head));
- atomic_sub(head->truesize, &ip6_frag_mem);
+ atomic_sub(head->truesize, &ip6_frags.mem);
for (fp=head->next; fp; fp = fp->next) {
head->data_len += fp->len;
@@ -665,17 +593,15 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
- atomic_sub(fp->truesize, &ip6_frag_mem);
+ atomic_sub(fp->truesize, &ip6_frags.mem);
}
head->next = NULL;
head->dev = dev;
- head->tstamp = fq->stamp;
+ head->tstamp = fq->q.stamp;
ipv6_hdr(head)->payload_len = htons(payload_len);
IP6CB(head)->nhoff = nhoff;
- *skb_in = head;
-
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_partial(skb_network_header(head),
@@ -685,7 +611,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
rcu_read_lock();
IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
rcu_read_unlock();
- fq->fragments = NULL;
+ fq->q.fragments = NULL;
return 1;
out_oversize:
@@ -702,10 +628,8 @@ out_fail:
return -1;
}
-static int ipv6_frag_rcv(struct sk_buff **skbp)
+static int ipv6_frag_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb = *skbp;
- struct net_device *dev = skb->dev;
struct frag_hdr *fhdr;
struct frag_queue *fq;
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -739,23 +663,19 @@ static int ipv6_frag_rcv(struct sk_buff **skbp)
return 1;
}
- if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh)
+ if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh)
ip6_evictor(ip6_dst_idev(skb->dst));
if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr,
ip6_dst_idev(skb->dst))) != NULL) {
- int ret = -1;
+ int ret;
- spin_lock(&fq->lock);
+ spin_lock(&fq->q.lock);
- ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
+ ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
- if (fq->last_in == (FIRST_IN|LAST_IN) &&
- fq->meat == fq->len)
- ret = ip6_frag_reasm(fq, skbp, dev);
-
- spin_unlock(&fq->lock);
- fq_put(fq, NULL);
+ spin_unlock(&fq->q.lock);
+ fq_put(fq);
return ret;
}
@@ -775,11 +695,10 @@ void __init ipv6_frag_init(void)
if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0)
printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n");
- ip6_frag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
- (jiffies ^ (jiffies >> 6)));
-
- init_timer(&ip6_frag_secret_timer);
- ip6_frag_secret_timer.function = ip6_frag_secret_rebuild;
- ip6_frag_secret_timer.expires = jiffies + sysctl_ip6frag_secret_interval;
- add_timer(&ip6_frag_secret_timer);
+ ip6_frags.ctl = &ip6_frags_ctl;
+ ip6_frags.hashfn = ip6_hashfn;
+ ip6_frags.destructor = ip6_frag_free;
+ ip6_frags.skb_free = NULL;
+ ip6_frags.qsize = sizeof(struct frag_queue);
+ inet_frags_init(&ip6_frags);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6ff19f9eb9e..cce9941c11c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -663,7 +663,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
return rt;
}
-static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
+static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
struct flowi *fl, int flags)
{
struct fib6_node *fn;
@@ -682,7 +682,7 @@ restart_2:
fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
- rt = rt6_select(fn, fl->iif, strict | reachable);
+ rt = rt6_select(fn, oif, strict | reachable);
BACKTRACK(&fl->fl6_src);
if (rt == &ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
@@ -735,6 +735,12 @@ out2:
return rt;
}
+static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
+ struct flowi *fl, int flags)
+{
+ return ip6_pol_route(table, fl->iif, fl, flags);
+}
+
void ip6_route_input(struct sk_buff *skb)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -761,72 +767,7 @@ void ip6_route_input(struct sk_buff *skb)
static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
struct flowi *fl, int flags)
{
- struct fib6_node *fn;
- struct rt6_info *rt, *nrt;
- int strict = 0;
- int attempts = 3;
- int err;
- int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
-
- strict |= flags & RT6_LOOKUP_F_IFACE;
-
-relookup:
- read_lock_bh(&table->tb6_lock);
-
-restart_2:
- fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
-
-restart:
- rt = rt6_select(fn, fl->oif, strict | reachable);
- BACKTRACK(&fl->fl6_src);
- if (rt == &ip6_null_entry ||
- rt->rt6i_flags & RTF_CACHE)
- goto out;
-
- dst_hold(&rt->u.dst);
- read_unlock_bh(&table->tb6_lock);
-
- if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
- nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
- else {
-#if CLONE_OFFLINK_ROUTE
- nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
-#else
- goto out2;
-#endif
- }
-
- dst_release(&rt->u.dst);
- rt = nrt ? : &ip6_null_entry;
-
- dst_hold(&rt->u.dst);
- if (nrt) {
- err = ip6_ins_rt(nrt);
- if (!err)
- goto out2;
- }
-
- if (--attempts <= 0)
- goto out2;
-
- /*
- * Race condition! In the gap, when table->tb6_lock was
- * released someone could insert this route. Relookup.
- */
- dst_release(&rt->u.dst);
- goto relookup;
-
-out:
- if (reachable) {
- reachable = 0;
- goto restart_2;
- }
- dst_hold(&rt->u.dst);
- read_unlock_bh(&table->tb6_lock);
-out2:
- rt->u.dst.lastuse = jiffies;
- rt->u.dst.__use++;
- return rt;
+ return ip6_pol_route(table, fl->oif, fl, flags);
}
struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 3fb44277207..68bb2548e46 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -12,6 +12,7 @@
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/addrconf.h>
+#include <net/inet_frag.h>
#ifdef CONFIG_SYSCTL
@@ -41,7 +42,7 @@ static ctl_table ipv6_table[] = {
{
.ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH,
.procname = "ip6frag_high_thresh",
- .data = &sysctl_ip6frag_high_thresh,
+ .data = &ip6_frags_ctl.high_thresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
@@ -49,7 +50,7 @@ static ctl_table ipv6_table[] = {
{
.ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH,
.procname = "ip6frag_low_thresh",
- .data = &sysctl_ip6frag_low_thresh,
+ .data = &ip6_frags_ctl.low_thresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec
@@ -57,7 +58,7 @@ static ctl_table ipv6_table[] = {
{
.ctl_name = NET_IPV6_IP6FRAG_TIME,
.procname = "ip6frag_time",
- .data = &sysctl_ip6frag_time,
+ .data = &ip6_frags_ctl.timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
@@ -66,7 +67,7 @@ static ctl_table ipv6_table[] = {
{
.ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL,
.procname = "ip6frag_secret_interval",
- .data = &sysctl_ip6frag_secret_interval,
+ .data = &ip6_frags_ctl.secret_interval,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index a07b59c528f..737b755342b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1668,9 +1668,8 @@ ipv6_pktoptions:
return 0;
}
-static int tcp_v6_rcv(struct sk_buff **pskb)
+static int tcp_v6_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb = *pskb;
struct tcphdr *th;
struct sock *sk;
int ret;
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 23e2809878a..6323921b40b 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -87,9 +87,8 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
EXPORT_SYMBOL(xfrm6_tunnel_deregister);
-static int tunnel6_rcv(struct sk_buff **pskb)
+static int tunnel6_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb = *pskb;
struct xfrm6_tunnel *handler;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
@@ -106,9 +105,8 @@ drop:
return 0;
}
-static int tunnel46_rcv(struct sk_buff **pskb)
+static int tunnel46_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb = *pskb;
struct xfrm6_tunnel *handler;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 82ff26dd447..caebad6ee51 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -405,10 +405,9 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
return 0;
}
-int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[],
+int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
int proto)
{
- struct sk_buff *skb = *pskb;
struct sock *sk;
struct udphdr *uh;
struct net_device *dev = skb->dev;
@@ -494,9 +493,9 @@ discard:
return 0;
}
-static __inline__ int udpv6_rcv(struct sk_buff **pskb)
+static __inline__ int udpv6_rcv(struct sk_buff *skb)
{
- return __udp6_lib_rcv(pskb, udp_hash, IPPROTO_UDP);
+ return __udp6_lib_rcv(skb, udp_hash, IPPROTO_UDP);
}
/*
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 6e252f318f7..2d3fda60123 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -6,7 +6,7 @@
#include <net/addrconf.h>
#include <net/inet_common.h>
-extern int __udp6_lib_rcv(struct sk_buff **, struct hlist_head [], int );
+extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int );
extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
int , int , int , __be32 , struct hlist_head []);
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index f54016a5500..766566f7de4 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -17,9 +17,9 @@
DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly;
-static int udplitev6_rcv(struct sk_buff **pskb)
+static int udplitev6_rcv(struct sk_buff *skb)
{
- return __udp6_lib_rcv(pskb, udplite_hash, IPPROTO_UDPLITE);
+ return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
}
static void udplitev6_err(struct sk_buff *skb,
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index c858537cec4..02f69e544f6 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -133,9 +133,9 @@ drop:
EXPORT_SYMBOL(xfrm6_rcv_spi);
-int xfrm6_rcv(struct sk_buff **pskb)
+int xfrm6_rcv(struct sk_buff *skb)
{
- return xfrm6_rcv_spi(*pskb, 0);
+ return xfrm6_rcv_spi(skb, 0);
}
EXPORT_SYMBOL(xfrm6_rcv);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 4618c18e611..a5a32c17249 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -80,7 +80,7 @@ static int xfrm6_output_finish2(struct sk_buff *skb)
while (likely((err = xfrm6_output_one(skb)) == 0)) {
nf_reset(skb);
- err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL,
+ err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
skb->dst->dev, dst_output);
if (unlikely(err != 1))
break;
@@ -88,7 +88,7 @@ static int xfrm6_output_finish2(struct sk_buff *skb)
if (!skb->dst->xfrm)
return dst_output(skb);
- err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL,
+ err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL,
skb->dst->dev, xfrm6_output_finish2);
if (unlikely(err != 1))
break;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a523fa4136e..bed9ba01e8e 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -117,7 +117,7 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
EXPORT_SYMBOL(nf_unregister_hooks);
unsigned int nf_iterate(struct list_head *head,
- struct sk_buff **skb,
+ struct sk_buff *skb,
int hook,
const struct net_device *indev,
const struct net_device *outdev,
@@ -160,7 +160,7 @@ unsigned int nf_iterate(struct list_head *head,
/* Returns 1 if okfn() needs to be executed by the caller,
* -EPERM for NF_DROP, 0 otherwise. */
-int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
+int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
struct net_device *indev,
struct net_device *outdev,
int (*okfn)(struct sk_buff *),
@@ -175,17 +175,17 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
elem = &nf_hooks[pf][hook];
next_hook:
- verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
+ verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev,
outdev, &elem, okfn, hook_thresh);
if (verdict == NF_ACCEPT || verdict == NF_STOP) {
ret = 1;
goto unlock;
} else if (verdict == NF_DROP) {
- kfree_skb(*pskb);
+ kfree_skb(skb);
ret = -EPERM;
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
NFDEBUG("nf_hook: Verdict = QUEUE.\n");
- if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn,
+ if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
verdict >> NF_VERDICT_BITS))
goto next_hook;
}
@@ -196,34 +196,24 @@ unlock:
EXPORT_SYMBOL(nf_hook_slow);
-int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len)
+int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
{
- struct sk_buff *nskb;
-
- if (writable_len > (*pskb)->len)
+ if (writable_len > skb->len)
return 0;
/* Not exclusive use of packet? Must copy. */
- if (skb_cloned(*pskb) && !skb_clone_writable(*pskb, writable_len))
- goto copy_skb;
- if (skb_shared(*pskb))
- goto copy_skb;
-
- return pskb_may_pull(*pskb, writable_len);
-
-copy_skb:
- nskb = skb_copy(*pskb, GFP_ATOMIC);
- if (!nskb)
- return 0;
- BUG_ON(skb_is_nonlinear(nskb));
-
- /* Rest of kernel will get very unhappy if we pass it a
- suddenly-orphaned skbuff */
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- return 1;
+ if (!skb_cloned(skb)) {
+ if (writable_len <= skb_headlen(skb))
+ return 1;
+ } else if (skb_clone_writable(skb, writable_len))
+ return 1;
+
+ if (writable_len <= skb_headlen(skb))
+ writable_len = 0;
+ else
+ writable_len -= skb_headlen(skb);
+
+ return !!__pskb_pull_tail(skb, writable_len);
}
EXPORT_SYMBOL(skb_make_writable);
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index e42ab230ad8..7b8239c0cd5 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -36,7 +36,7 @@ MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
module_param(ts_algo, charp, 0400);
MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)");
-unsigned int (*nf_nat_amanda_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
@@ -79,7 +79,7 @@ static struct {
},
};
-static int amanda_help(struct sk_buff **pskb,
+static int amanda_help(struct sk_buff *skb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
@@ -101,25 +101,25 @@ static int amanda_help(struct sk_buff **pskb,
/* increase the UDP timeout of the master connection as replies from
* Amanda clients to the server can be quite delayed */
- nf_ct_refresh(ct, *pskb, master_timeout * HZ);
+ nf_ct_refresh(ct, skb, master_timeout * HZ);
/* No data? */
dataoff = protoff + sizeof(struct udphdr);
- if (dataoff >= (*pskb)->len) {
+ if (dataoff >= skb->len) {
if (net_ratelimit())
- printk("amanda_help: skblen = %u\n", (*pskb)->len);
+ printk("amanda_help: skblen = %u\n", skb->len);
return NF_ACCEPT;
}
memset(&ts, 0, sizeof(ts));
- start = skb_find_text(*pskb, dataoff, (*pskb)->len,
+ start = skb_find_text(skb, dataoff, skb->len,
search[SEARCH_CONNECT].ts, &ts);
if (start == UINT_MAX)
goto out;
start += dataoff + search[SEARCH_CONNECT].len;
memset(&ts, 0, sizeof(ts));
- stop = skb_find_text(*pskb, start, (*pskb)->len,
+ stop = skb_find_text(skb, start, skb->len,
search[SEARCH_NEWLINE].ts, &ts);
if (stop == UINT_MAX)
goto out;
@@ -127,13 +127,13 @@ static int amanda_help(struct sk_buff **pskb,
for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
memset(&ts, 0, sizeof(ts));
- off = skb_find_text(*pskb, start, stop, search[i].ts, &ts);
+ off = skb_find_text(skb, start, stop, search[i].ts, &ts);
if (off == UINT_MAX)
continue;
off += start + search[i].len;
len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off);
- if (skb_copy_bits(*pskb, off, pbuf, len))
+ if (skb_copy_bits(skb, off, pbuf, len))
break;
pbuf[len] = '\0';
@@ -153,7 +153,7 @@ static int amanda_help(struct sk_buff **pskb,
nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook);
if (nf_nat_amanda && ct->status & IPS_NAT_MASK)
- ret = nf_nat_amanda(pskb, ctinfo, off - dataoff,
+ ret = nf_nat_amanda(skb, ctinfo, off - dataoff,
len, exp);
else if (nf_ct_expect_related(exp) != 0)
ret = NF_DROP;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 83c30b45d17..4d6171bc082 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -307,7 +307,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert);
/* Confirm a connection given skb; places it in hash table */
int
-__nf_conntrack_confirm(struct sk_buff **pskb)
+__nf_conntrack_confirm(struct sk_buff *skb)
{
unsigned int hash, repl_hash;
struct nf_conntrack_tuple_hash *h;
@@ -316,7 +316,7 @@ __nf_conntrack_confirm(struct sk_buff **pskb)
struct hlist_node *n;
enum ip_conntrack_info ctinfo;
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
/* ipt_REJECT uses nf_conntrack_attach to attach related
ICMP/TCP RST packets in other direction. Actual packet
@@ -367,14 +367,14 @@ __nf_conntrack_confirm(struct sk_buff **pskb)
write_unlock_bh(&nf_conntrack_lock);
help = nfct_help(ct);
if (help && help->helper)
- nf_conntrack_event_cache(IPCT_HELPER, *pskb);
+ nf_conntrack_event_cache(IPCT_HELPER, skb);
#ifdef CONFIG_NF_NAT_NEEDED
if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
- nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
+ nf_conntrack_event_cache(IPCT_NATINFO, skb);
#endif
nf_conntrack_event_cache(master_ct(ct) ?
- IPCT_RELATED : IPCT_NEW, *pskb);
+ IPCT_RELATED : IPCT_NEW, skb);
return NF_ACCEPT;
out:
@@ -632,7 +632,7 @@ resolve_normal_ct(struct sk_buff *skb,
}
unsigned int
-nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
+nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb)
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
@@ -644,14 +644,14 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
int ret;
/* Previously seen (loopback or untracked)? Ignore. */
- if ((*pskb)->nfct) {
+ if (skb->nfct) {
NF_CT_STAT_INC_ATOMIC(ignore);
return NF_ACCEPT;
}
/* rcu_read_lock()ed by nf_hook_slow */
l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
- ret = l3proto->get_l4proto(*pskb, skb_network_offset(*pskb),
+ ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
&dataoff, &protonum);
if (ret <= 0) {
pr_debug("not prepared to track yet or error occured\n");
@@ -666,13 +666,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
* inverse of the return code tells to the netfilter
* core what to do with the packet. */
if (l4proto->error != NULL &&
- (ret = l4proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
+ (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
NF_CT_STAT_INC_ATOMIC(error);
NF_CT_STAT_INC_ATOMIC(invalid);
return -ret;
}
- ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, l4proto,
+ ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto,
&set_reply, &ctinfo);
if (!ct) {
/* Not valid part of a connection */
@@ -686,21 +686,21 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
return NF_DROP;
}
- NF_CT_ASSERT((*pskb)->nfct);
+ NF_CT_ASSERT(skb->nfct);
- ret = l4proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
+ ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
if (ret < 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
pr_debug("nf_conntrack_in: Can't track with proto module\n");
- nf_conntrack_put((*pskb)->nfct);
- (*pskb)->nfct = NULL;
+ nf_conntrack_put(skb->nfct);
+ skb->nfct = NULL;
NF_CT_STAT_INC_ATOMIC(invalid);
return -ret;
}
if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
- nf_conntrack_event_cache(IPCT_STATUS, *pskb);
+ nf_conntrack_event_cache(IPCT_STATUS, skb);
return ret;
}
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index c763ee74ea0..6df259067f7 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -43,7 +43,7 @@ module_param_array(ports, ushort, &ports_c, 0400);
static int loose;
module_param(loose, bool, 0600);
-unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
enum nf_ct_ftp_type type,
unsigned int matchoff,
@@ -344,7 +344,7 @@ static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir,
}
}
-static int help(struct sk_buff **pskb,
+static int help(struct sk_buff *skb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
@@ -371,21 +371,21 @@ static int help(struct sk_buff **pskb,
return NF_ACCEPT;
}
- th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
if (th == NULL)
return NF_ACCEPT;
dataoff = protoff + th->doff * 4;
/* No data? */
- if (dataoff >= (*pskb)->len) {
+ if (dataoff >= skb->len) {
pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff,
- (*pskb)->len);
+ skb->len);
return NF_ACCEPT;
}
- datalen = (*pskb)->len - dataoff;
+ datalen = skb->len - dataoff;
spin_lock_bh(&nf_ftp_lock);
- fb_ptr = skb_header_pointer(*pskb, dataoff, datalen, ftp_buffer);
+ fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer);
BUG_ON(fb_ptr == NULL);
ends_in_nl = (fb_ptr[datalen - 1] == '\n');
@@ -491,7 +491,7 @@ static int help(struct sk_buff **pskb,
* (possibly changed) expectation itself. */
nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook);
if (nf_nat_ftp && ct->status & IPS_NAT_MASK)
- ret = nf_nat_ftp(pskb, ctinfo, search[dir][i].ftptype,
+ ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype,
matchoff, matchlen, exp);
else {
/* Can't expect this? Best to drop packet now. */
@@ -508,7 +508,7 @@ out_update_nl:
/* Now if this ends in \n, update ftp info. Seq may have been
* adjusted by NAT code. */
if (ends_in_nl)
- update_nl_seq(seq, ct_ftp_info, dir, *pskb);
+ update_nl_seq(seq, ct_ftp_info, dir, skb);
out:
spin_unlock_bh(&nf_ftp_lock);
return ret;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index a8a9dfbe7a6..f23fd9598e1 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -47,27 +47,27 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "
"(determined by routing information)");
/* Hooks for NAT */
-int (*set_h245_addr_hook) (struct sk_buff **pskb,
+int (*set_h245_addr_hook) (struct sk_buff *skb,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr,
union nf_conntrack_address *addr, __be16 port)
__read_mostly;
-int (*set_h225_addr_hook) (struct sk_buff **pskb,
+int (*set_h225_addr_hook) (struct sk_buff *skb,
unsigned char **data, int dataoff,
TransportAddress *taddr,
union nf_conntrack_address *addr, __be16 port)
__read_mostly;
-int (*set_sig_addr_hook) (struct sk_buff **pskb,
+int (*set_sig_addr_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data,
TransportAddress *taddr, int count) __read_mostly;
-int (*set_ras_addr_hook) (struct sk_buff **pskb,
+int (*set_ras_addr_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data,
TransportAddress *taddr, int count) __read_mostly;
-int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb,
+int (*nat_rtp_rtcp_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
@@ -75,25 +75,25 @@ int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb,
__be16 port, __be16 rtp_port,
struct nf_conntrack_expect *rtp_exp,
struct nf_conntrack_expect *rtcp_exp) __read_mostly;
-int (*nat_t120_hook) (struct sk_buff **pskb,
+int (*nat_t120_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp) __read_mostly;
-int (*nat_h245_hook) (struct sk_buff **pskb,
+int (*nat_h245_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp) __read_mostly;
-int (*nat_callforwarding_hook) (struct sk_buff **pskb,
+int (*nat_callforwarding_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp) __read_mostly;
-int (*nat_q931_hook) (struct sk_buff **pskb,
+int (*nat_q931_hook) (struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, TransportAddress *taddr, int idx,
@@ -108,7 +108,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[];
static struct nf_conntrack_helper nf_conntrack_helper_ras[];
/****************************************************************************/
-static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff,
+static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo,
unsigned char **data, int *datalen, int *dataoff)
{
@@ -122,7 +122,7 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff,
int tpktoff;
/* Get TCP header */
- th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
if (th == NULL)
return 0;
@@ -130,13 +130,13 @@ static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff,
tcpdataoff = protoff + th->doff * 4;
/* Get TCP data length */
- tcpdatalen = (*pskb)->len - tcpdataoff;
+ tcpdatalen = skb->len - tcpdataoff;
if (tcpdatalen <= 0) /* No TCP data */
goto clear_out;
if (*data == NULL) { /* first TPKT */
/* Get first TPKT pointer */
- tpkt = skb_header_pointer(*pskb, tcpdataoff, tcpdatalen,
+ tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen,
h323_buffer);
BUG_ON(tpkt == NULL);
@@ -248,7 +248,7 @@ static int get_h245_addr(struct nf_conn *ct, unsigned char *data,
}
/****************************************************************************/
-static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
+static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr)
@@ -297,7 +297,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
(nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook)) &&
ct->status & IPS_NAT_MASK) {
/* NAT needed */
- ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+ ret = nat_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
taddr, port, rtp_port, rtp_exp, rtcp_exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(rtp_exp) == 0) {
@@ -321,7 +321,7 @@ static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int expect_t120(struct sk_buff **pskb,
+static int expect_t120(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
@@ -355,7 +355,7 @@ static int expect_t120(struct sk_buff **pskb,
(nat_t120 = rcu_dereference(nat_t120_hook)) &&
ct->status & IPS_NAT_MASK) {
/* NAT needed */
- ret = nat_t120(pskb, ct, ctinfo, data, dataoff, taddr,
+ ret = nat_t120(skb, ct, ctinfo, data, dataoff, taddr,
port, exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(exp) == 0) {
@@ -371,7 +371,7 @@ static int expect_t120(struct sk_buff **pskb,
}
/****************************************************************************/
-static int process_h245_channel(struct sk_buff **pskb,
+static int process_h245_channel(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
@@ -381,7 +381,7 @@ static int process_h245_channel(struct sk_buff **pskb,
if (channel->options & eH2250LogicalChannelParameters_mediaChannel) {
/* RTP */
- ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
&channel->mediaChannel);
if (ret < 0)
return -1;
@@ -390,7 +390,7 @@ static int process_h245_channel(struct sk_buff **pskb,
if (channel->
options & eH2250LogicalChannelParameters_mediaControlChannel) {
/* RTCP */
- ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
&channel->mediaControlChannel);
if (ret < 0)
return -1;
@@ -400,7 +400,7 @@ static int process_h245_channel(struct sk_buff **pskb,
}
/****************************************************************************/
-static int process_olc(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_olc(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
OpenLogicalChannel *olc)
@@ -412,7 +412,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct,
if (olc->forwardLogicalChannelParameters.multiplexParameters.choice ==
eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)
{
- ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
+ ret = process_h245_channel(skb, ct, ctinfo, data, dataoff,
&olc->
forwardLogicalChannelParameters.
multiplexParameters.
@@ -430,7 +430,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct,
eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
{
ret =
- process_h245_channel(pskb, ct, ctinfo, data, dataoff,
+ process_h245_channel(skb, ct, ctinfo, data, dataoff,
&olc->
reverseLogicalChannelParameters.
multiplexParameters.
@@ -448,7 +448,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct,
t120.choice == eDataProtocolCapability_separateLANStack &&
olc->separateStack.networkAddress.choice ==
eNetworkAccessParameters_networkAddress_localAreaAddress) {
- ret = expect_t120(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_t120(skb, ct, ctinfo, data, dataoff,
&olc->separateStack.networkAddress.
localAreaAddress);
if (ret < 0)
@@ -459,7 +459,7 @@ static int process_olc(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int process_olca(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_olca(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
OpenLogicalChannelAck *olca)
@@ -477,7 +477,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct,
choice ==
eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters))
{
- ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff,
+ ret = process_h245_channel(skb, ct, ctinfo, data, dataoff,
&olca->
reverseLogicalChannelParameters.
multiplexParameters.
@@ -496,7 +496,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct,
if (ack->options &
eH2250LogicalChannelAckParameters_mediaChannel) {
/* RTP */
- ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
&ack->mediaChannel);
if (ret < 0)
return -1;
@@ -505,7 +505,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct,
if (ack->options &
eH2250LogicalChannelAckParameters_mediaControlChannel) {
/* RTCP */
- ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff,
&ack->mediaControlChannel);
if (ret < 0)
return -1;
@@ -515,7 +515,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct,
if ((olca->options & eOpenLogicalChannelAck_separateStack) &&
olca->separateStack.networkAddress.choice ==
eNetworkAccessParameters_networkAddress_localAreaAddress) {
- ret = expect_t120(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_t120(skb, ct, ctinfo, data, dataoff,
&olca->separateStack.networkAddress.
localAreaAddress);
if (ret < 0)
@@ -526,7 +526,7 @@ static int process_olca(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int process_h245(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_h245(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
MultimediaSystemControlMessage *mscm)
@@ -535,7 +535,7 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct,
case eMultimediaSystemControlMessage_request:
if (mscm->request.choice ==
eRequestMessage_openLogicalChannel) {
- return process_olc(pskb, ct, ctinfo, data, dataoff,
+ return process_olc(skb, ct, ctinfo, data, dataoff,
&mscm->request.openLogicalChannel);
}
pr_debug("nf_ct_h323: H.245 Request %d\n",
@@ -544,7 +544,7 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct,
case eMultimediaSystemControlMessage_response:
if (mscm->response.choice ==
eResponseMessage_openLogicalChannelAck) {
- return process_olca(pskb, ct, ctinfo, data, dataoff,
+ return process_olca(skb, ct, ctinfo, data, dataoff,
&mscm->response.
openLogicalChannelAck);
}
@@ -560,7 +560,7 @@ static int process_h245(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int h245_help(struct sk_buff **pskb, unsigned int protoff,
+static int h245_help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
static MultimediaSystemControlMessage mscm;
@@ -574,12 +574,12 @@ static int h245_help(struct sk_buff **pskb, unsigned int protoff,
ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
return NF_ACCEPT;
}
- pr_debug("nf_ct_h245: skblen = %u\n", (*pskb)->len);
+ pr_debug("nf_ct_h245: skblen = %u\n", skb->len);
spin_lock_bh(&nf_h323_lock);
/* Process each TPKT */
- while (get_tpkt_data(pskb, protoff, ct, ctinfo,
+ while (get_tpkt_data(skb, protoff, ct, ctinfo,
&data, &datalen, &dataoff)) {
pr_debug("nf_ct_h245: TPKT len=%d ", datalen);
NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
@@ -596,7 +596,7 @@ static int h245_help(struct sk_buff **pskb, unsigned int protoff,
}
/* Process H.245 signal */
- if (process_h245(pskb, ct, ctinfo, &data, dataoff, &mscm) < 0)
+ if (process_h245(skb, ct, ctinfo, &data, dataoff, &mscm) < 0)
goto drop;
}
@@ -654,7 +654,7 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data,
}
/****************************************************************************/
-static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct,
+static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
TransportAddress *taddr)
@@ -687,7 +687,7 @@ static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct,
(nat_h245 = rcu_dereference(nat_h245_hook)) &&
ct->status & IPS_NAT_MASK) {
/* NAT needed */
- ret = nat_h245(pskb, ct, ctinfo, data, dataoff, taddr,
+ ret = nat_h245(skb, ct, ctinfo, data, dataoff, taddr,
port, exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(exp) == 0) {
@@ -758,7 +758,7 @@ static int callforward_do_filter(union nf_conntrack_address *src,
}
/****************************************************************************/
-static int expect_callforwarding(struct sk_buff **pskb,
+static int expect_callforwarding(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
@@ -798,7 +798,7 @@ static int expect_callforwarding(struct sk_buff **pskb,
(nat_callforwarding = rcu_dereference(nat_callforwarding_hook)) &&
ct->status & IPS_NAT_MASK) {
/* Need NAT */
- ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff,
+ ret = nat_callforwarding(skb, ct, ctinfo, data, dataoff,
taddr, port, exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(exp) == 0) {
@@ -814,7 +814,7 @@ static int expect_callforwarding(struct sk_buff **pskb,
}
/****************************************************************************/
-static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_setup(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
Setup_UUIE *setup)
@@ -829,7 +829,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
pr_debug("nf_ct_q931: Setup\n");
if (setup->options & eSetup_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, data, dataoff,
&setup->h245Address);
if (ret < 0)
return -1;
@@ -846,7 +846,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
NIP6(*(struct in6_addr *)&addr), ntohs(port),
NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3),
ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port));
- ret = set_h225_addr(pskb, data, dataoff,
+ ret = set_h225_addr(skb, data, dataoff,
&setup->destCallSignalAddress,
&ct->tuplehash[!dir].tuple.src.u3,
ct->tuplehash[!dir].tuple.src.u.tcp.port);
@@ -864,7 +864,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
NIP6(*(struct in6_addr *)&addr), ntohs(port),
NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3),
ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port));
- ret = set_h225_addr(pskb, data, dataoff,
+ ret = set_h225_addr(skb, data, dataoff,
&setup->sourceCallSignalAddress,
&ct->tuplehash[!dir].tuple.dst.u3,
ct->tuplehash[!dir].tuple.dst.u.tcp.port);
@@ -874,7 +874,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
if (setup->options & eSetup_UUIE_fastStart) {
for (i = 0; i < setup->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo, data, dataoff,
&setup->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -885,7 +885,7 @@ static int process_setup(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int process_callproceeding(struct sk_buff **pskb,
+static int process_callproceeding(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
@@ -897,7 +897,7 @@ static int process_callproceeding(struct sk_buff **pskb,
pr_debug("nf_ct_q931: CallProceeding\n");
if (callproc->options & eCallProceeding_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, data, dataoff,
&callproc->h245Address);
if (ret < 0)
return -1;
@@ -905,7 +905,7 @@ static int process_callproceeding(struct sk_buff **pskb,
if (callproc->options & eCallProceeding_UUIE_fastStart) {
for (i = 0; i < callproc->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo, data, dataoff,
&callproc->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -916,7 +916,7 @@ static int process_callproceeding(struct sk_buff **pskb,
}
/****************************************************************************/
-static int process_connect(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_connect(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
Connect_UUIE *connect)
@@ -927,7 +927,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct,
pr_debug("nf_ct_q931: Connect\n");
if (connect->options & eConnect_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, data, dataoff,
&connect->h245Address);
if (ret < 0)
return -1;
@@ -935,7 +935,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct,
if (connect->options & eConnect_UUIE_fastStart) {
for (i = 0; i < connect->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo, data, dataoff,
&connect->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -946,7 +946,7 @@ static int process_connect(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_alerting(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
Alerting_UUIE *alert)
@@ -957,7 +957,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct,
pr_debug("nf_ct_q931: Alerting\n");
if (alert->options & eAlerting_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, data, dataoff,
&alert->h245Address);
if (ret < 0)
return -1;
@@ -965,7 +965,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct,
if (alert->options & eAlerting_UUIE_fastStart) {
for (i = 0; i < alert->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo, data, dataoff,
&alert->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -976,7 +976,7 @@ static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int process_facility(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_facility(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
Facility_UUIE *facility)
@@ -988,7 +988,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct,
if (facility->reason.choice == eFacilityReason_callForwarded) {
if (facility->options & eFacility_UUIE_alternativeAddress)
- return expect_callforwarding(pskb, ct, ctinfo, data,
+ return expect_callforwarding(skb, ct, ctinfo, data,
dataoff,
&facility->
alternativeAddress);
@@ -996,7 +996,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct,
}
if (facility->options & eFacility_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, data, dataoff,
&facility->h245Address);
if (ret < 0)
return -1;
@@ -1004,7 +1004,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct,
if (facility->options & eFacility_UUIE_fastStart) {
for (i = 0; i < facility->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo, data, dataoff,
&facility->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -1015,7 +1015,7 @@ static int process_facility(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int process_progress(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_progress(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff,
Progress_UUIE *progress)
@@ -1026,7 +1026,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct,
pr_debug("nf_ct_q931: Progress\n");
if (progress->options & eProgress_UUIE_h245Address) {
- ret = expect_h245(pskb, ct, ctinfo, data, dataoff,
+ ret = expect_h245(skb, ct, ctinfo, data, dataoff,
&progress->h245Address);
if (ret < 0)
return -1;
@@ -1034,7 +1034,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct,
if (progress->options & eProgress_UUIE_fastStart) {
for (i = 0; i < progress->fastStart.count; i++) {
- ret = process_olc(pskb, ct, ctinfo, data, dataoff,
+ ret = process_olc(skb, ct, ctinfo, data, dataoff,
&progress->fastStart.item[i]);
if (ret < 0)
return -1;
@@ -1045,7 +1045,7 @@ static int process_progress(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int process_q931(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_q931(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, int dataoff, Q931 *q931)
{
@@ -1055,28 +1055,28 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct,
switch (pdu->h323_message_body.choice) {
case eH323_UU_PDU_h323_message_body_setup:
- ret = process_setup(pskb, ct, ctinfo, data, dataoff,
+ ret = process_setup(skb, ct, ctinfo, data, dataoff,
&pdu->h323_message_body.setup);
break;
case eH323_UU_PDU_h323_message_body_callProceeding:
- ret = process_callproceeding(pskb, ct, ctinfo, data, dataoff,
+ ret = process_callproceeding(skb, ct, ctinfo, data, dataoff,
&pdu->h323_message_body.
callProceeding);
break;
case eH323_UU_PDU_h323_message_body_connect:
- ret = process_connect(pskb, ct, ctinfo, data, dataoff,
+ ret = process_connect(skb, ct, ctinfo, data, dataoff,
&pdu->h323_message_body.connect);
break;
case eH323_UU_PDU_h323_message_body_alerting:
- ret = process_alerting(pskb, ct, ctinfo, data, dataoff,
+ ret = process_alerting(skb, ct, ctinfo, data, dataoff,
&pdu->h323_message_body.alerting);
break;
case eH323_UU_PDU_h323_message_body_facility:
- ret = process_facility(pskb, ct, ctinfo, data, dataoff,
+ ret = process_facility(skb, ct, ctinfo, data, dataoff,
&pdu->h323_message_body.facility);
break;
case eH323_UU_PDU_h323_message_body_progress:
- ret = process_progress(pskb, ct, ctinfo, data, dataoff,
+ ret = process_progress(skb, ct, ctinfo, data, dataoff,
&pdu->h323_message_body.progress);
break;
default:
@@ -1090,7 +1090,7 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct,
if (pdu->options & eH323_UU_PDU_h245Control) {
for (i = 0; i < pdu->h245Control.count; i++) {
- ret = process_h245(pskb, ct, ctinfo, data, dataoff,
+ ret = process_h245(skb, ct, ctinfo, data, dataoff,
&pdu->h245Control.item[i]);
if (ret < 0)
return -1;
@@ -1101,7 +1101,7 @@ static int process_q931(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int q931_help(struct sk_buff **pskb, unsigned int protoff,
+static int q931_help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
static Q931 q931;
@@ -1115,12 +1115,12 @@ static int q931_help(struct sk_buff **pskb, unsigned int protoff,
ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
return NF_ACCEPT;
}
- pr_debug("nf_ct_q931: skblen = %u\n", (*pskb)->len);
+ pr_debug("nf_ct_q931: skblen = %u\n", skb->len);
spin_lock_bh(&nf_h323_lock);
/* Process each TPKT */
- while (get_tpkt_data(pskb, protoff, ct, ctinfo,
+ while (get_tpkt_data(skb, protoff, ct, ctinfo,
&data, &datalen, &dataoff)) {
pr_debug("nf_ct_q931: TPKT len=%d ", datalen);
NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple);
@@ -1136,7 +1136,7 @@ static int q931_help(struct sk_buff **pskb, unsigned int protoff,
}
/* Process Q.931 signal */
- if (process_q931(pskb, ct, ctinfo, &data, dataoff, &q931) < 0)
+ if (process_q931(skb, ct, ctinfo, &data, dataoff, &q931) < 0)
goto drop;
}
@@ -1177,20 +1177,20 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
};
/****************************************************************************/
-static unsigned char *get_udp_data(struct sk_buff **pskb, unsigned int protoff,
+static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff,
int *datalen)
{
struct udphdr _uh, *uh;
int dataoff;
- uh = skb_header_pointer(*pskb, protoff, sizeof(_uh), &_uh);
+ uh = skb_header_pointer(skb, protoff, sizeof(_uh), &_uh);
if (uh == NULL)
return NULL;
dataoff = protoff + sizeof(_uh);
- if (dataoff >= (*pskb)->len)
+ if (dataoff >= skb->len)
return NULL;
- *datalen = (*pskb)->len - dataoff;
- return skb_header_pointer(*pskb, dataoff, *datalen, h323_buffer);
+ *datalen = skb->len - dataoff;
+ return skb_header_pointer(skb, dataoff, *datalen, h323_buffer);
}
/****************************************************************************/
@@ -1227,7 +1227,7 @@ static int set_expect_timeout(struct nf_conntrack_expect *exp,
}
/****************************************************************************/
-static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct,
+static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data,
TransportAddress *taddr, int count)
@@ -1265,7 +1265,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct,
nat_q931 = rcu_dereference(nat_q931_hook);
if (nat_q931 && ct->status & IPS_NAT_MASK) { /* Need NAT */
- ret = nat_q931(pskb, ct, ctinfo, data, taddr, i, port, exp);
+ ret = nat_q931(skb, ct, ctinfo, data, taddr, i, port, exp);
} else { /* Conntrack only */
if (nf_ct_expect_related(exp) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
@@ -1283,7 +1283,7 @@ static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int process_grq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_grq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, GatekeeperRequest *grq)
{
@@ -1293,13 +1293,13 @@ static int process_grq(struct sk_buff **pskb, struct nf_conn *ct,
set_ras_addr = rcu_dereference(set_ras_addr_hook);
if (set_ras_addr && ct->status & IPS_NAT_MASK) /* NATed */
- return set_ras_addr(pskb, ct, ctinfo, data,
+ return set_ras_addr(skb, ct, ctinfo, data,
&grq->rasAddress, 1);
return 0;
}
/****************************************************************************/
-static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, GatekeeperConfirm *gcf)
{
@@ -1343,7 +1343,7 @@ static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_rrq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, RegistrationRequest *rrq)
{
@@ -1353,7 +1353,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct,
pr_debug("nf_ct_ras: RRQ\n");
- ret = expect_q931(pskb, ct, ctinfo, data,
+ ret = expect_q931(skb, ct, ctinfo, data,
rrq->callSignalAddress.item,
rrq->callSignalAddress.count);
if (ret < 0)
@@ -1361,7 +1361,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct,
set_ras_addr = rcu_dereference(set_ras_addr_hook);
if (set_ras_addr && ct->status & IPS_NAT_MASK) {
- ret = set_ras_addr(pskb, ct, ctinfo, data,
+ ret = set_ras_addr(skb, ct, ctinfo, data,
rrq->rasAddress.item,
rrq->rasAddress.count);
if (ret < 0)
@@ -1378,7 +1378,7 @@ static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, RegistrationConfirm *rcf)
{
@@ -1392,7 +1392,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct,
set_sig_addr = rcu_dereference(set_sig_addr_hook);
if (set_sig_addr && ct->status & IPS_NAT_MASK) {
- ret = set_sig_addr(pskb, ct, ctinfo, data,
+ ret = set_sig_addr(skb, ct, ctinfo, data,
rcf->callSignalAddress.item,
rcf->callSignalAddress.count);
if (ret < 0)
@@ -1407,7 +1407,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct,
if (info->timeout > 0) {
pr_debug("nf_ct_ras: set RAS connection timeout to "
"%u seconds\n", info->timeout);
- nf_ct_refresh(ct, *pskb, info->timeout * HZ);
+ nf_ct_refresh(ct, skb, info->timeout * HZ);
/* Set expect timeout */
read_lock_bh(&nf_conntrack_lock);
@@ -1427,7 +1427,7 @@ static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int process_urq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_urq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, UnregistrationRequest *urq)
{
@@ -1440,7 +1440,7 @@ static int process_urq(struct sk_buff **pskb, struct nf_conn *ct,
set_sig_addr = rcu_dereference(set_sig_addr_hook);
if (set_sig_addr && ct->status & IPS_NAT_MASK) {
- ret = set_sig_addr(pskb, ct, ctinfo, data,
+ ret = set_sig_addr(skb, ct, ctinfo, data,
urq->callSignalAddress.item,
urq->callSignalAddress.count);
if (ret < 0)
@@ -1453,13 +1453,13 @@ static int process_urq(struct sk_buff **pskb, struct nf_conn *ct,
info->sig_port[!dir] = 0;
/* Give it 30 seconds for UCF or URJ */
- nf_ct_refresh(ct, *pskb, 30 * HZ);
+ nf_ct_refresh(ct, skb, 30 * HZ);
return 0;
}
/****************************************************************************/
-static int process_arq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_arq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, AdmissionRequest *arq)
{
@@ -1479,7 +1479,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct,
port == info->sig_port[dir] &&
set_h225_addr && ct->status & IPS_NAT_MASK) {
/* Answering ARQ */
- return set_h225_addr(pskb, data, 0,
+ return set_h225_addr(skb, data, 0,
&arq->destCallSignalAddress,
&ct->tuplehash[!dir].tuple.dst.u3,
info->sig_port[!dir]);
@@ -1491,7 +1491,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct,
!memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) &&
set_h225_addr && ct->status & IPS_NAT_MASK) {
/* Calling ARQ */
- return set_h225_addr(pskb, data, 0,
+ return set_h225_addr(skb, data, 0,
&arq->srcCallSignalAddress,
&ct->tuplehash[!dir].tuple.dst.u3,
port);
@@ -1501,7 +1501,7 @@ static int process_arq(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int process_acf(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, AdmissionConfirm *acf)
{
@@ -1522,7 +1522,7 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct,
/* Answering ACF */
set_sig_addr = rcu_dereference(set_sig_addr_hook);
if (set_sig_addr && ct->status & IPS_NAT_MASK)
- return set_sig_addr(pskb, ct, ctinfo, data,
+ return set_sig_addr(skb, ct, ctinfo, data,
&acf->destCallSignalAddress, 1);
return 0;
}
@@ -1548,7 +1548,7 @@ static int process_acf(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_lrq(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, LocationRequest *lrq)
{
@@ -1558,13 +1558,13 @@ static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct,
set_ras_addr = rcu_dereference(set_ras_addr_hook);
if (set_ras_addr && ct->status & IPS_NAT_MASK)
- return set_ras_addr(pskb, ct, ctinfo, data,
+ return set_ras_addr(skb, ct, ctinfo, data,
&lrq->replyAddress, 1);
return 0;
}
/****************************************************************************/
-static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, LocationConfirm *lcf)
{
@@ -1603,7 +1603,7 @@ static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int process_irr(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_irr(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, InfoRequestResponse *irr)
{
@@ -1615,7 +1615,7 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct,
set_ras_addr = rcu_dereference(set_ras_addr_hook);
if (set_ras_addr && ct->status & IPS_NAT_MASK) {
- ret = set_ras_addr(pskb, ct, ctinfo, data,
+ ret = set_ras_addr(skb, ct, ctinfo, data,
&irr->rasAddress, 1);
if (ret < 0)
return -1;
@@ -1623,7 +1623,7 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct,
set_sig_addr = rcu_dereference(set_sig_addr_hook);
if (set_sig_addr && ct->status & IPS_NAT_MASK) {
- ret = set_sig_addr(pskb, ct, ctinfo, data,
+ ret = set_sig_addr(skb, ct, ctinfo, data,
irr->callSignalAddress.item,
irr->callSignalAddress.count);
if (ret < 0)
@@ -1634,40 +1634,40 @@ static int process_irr(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int process_ras(struct sk_buff **pskb, struct nf_conn *ct,
+static int process_ras(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned char **data, RasMessage *ras)
{
switch (ras->choice) {
case eRasMessage_gatekeeperRequest:
- return process_grq(pskb, ct, ctinfo, data,
+ return process_grq(skb, ct, ctinfo, data,
&ras->gatekeeperRequest);
case eRasMessage_gatekeeperConfirm:
- return process_gcf(pskb, ct, ctinfo, data,
+ return process_gcf(skb, ct, ctinfo, data,
&ras->gatekeeperConfirm);
case eRasMessage_registrationRequest:
- return process_rrq(pskb, ct, ctinfo, data,
+ return process_rrq(skb, ct, ctinfo, data,
&ras->registrationRequest);
case eRasMessage_registrationConfirm:
- return process_rcf(pskb, ct, ctinfo, data,
+ return process_rcf(skb, ct, ctinfo, data,
&ras->registrationConfirm);
case eRasMessage_unregistrationRequest:
- return process_urq(pskb, ct, ctinfo, data,
+ return process_urq(skb, ct, ctinfo, data,
&ras->unregistrationRequest);
case eRasMessage_admissionRequest:
- return process_arq(pskb, ct, ctinfo, data,
+ return process_arq(skb, ct, ctinfo, data,
&ras->admissionRequest);
case eRasMessage_admissionConfirm:
- return process_acf(pskb, ct, ctinfo, data,
+ return process_acf(skb, ct, ctinfo, data,
&ras->admissionConfirm);
case eRasMessage_locationRequest:
- return process_lrq(pskb, ct, ctinfo, data,
+ return process_lrq(skb, ct, ctinfo, data,
&ras->locationRequest);
case eRasMessage_locationConfirm:
- return process_lcf(pskb, ct, ctinfo, data,
+ return process_lcf(skb, ct, ctinfo, data,
&ras->locationConfirm);
case eRasMessage_infoRequestResponse:
- return process_irr(pskb, ct, ctinfo, data,
+ return process_irr(skb, ct, ctinfo, data,
&ras->infoRequestResponse);
default:
pr_debug("nf_ct_ras: RAS message %d\n", ras->choice);
@@ -1678,7 +1678,7 @@ static int process_ras(struct sk_buff **pskb, struct nf_conn *ct,
}
/****************************************************************************/
-static int ras_help(struct sk_buff **pskb, unsigned int protoff,
+static int ras_help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
static RasMessage ras;
@@ -1686,12 +1686,12 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff,
int datalen = 0;
int ret;
- pr_debug("nf_ct_ras: skblen = %u\n", (*pskb)->len);
+ pr_debug("nf_ct_ras: skblen = %u\n", skb->len);
spin_lock_bh(&nf_h323_lock);
/* Get UDP data */
- data = get_udp_data(pskb, protoff, &datalen);
+ data = get_udp_data(skb, protoff, &datalen);
if (data == NULL)
goto accept;
pr_debug("nf_ct_ras: RAS message len=%d ", datalen);
@@ -1707,7 +1707,7 @@ static int ras_help(struct sk_buff **pskb, unsigned int protoff,
}
/* Process RAS message */
- if (process_ras(pskb, ct, ctinfo, &data, &ras) < 0)
+ if (process_ras(skb, ct, ctinfo, &data, &ras) < 0)
goto drop;
accept:
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 1562ca97a34..dfaed4ba83c 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -30,7 +30,7 @@ static unsigned int dcc_timeout __read_mostly = 300;
static char *irc_buffer;
static DEFINE_SPINLOCK(irc_buffer_lock);
-unsigned int (*nf_nat_irc_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
unsigned int matchoff,
unsigned int matchlen,
@@ -89,7 +89,7 @@ static int parse_dcc(char *data, char *data_end, u_int32_t *ip,
return 0;
}
-static int help(struct sk_buff **pskb, unsigned int protoff,
+static int help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
unsigned int dataoff;
@@ -116,22 +116,22 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
return NF_ACCEPT;
/* Not a full tcp header? */
- th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
if (th == NULL)
return NF_ACCEPT;
/* No data? */
dataoff = protoff + th->doff*4;
- if (dataoff >= (*pskb)->len)
+ if (dataoff >= skb->len)
return NF_ACCEPT;
spin_lock_bh(&irc_buffer_lock);
- ib_ptr = skb_header_pointer(*pskb, dataoff, (*pskb)->len - dataoff,
+ ib_ptr = skb_header_pointer(skb, dataoff, skb->len - dataoff,
irc_buffer);
BUG_ON(ib_ptr == NULL);
data = ib_ptr;
- data_limit = ib_ptr + (*pskb)->len - dataoff;
+ data_limit = ib_ptr + skb->len - dataoff;
/* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
* 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
@@ -143,7 +143,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
data += 5;
/* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
pr_debug("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u\n",
NIPQUAD(iph->saddr), ntohs(th->source),
NIPQUAD(iph->daddr), ntohs(th->dest));
@@ -193,7 +193,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
nf_nat_irc = rcu_dereference(nf_nat_irc_hook);
if (nf_nat_irc && ct->status & IPS_NAT_MASK)
- ret = nf_nat_irc(pskb, ctinfo,
+ ret = nf_nat_irc(skb, ctinfo,
addr_beg_p - ib_ptr,
addr_end_p - addr_beg_p,
exp);
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 1d59fabeb5f..9810d81e2a0 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -42,17 +42,17 @@ static unsigned int timeout __read_mostly = 3;
module_param(timeout, uint, 0400);
MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
-static int help(struct sk_buff **pskb, unsigned int protoff,
+static int help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
struct nf_conntrack_expect *exp;
- struct iphdr *iph = ip_hdr(*pskb);
- struct rtable *rt = (struct rtable *)(*pskb)->dst;
+ struct iphdr *iph = ip_hdr(skb);
+ struct rtable *rt = (struct rtable *)skb->dst;
struct in_device *in_dev;
__be32 mask = 0;
/* we're only interested in locally generated packets */
- if ((*pskb)->sk == NULL)
+ if (skb->sk == NULL)
goto out;
if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
goto out;
@@ -91,7 +91,7 @@ static int help(struct sk_buff **pskb, unsigned int protoff,
nf_ct_expect_related(exp);
nf_ct_expect_put(exp);
- nf_ct_refresh(ct, *pskb, timeout * HZ);
+ nf_ct_refresh(ct, skb, timeout * HZ);
out:
return NF_ACCEPT;
}
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index b0804199ab5..099b6df3e2b 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -41,14 +41,14 @@ MODULE_ALIAS("ip_conntrack_pptp");
static DEFINE_SPINLOCK(nf_pptp_lock);
int
-(*nf_nat_pptp_hook_outbound)(struct sk_buff **pskb,
+(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb,
struct nf_conn *ct, enum ip_conntrack_info ctinfo,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq) __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound);
int
-(*nf_nat_pptp_hook_inbound)(struct sk_buff **pskb,
+(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb,
struct nf_conn *ct, enum ip_conntrack_info ctinfo,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq) __read_mostly;
@@ -254,7 +254,7 @@ out_unexpect_orig:
}
static inline int
-pptp_inbound_pkt(struct sk_buff **pskb,
+pptp_inbound_pkt(struct sk_buff *skb,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq,
unsigned int reqlen,
@@ -367,7 +367,7 @@ pptp_inbound_pkt(struct sk_buff **pskb,
nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound);
if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK)
- return nf_nat_pptp_inbound(pskb, ct, ctinfo, ctlh, pptpReq);
+ return nf_nat_pptp_inbound(skb, ct, ctinfo, ctlh, pptpReq);
return NF_ACCEPT;
invalid:
@@ -380,7 +380,7 @@ invalid:
}
static inline int
-pptp_outbound_pkt(struct sk_buff **pskb,
+pptp_outbound_pkt(struct sk_buff *skb,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq,
unsigned int reqlen,
@@ -462,7 +462,7 @@ pptp_outbound_pkt(struct sk_buff **pskb,
nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound);
if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK)
- return nf_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq);
+ return nf_nat_pptp_outbound(skb, ct, ctinfo, ctlh, pptpReq);
return NF_ACCEPT;
invalid:
@@ -492,7 +492,7 @@ static const unsigned int pptp_msg_size[] = {
/* track caller id inside control connection, call expect_related */
static int
-conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
+conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff,
struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
@@ -502,7 +502,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
struct pptp_pkt_hdr _pptph, *pptph;
struct PptpControlHeader _ctlh, *ctlh;
union pptp_ctrl_union _pptpReq, *pptpReq;
- unsigned int tcplen = (*pskb)->len - protoff;
+ unsigned int tcplen = skb->len - protoff;
unsigned int datalen, reqlen, nexthdr_off;
int oldsstate, oldcstate;
int ret;
@@ -514,12 +514,12 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
return NF_ACCEPT;
nexthdr_off = protoff;
- tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
+ tcph = skb_header_pointer(skb, nexthdr_off, sizeof(_tcph), &_tcph);
BUG_ON(!tcph);
nexthdr_off += tcph->doff * 4;
datalen = tcplen - tcph->doff * 4;
- pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
+ pptph = skb_header_pointer(skb, nexthdr_off, sizeof(_pptph), &_pptph);
if (!pptph) {
pr_debug("no full PPTP header, can't track\n");
return NF_ACCEPT;
@@ -534,7 +534,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
return NF_ACCEPT;
}
- ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+ ctlh = skb_header_pointer(skb, nexthdr_off, sizeof(_ctlh), &_ctlh);
if (!ctlh)
return NF_ACCEPT;
nexthdr_off += sizeof(_ctlh);
@@ -547,7 +547,7 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
if (reqlen > sizeof(*pptpReq))
reqlen = sizeof(*pptpReq);
- pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
+ pptpReq = skb_header_pointer(skb, nexthdr_off, reqlen, &_pptpReq);
if (!pptpReq)
return NF_ACCEPT;
@@ -560,11 +560,11 @@ conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff,
* established from PNS->PAC. However, RFC makes no guarantee */
if (dir == IP_CT_DIR_ORIGINAL)
/* client -> server (PNS -> PAC) */
- ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
+ ret = pptp_outbound_pkt(skb, ctlh, pptpReq, reqlen, ct,
ctinfo);
else
/* server -> client (PAC -> PNS) */
- ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct,
+ ret = pptp_inbound_pkt(skb, ctlh, pptpReq, reqlen, ct,
ctinfo);
pr_debug("sstate: %d->%d, cstate: %d->%d\n",
oldsstate, info->sstate, oldcstate, info->cstate);
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index 355d371bac9..b5a16c6e21c 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -56,7 +56,7 @@ struct sane_reply_net_start {
/* other fields aren't interesting for conntrack */
};
-static int help(struct sk_buff **pskb,
+static int help(struct sk_buff *skb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
@@ -80,19 +80,19 @@ static int help(struct sk_buff **pskb,
return NF_ACCEPT;
/* Not a full tcp header? */
- th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph);
+ th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
if (th == NULL)
return NF_ACCEPT;
/* No data? */
dataoff = protoff + th->doff * 4;
- if (dataoff >= (*pskb)->len)
+ if (dataoff >= skb->len)
return NF_ACCEPT;
- datalen = (*pskb)->len - dataoff;
+ datalen = skb->len - dataoff;
spin_lock_bh(&nf_sane_lock);
- sb_ptr = skb_header_pointer(*pskb, dataoff, datalen, sane_buffer);
+ sb_ptr = skb_header_pointer(skb, dataoff, datalen, sane_buffer);
BUG_ON(sb_ptr == NULL);
if (dir == IP_CT_DIR_ORIGINAL) {
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index d449fa47491..8f8b5a48df3 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -36,13 +36,13 @@ static unsigned int sip_timeout __read_mostly = SIP_TIMEOUT;
module_param(sip_timeout, uint, 0600);
MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session");
-unsigned int (*nf_nat_sip_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conn *ct,
const char **dptr) __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_sip_hook);
-unsigned int (*nf_nat_sdp_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conntrack_expect *exp,
const char *dptr) __read_mostly;
@@ -363,7 +363,7 @@ int ct_sip_get_info(struct nf_conn *ct,
}
EXPORT_SYMBOL_GPL(ct_sip_get_info);
-static int set_expected_rtp(struct sk_buff **pskb,
+static int set_expected_rtp(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
union nf_conntrack_address *addr,
@@ -385,7 +385,7 @@ static int set_expected_rtp(struct sk_buff **pskb,
nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook);
if (nf_nat_sdp && ct->status & IPS_NAT_MASK)
- ret = nf_nat_sdp(pskb, ctinfo, exp, dptr);
+ ret = nf_nat_sdp(skb, ctinfo, exp, dptr);
else {
if (nf_ct_expect_related(exp) != 0)
ret = NF_DROP;
@@ -397,7 +397,7 @@ static int set_expected_rtp(struct sk_buff **pskb,
return ret;
}
-static int sip_help(struct sk_buff **pskb,
+static int sip_help(struct sk_buff *skb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
@@ -414,13 +414,13 @@ static int sip_help(struct sk_buff **pskb,
/* No Data ? */
dataoff = protoff + sizeof(struct udphdr);
- if (dataoff >= (*pskb)->len)
+ if (dataoff >= skb->len)
return NF_ACCEPT;
- nf_ct_refresh(ct, *pskb, sip_timeout * HZ);
+ nf_ct_refresh(ct, skb, sip_timeout * HZ);
- if (!skb_is_nonlinear(*pskb))
- dptr = (*pskb)->data + dataoff;
+ if (!skb_is_nonlinear(skb))
+ dptr = skb->data + dataoff;
else {
pr_debug("Copy of skbuff not supported yet.\n");
goto out;
@@ -428,13 +428,13 @@ static int sip_help(struct sk_buff **pskb,
nf_nat_sip = rcu_dereference(nf_nat_sip_hook);
if (nf_nat_sip && ct->status & IPS_NAT_MASK) {
- if (!nf_nat_sip(pskb, ctinfo, ct, &dptr)) {
+ if (!nf_nat_sip(skb, ctinfo, ct, &dptr)) {
ret = NF_DROP;
goto out;
}
}
- datalen = (*pskb)->len - dataoff;
+ datalen = skb->len - dataoff;
if (datalen < sizeof("SIP/2.0 200") - 1)
goto out;
@@ -464,7 +464,7 @@ static int sip_help(struct sk_buff **pskb,
ret = NF_DROP;
goto out;
}
- ret = set_expected_rtp(pskb, ct, ctinfo, &addr,
+ ret = set_expected_rtp(skb, ct, ctinfo, &addr,
htons(port), dptr);
}
}
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index cc19506cf2f..e894aa1ff3a 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -29,12 +29,12 @@ static int ports_c;
module_param_array(ports, ushort, &ports_c, 0400);
MODULE_PARM_DESC(ports, "Port numbers of TFTP servers");
-unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb,
+unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb,
enum ip_conntrack_info ctinfo,
struct nf_conntrack_expect *exp) __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_tftp_hook);
-static int tftp_help(struct sk_buff **pskb,
+static int tftp_help(struct sk_buff *skb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
@@ -46,7 +46,7 @@ static int tftp_help(struct sk_buff **pskb,
int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
typeof(nf_nat_tftp_hook) nf_nat_tftp;
- tfh = skb_header_pointer(*pskb, protoff + sizeof(struct udphdr),
+ tfh = skb_header_pointer(skb, protoff + sizeof(struct udphdr),
sizeof(_tftph), &_tftph);
if (tfh == NULL)
return NF_ACCEPT;
@@ -70,7 +70,7 @@ static int tftp_help(struct sk_buff **pskb,
nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook);
if (nf_nat_tftp && ct->status & IPS_NAT_MASK)
- ret = nf_nat_tftp(pskb, ctinfo, exp);
+ ret = nf_nat_tftp(skb, ctinfo, exp);
else if (nf_ct_expect_related(exp) != 0)
ret = NF_DROP;
nf_ct_expect_put(exp);
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 0df7fff196a..196269c1e58 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -14,7 +14,7 @@
/* core.c */
extern unsigned int nf_iterate(struct list_head *head,
- struct sk_buff **skb,
+ struct sk_buff *skb,
int hook,
const struct net_device *indev,
const struct net_device *outdev,
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index a481a349f7b..0cef1433d66 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -256,14 +256,14 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
if (verdict == NF_ACCEPT) {
afinfo = nf_get_afinfo(info->pf);
- if (!afinfo || afinfo->reroute(&skb, info) < 0)
+ if (!afinfo || afinfo->reroute(skb, info) < 0)
verdict = NF_DROP;
}
if (verdict == NF_ACCEPT) {
next_hook:
verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
- &skb, info->hook,
+ skb, info->hook,
info->indev, info->outdev, &elem,
info->okfn, INT_MIN);
}
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 49f0480afe0..3ceeffcf6f9 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -617,6 +617,7 @@ static int
nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
{
int diff;
+ int err;
diff = data_len - e->skb->len;
if (diff < 0) {
@@ -626,25 +627,18 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e)
if (data_len > 0xFFFF)
return -EINVAL;
if (diff > skb_tailroom(e->skb)) {
- struct sk_buff *newskb;
-
- newskb = skb_copy_expand(e->skb,
- skb_headroom(e->skb),
- diff,
- GFP_ATOMIC);
- if (newskb == NULL) {
+ err = pskb_expand_head(e->skb, 0,
+ diff - skb_tailroom(e->skb),
+ GFP_ATOMIC);
+ if (err) {
printk(KERN_WARNING "nf_queue: OOM "
"in mangle, dropping packet\n");
- return -ENOMEM;
+ return err;
}
- if (e->skb->sk)
- skb_set_owner_w(newskb, e->skb->sk);
- kfree_skb(e->skb);
- e->skb = newskb;
}
skb_put(e->skb, diff);
}
- if (!skb_make_writable(&e->skb, data_len))
+ if (!skb_make_writable(e->skb, data_len))
return -ENOMEM;
skb_copy_to_linear_data(e->skb, data, data_len);
e->skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 07a1b966500..77eeae658d4 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -27,7 +27,7 @@ MODULE_ALIAS("ipt_CLASSIFY");
MODULE_ALIAS("ip6t_CLASSIFY");
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -36,7 +36,7 @@ target(struct sk_buff **pskb,
{
const struct xt_classify_target_info *clinfo = targinfo;
- (*pskb)->priority = clinfo->priority;
+ skb->priority = clinfo->priority;
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index 7043c2757e0..8cc324b159e 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -34,7 +34,7 @@ MODULE_ALIAS("ip6t_CONNMARK");
#include <net/netfilter/nf_conntrack_ecache.h>
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -48,28 +48,28 @@ target(struct sk_buff **pskb,
u_int32_t mark;
u_int32_t newmark;
- ct = nf_ct_get(*pskb, &ctinfo);
+ ct = nf_ct_get(skb, &ctinfo);
if (ct) {
switch(markinfo->mode) {
case XT_CONNMARK_SET:
newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
if (newmark != ct->mark) {
ct->mark = newmark;
- nf_conntrack_event_cache(IPCT_MARK, *pskb);
+ nf_conntrack_event_cache(IPCT_MARK, skb);
}
break;
case XT_CONNMARK_SAVE:
newmark = (ct->mark & ~markinfo->mask) |
- ((*pskb)->mark & markinfo->mask);
+ (skb->mark & markinfo->mask);
if (ct->mark != newmark) {
ct->mark = newmark;
- nf_conntrack_event_cache(IPCT_MARK, *pskb);
+ nf_conntrack_event_cache(IPCT_MARK, skb);
}
break;
case XT_CONNMARK_RESTORE:
- mark = (*pskb)->mark;
+ mark = skb->mark;
diff = (ct->mark ^ mark) & markinfo->mask;
- (*pskb)->mark = mark ^ diff;
+ skb->mark = mark ^ diff;
break;
}
}
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 63d73138c1b..021b5c8d20e 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -61,12 +61,11 @@ static void secmark_restore(struct sk_buff *skb)
}
}
-static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
+static unsigned int target(struct sk_buff *skb, const struct net_device *in,
const struct net_device *out, unsigned int hooknum,
const struct xt_target *target,
const void *targinfo)
{
- struct sk_buff *skb = *pskb;
const struct xt_connsecmark_target_info *info = targinfo;
switch (info->mode) {
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 798ab731009..6322a933ab7 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -25,7 +25,7 @@ MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_DSCP");
MODULE_ALIAS("ip6t_DSCP");
-static unsigned int target(struct sk_buff **pskb,
+static unsigned int target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -33,20 +33,20 @@ static unsigned int target(struct sk_buff **pskb,
const void *targinfo)
{
const struct xt_DSCP_info *dinfo = targinfo;
- u_int8_t dscp = ipv4_get_dsfield(ip_hdr(*pskb)) >> XT_DSCP_SHIFT;
+ u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
if (dscp != dinfo->dscp) {
- if (!skb_make_writable(pskb, sizeof(struct iphdr)))
+ if (!skb_make_writable(skb, sizeof(struct iphdr)))
return NF_DROP;
- ipv4_change_dsfield(ip_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
+ ipv4_change_dsfield(ip_hdr(skb), (__u8)(~XT_DSCP_MASK),
dinfo->dscp << XT_DSCP_SHIFT);
}
return XT_CONTINUE;
}
-static unsigned int target6(struct sk_buff **pskb,
+static unsigned int target6(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -54,13 +54,13 @@ static unsigned int target6(struct sk_buff **pskb,
const void *targinfo)
{
const struct xt_DSCP_info *dinfo = targinfo;
- u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(*pskb)) >> XT_DSCP_SHIFT;
+ u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
if (dscp != dinfo->dscp) {
- if (!skb_make_writable(pskb, sizeof(struct ipv6hdr)))
+ if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
return NF_DROP;
- ipv6_change_dsfield(ipv6_hdr(*pskb), (__u8)(~XT_DSCP_MASK),
+ ipv6_change_dsfield(ipv6_hdr(skb), (__u8)(~XT_DSCP_MASK),
dinfo->dscp << XT_DSCP_SHIFT);
}
return XT_CONTINUE;
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
index f30fe0baf7d..bc6503d77d7 100644
--- a/net/netfilter/xt_MARK.c
+++ b/net/netfilter/xt_MARK.c
@@ -22,7 +22,7 @@ MODULE_ALIAS("ipt_MARK");
MODULE_ALIAS("ip6t_MARK");
static unsigned int
-target_v0(struct sk_buff **pskb,
+target_v0(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -31,12 +31,12 @@ target_v0(struct sk_buff **pskb,
{
const struct xt_mark_target_info *markinfo = targinfo;
- (*pskb)->mark = markinfo->mark;
+ skb->mark = markinfo->mark;
return XT_CONTINUE;
}
static unsigned int
-target_v1(struct sk_buff **pskb,
+target_v1(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -52,15 +52,15 @@ target_v1(struct sk_buff **pskb,
break;
case XT_MARK_AND:
- mark = (*pskb)->mark & markinfo->mark;
+ mark = skb->mark & markinfo->mark;
break;
case XT_MARK_OR:
- mark = (*pskb)->mark | markinfo->mark;
+ mark = skb->mark | markinfo->mark;
break;
}
- (*pskb)->mark = mark;
+ skb->mark = mark;
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index d3594c7ccb2..9fb449ffbf8 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -20,7 +20,7 @@ MODULE_ALIAS("ipt_NFLOG");
MODULE_ALIAS("ip6t_NFLOG");
static unsigned int
-nflog_target(struct sk_buff **pskb,
+nflog_target(struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
unsigned int hooknum, const struct xt_target *target,
const void *targinfo)
@@ -33,7 +33,7 @@ nflog_target(struct sk_buff **pskb,
li.u.ulog.group = info->group;
li.u.ulog.qthreshold = info->threshold;
- nf_log_packet(target->family, hooknum, *pskb, in, out, &li,
+ nf_log_packet(target->family, hooknum, skb, in, out, &li,
"%s", info->prefix);
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 13f59f3e8c3..c3984e9f766 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -24,7 +24,7 @@ MODULE_ALIAS("ip6t_NFQUEUE");
MODULE_ALIAS("arpt_NFQUEUE");
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index fec1aefb1c3..4976ce18661 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -12,7 +12,7 @@ MODULE_ALIAS("ipt_NOTRACK");
MODULE_ALIAS("ip6t_NOTRACK");
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
@@ -20,16 +20,16 @@ target(struct sk_buff **pskb,
const void *targinfo)
{
/* Previously seen (loopback)? Ignore. */
- if ((*pskb)->nfct != NULL)
+ if (skb->nfct != NULL)
return XT_CONTINUE;
/* Attach fake conntrack entry.
If there is a real ct entry correspondig to this packet,
it'll hang aroun till timing out. We don't deal with it
for performance reasons. JK */
- (*pskb)->nfct = &nf_conntrack_untracked.ct_general;
- (*pskb)->nfctinfo = IP_CT_NEW;
- nf_conntrack_get((*pskb)->nfct);
+ skb->nfct = &nf_conntrack_untracked.ct_general;
+ skb->nfctinfo = IP_CT_NEW;
+ nf_conntrack_get(skb->nfct);
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index c83779a941a..235806eb6ec 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -28,7 +28,7 @@ MODULE_ALIAS("ip6t_SECMARK");
static u8 mode;
-static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
+static unsigned int target(struct sk_buff *skb, const struct net_device *in,
const struct net_device *out, unsigned int hooknum,
const struct xt_target *target,
const void *targinfo)
@@ -47,7 +47,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in,
BUG();
}
- (*pskb)->secmark = secmark;
+ skb->secmark = secmark;
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index d40f7e4b128..07435a602b1 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -39,7 +39,7 @@ optlen(const u_int8_t *opt, unsigned int offset)
}
static int
-tcpmss_mangle_packet(struct sk_buff **pskb,
+tcpmss_mangle_packet(struct sk_buff *skb,
const struct xt_tcpmss_info *info,
unsigned int tcphoff,
unsigned int minlen)
@@ -50,11 +50,11 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
u16 newmss;
u8 *opt;
- if (!skb_make_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return -1;
- tcplen = (*pskb)->len - tcphoff;
- tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
+ tcplen = skb->len - tcphoff;
+ tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
/* Since it passed flags test in tcp match, we know it is is
not a fragment, and has data >= tcp header length. SYN
@@ -64,19 +64,19 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
if (tcplen != tcph->doff*4) {
if (net_ratelimit())
printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n",
- (*pskb)->len);
+ skb->len);
return -1;
}
if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
- if (dst_mtu((*pskb)->dst) <= minlen) {
+ if (dst_mtu(skb->dst) <= minlen) {
if (net_ratelimit())
printk(KERN_ERR "xt_TCPMSS: "
"unknown or invalid path-MTU (%u)\n",
- dst_mtu((*pskb)->dst));
+ dst_mtu(skb->dst));
return -1;
}
- newmss = dst_mtu((*pskb)->dst) - minlen;
+ newmss = dst_mtu(skb->dst) - minlen;
} else
newmss = info->mss;
@@ -95,7 +95,7 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
opt[i+2] = (newmss & 0xff00) >> 8;
opt[i+3] = newmss & 0x00ff;
- nf_proto_csum_replace2(&tcph->check, *pskb,
+ nf_proto_csum_replace2(&tcph->check, skb,
htons(oldmss), htons(newmss), 0);
return 0;
}
@@ -104,57 +104,53 @@ tcpmss_mangle_packet(struct sk_buff **pskb,
/*
* MSS Option not found ?! add it..
*/
- if (skb_tailroom((*pskb)) < TCPOLEN_MSS) {
- struct sk_buff *newskb;
-
- newskb = skb_copy_expand(*pskb, skb_headroom(*pskb),
- TCPOLEN_MSS, GFP_ATOMIC);
- if (!newskb)
+ if (skb_tailroom(skb) < TCPOLEN_MSS) {
+ if (pskb_expand_head(skb, 0,
+ TCPOLEN_MSS - skb_tailroom(skb),
+ GFP_ATOMIC))
return -1;
- kfree_skb(*pskb);
- *pskb = newskb;
- tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff);
+ tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
}
- skb_put((*pskb), TCPOLEN_MSS);
+ skb_put(skb, TCPOLEN_MSS);
opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
- nf_proto_csum_replace2(&tcph->check, *pskb,
+ nf_proto_csum_replace2(&tcph->check, skb,
htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
opt[0] = TCPOPT_MSS;
opt[1] = TCPOLEN_MSS;
opt[2] = (newmss & 0xff00) >> 8;
opt[3] = newmss & 0x00ff;
- nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0);
+ nf_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);
oldval = ((__be16 *)tcph)[6];
tcph->doff += TCPOLEN_MSS/4;
- nf_proto_csum_replace2(&tcph->check, *pskb,
+ nf_proto_csum_replace2(&tcph->check, skb,
oldval, ((__be16 *)tcph)[6], 0);
return TCPOLEN_MSS;
}
static unsigned int
-xt_tcpmss_target4(struct sk_buff **pskb,
+xt_tcpmss_target4(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
const void *targinfo)
{
- struct iphdr *iph = ip_hdr(*pskb);
+ struct iphdr *iph = ip_hdr(skb);
__be16 newlen;
int ret;
- ret = tcpmss_mangle_packet(pskb, targinfo, iph->ihl * 4,
+ ret = tcpmss_mangle_packet(skb, targinfo, iph->ihl * 4,
sizeof(*iph) + sizeof(struct tcphdr));
if (ret < 0)
return NF_DROP;
if (ret > 0) {
- iph = ip_hdr(*pskb);
+ iph = ip_hdr(skb);
newlen = htons(ntohs(iph->tot_len) + ret);
nf_csum_replace2(&iph->check, iph->tot_len, newlen);
iph->tot_len = newlen;
@@ -164,30 +160,30 @@ xt_tcpmss_target4(struct sk_buff **pskb,
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
static unsigned int
-xt_tcpmss_target6(struct sk_buff **pskb,
+xt_tcpmss_target6(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
const void *targinfo)
{
- struct ipv6hdr *ipv6h = ipv6_hdr(*pskb);
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
u8 nexthdr;
int tcphoff;
int ret;
nexthdr = ipv6h->nexthdr;
- tcphoff = ipv6_skip_exthdr(*pskb, sizeof(*ipv6h), &nexthdr);
+ tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr);
if (tcphoff < 0) {
WARN_ON(1);
return NF_DROP;
}
- ret = tcpmss_mangle_packet(pskb, targinfo, tcphoff,
+ ret = tcpmss_mangle_packet(skb, targinfo, tcphoff,
sizeof(*ipv6h) + sizeof(struct tcphdr));
if (ret < 0)
return NF_DROP;
if (ret > 0) {
- ipv6h = ipv6_hdr(*pskb);
+ ipv6h = ipv6_hdr(skb);
ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret);
}
return XT_CONTINUE;
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index 4df2dedcc0b..26c5d08ab2c 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -10,14 +10,14 @@ MODULE_ALIAS("ipt_TRACE");
MODULE_ALIAS("ip6t_TRACE");
static unsigned int
-target(struct sk_buff **pskb,
+target(struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
unsigned int hooknum,
const struct xt_target *target,
const void *targinfo)
{
- (*pskb)->nf_trace = 1;
+ skb->nf_trace = 1;
return XT_CONTINUE;
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c776bcd9f82..98e313e5e59 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1378,6 +1378,8 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups,
nl_table[unit].cb_mutex = cb_mutex;
nl_table[unit].module = module;
nl_table[unit].registered = 1;
+ } else {
+ kfree(listeners);
}
netlink_table_ungrab();
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 6b407ece953..fa006e06ce3 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -202,11 +202,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
/* yes, we have to worry about both in and out dev
worry later - danger - this API seems to have changed
from earlier kernels */
-
- /* iptables targets take a double skb pointer in case the skb
- * needs to be replaced. We don't own the skb, so this must not
- * happen. The pskb_expand_head above should make sure of this */
- ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL,
+ ret = ipt->tcfi_t->u.kernel.target->target(skb, skb->dev, NULL,
ipt->tcfi_hook,
ipt->tcfi_t->u.kernel.target,
ipt->tcfi_t->data);
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 2d32fd27496..3f8335e6ea2 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -205,20 +205,19 @@ static unsigned int ingress_drop(struct Qdisc *sch)
#ifndef CONFIG_NET_CLS_ACT
#ifdef CONFIG_NETFILTER
static unsigned int
-ing_hook(unsigned int hook, struct sk_buff **pskb,
+ing_hook(unsigned int hook, struct sk_buff *skb,
const struct net_device *indev,
const struct net_device *outdev,
int (*okfn)(struct sk_buff *))
{
struct Qdisc *q;
- struct sk_buff *skb = *pskb;
struct net_device *dev = skb->dev;
int fwres=NF_ACCEPT;
DPRINTK("ing_hook: skb %s dev=%s len=%u\n",
skb->sk ? "(owned)" : "(unowned)",
- skb->dev ? (*pskb)->dev->name : "(no dev)",
+ skb->dev ? skb->dev->name : "(no dev)",
skb->len);
if (dev->qdisc_ingress) {
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 9de3ddaa276..eb4deaf5891 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -954,9 +954,9 @@ static struct inet_protosw sctpv6_stream_protosw = {
.flags = SCTP_PROTOSW_FLAG,
};
-static int sctp6_rcv(struct sk_buff **pskb)
+static int sctp6_rcv(struct sk_buff *skb)
{
- return sctp_rcv(*pskb) ? -1 : 0;
+ return sctp_rcv(skb) ? -1 : 0;
}
static struct inet6_protocol sctpv6_protocol = {
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9c6a4b5f626..bd6f42a15a4 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5058,6 +5058,7 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
+ struct sctp_authchunks __user *p = (void __user *)optval;
struct sctp_authchunks val;
struct sctp_association *asoc;
struct sctp_chunks_param *ch;
@@ -5066,10 +5067,10 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
if (len <= sizeof(struct sctp_authchunks))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+ if (copy_from_user(&val, p, sizeof(struct sctp_authchunks)))
return -EFAULT;
- to = val.gauth_chunks;
+ to = p->gauth_chunks;
asoc = sctp_id2assoc(sk, val.gauth_assoc_id);
if (!asoc)
return -EINVAL;
@@ -5092,6 +5093,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
+ struct sctp_authchunks __user *p = (void __user *)optval;
struct sctp_authchunks val;
struct sctp_association *asoc;
struct sctp_chunks_param *ch;
@@ -5100,10 +5102,10 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
if (len <= sizeof(struct sctp_authchunks))
return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+ if (copy_from_user(&val, p, sizeof(struct sctp_authchunks)))
return -EFAULT;
- to = val.gauth_chunks;
+ to = p->gauth_chunks;
asoc = sctp_id2assoc(sk, val.gauth_assoc_id);
if (!asoc && val.gauth_assoc_id && sctp_style(sk, UDP))
return -EINVAL;
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 8ebfc4db7f5..5c69a725e53 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -5,6 +5,7 @@
obj-$(CONFIG_SUNRPC) += sunrpc.o
obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
+obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
auth.o auth_null.o auth_unix.o \
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 42b3220bed3..8bd074df27d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -42,7 +42,7 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
{
u8 *ptr;
u8 pad;
- int len = buf->len;
+ size_t len = buf->len;
if (len <= buf->head[0].iov_len) {
pad = *(u8 *)(buf->head[0].iov_base + len - 1);
@@ -53,9 +53,9 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
} else
len -= buf->head[0].iov_len;
if (len <= buf->page_len) {
- int last = (buf->page_base + len - 1)
+ unsigned int last = (buf->page_base + len - 1)
>>PAGE_CACHE_SHIFT;
- int offset = (buf->page_base + len - 1)
+ unsigned int offset = (buf->page_base + len - 1)
& (PAGE_CACHE_SIZE - 1);
ptr = kmap_atomic(buf->pages[last], KM_USER0);
pad = *(ptr + offset);
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 7da7050f06c..73940df6c46 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -631,7 +631,8 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o)
return 0;
}
-/* Verify the checksum on the header and return SVC_OK on success.
+/*
+ * Verify the checksum on the header and return SVC_OK on success.
* Otherwise, return SVC_DROP (in the case of a bad sequence number)
* or return SVC_DENIED and indicate error in authp.
*/
@@ -961,6 +962,78 @@ gss_write_init_verf(struct svc_rqst *rqstp, struct rsi *rsip)
}
/*
+ * Having read the cred already and found we're in the context
+ * initiation case, read the verifier and initiate (or check the results
+ * of) upcalls to userspace for help with context initiation. If
+ * the upcall results are available, write the verifier and result.
+ * Otherwise, drop the request pending an answer to the upcall.
+ */
+static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
+ struct rpc_gss_wire_cred *gc, __be32 *authp)
+{
+ struct kvec *argv = &rqstp->rq_arg.head[0];
+ struct kvec *resv = &rqstp->rq_res.head[0];
+ struct xdr_netobj tmpobj;
+ struct rsi *rsip, rsikey;
+
+ /* Read the verifier; should be NULL: */
+ *authp = rpc_autherr_badverf;
+ if (argv->iov_len < 2 * 4)
+ return SVC_DENIED;
+ if (svc_getnl(argv) != RPC_AUTH_NULL)
+ return SVC_DENIED;
+ if (svc_getnl(argv) != 0)
+ return SVC_DENIED;
+
+ /* Martial context handle and token for upcall: */
+ *authp = rpc_autherr_badcred;
+ if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0)
+ return SVC_DENIED;
+ memset(&rsikey, 0, sizeof(rsikey));
+ if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
+ return SVC_DROP;
+ *authp = rpc_autherr_badverf;
+ if (svc_safe_getnetobj(argv, &tmpobj)) {
+ kfree(rsikey.in_handle.data);
+ return SVC_DENIED;
+ }
+ if (dup_netobj(&rsikey.in_token, &tmpobj)) {
+ kfree(rsikey.in_handle.data);
+ return SVC_DROP;
+ }
+
+ /* Perform upcall, or find upcall result: */
+ rsip = rsi_lookup(&rsikey);
+ rsi_free(&rsikey);
+ if (!rsip)
+ return SVC_DROP;
+ switch (cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) {
+ case -EAGAIN:
+ case -ETIMEDOUT:
+ case -ENOENT:
+ /* No upcall result: */
+ return SVC_DROP;
+ case 0:
+ /* Got an answer to the upcall; use it: */
+ if (gss_write_init_verf(rqstp, rsip))
+ return SVC_DROP;
+ if (resv->iov_len + 4 > PAGE_SIZE)
+ return SVC_DROP;
+ svc_putnl(resv, RPC_SUCCESS);
+ if (svc_safe_putnetobj(resv, &rsip->out_handle))
+ return SVC_DROP;
+ if (resv->iov_len + 3 * 4 > PAGE_SIZE)
+ return SVC_DROP;
+ svc_putnl(resv, rsip->major_status);
+ svc_putnl(resv, rsip->minor_status);
+ svc_putnl(resv, GSS_SEQ_WIN);
+ if (svc_safe_putnetobj(resv, &rsip->out_token))
+ return SVC_DROP;
+ }
+ return SVC_COMPLETE;
+}
+
+/*
* Accept an rpcsec packet.
* If context establishment, punt to user space
* If data exchange, verify/decrypt
@@ -974,11 +1047,9 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
u32 crlen;
- struct xdr_netobj tmpobj;
struct gss_svc_data *svcdata = rqstp->rq_auth_data;
struct rpc_gss_wire_cred *gc;
struct rsc *rsci = NULL;
- struct rsi *rsip, rsikey;
__be32 *rpcstart;
__be32 *reject_stat = resv->iov_base + resv->iov_len;
int ret;
@@ -1023,30 +1094,14 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0))
goto auth_err;
- /*
- * We've successfully parsed the credential. Let's check out the
- * verifier. An AUTH_NULL verifier is allowed (and required) for
- * INIT and CONTINUE_INIT requests. AUTH_RPCSEC_GSS is required for
- * PROC_DATA and PROC_DESTROY.
- *
- * AUTH_NULL verifier is 0 (AUTH_NULL), 0 (length).
- * AUTH_RPCSEC_GSS verifier is:
- * 6 (AUTH_RPCSEC_GSS), length, checksum.
- * checksum is calculated over rpcheader from xid up to here.
- */
*authp = rpc_autherr_badverf;
switch (gc->gc_proc) {
case RPC_GSS_PROC_INIT:
case RPC_GSS_PROC_CONTINUE_INIT:
- if (argv->iov_len < 2 * 4)
- goto auth_err;
- if (svc_getnl(argv) != RPC_AUTH_NULL)
- goto auth_err;
- if (svc_getnl(argv) != 0)
- goto auth_err;
- break;
+ return svcauth_gss_handle_init(rqstp, gc, authp);
case RPC_GSS_PROC_DATA:
case RPC_GSS_PROC_DESTROY:
+ /* Look up the context, and check the verifier: */
*authp = rpcsec_gsserr_credproblem;
rsci = gss_svc_searchbyctx(&gc->gc_ctx);
if (!rsci)
@@ -1067,51 +1122,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
/* now act upon the command: */
switch (gc->gc_proc) {
- case RPC_GSS_PROC_INIT:
- case RPC_GSS_PROC_CONTINUE_INIT:
- *authp = rpc_autherr_badcred;
- if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0)
- goto auth_err;
- memset(&rsikey, 0, sizeof(rsikey));
- if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
- goto drop;
- *authp = rpc_autherr_badverf;
- if (svc_safe_getnetobj(argv, &tmpobj)) {
- kfree(rsikey.in_handle.data);
- goto auth_err;
- }
- if (dup_netobj(&rsikey.in_token, &tmpobj)) {
- kfree(rsikey.in_handle.data);
- goto drop;
- }
-
- rsip = rsi_lookup(&rsikey);
- rsi_free(&rsikey);
- if (!rsip) {
- goto drop;
- }
- switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) {
- case -EAGAIN:
- case -ETIMEDOUT:
- case -ENOENT:
- goto drop;
- case 0:
- if (gss_write_init_verf(rqstp, rsip))
- goto drop;
- if (resv->iov_len + 4 > PAGE_SIZE)
- goto drop;
- svc_putnl(resv, RPC_SUCCESS);
- if (svc_safe_putnetobj(resv, &rsip->out_handle))
- goto drop;
- if (resv->iov_len + 3 * 4 > PAGE_SIZE)
- goto drop;
- svc_putnl(resv, rsip->major_status);
- svc_putnl(resv, rsip->minor_status);
- svc_putnl(resv, GSS_SEQ_WIN);
- if (svc_safe_putnetobj(resv, &rsip->out_token))
- goto drop;
- }
- goto complete;
case RPC_GSS_PROC_DESTROY:
if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
goto auth_err;
@@ -1158,7 +1168,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
goto out;
}
auth_err:
- /* Restore write pointer to original value: */
+ /* Restore write pointer to its original value: */
xdr_ressize_check(rqstp, reject_stat);
ret = SVC_DENIED;
goto out;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 52429b1ffcc..76be83ee4b0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -127,7 +127,14 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
struct rpc_clnt *clnt = NULL;
struct rpc_auth *auth;
int err;
- int len;
+ size_t len;
+
+ /* sanity check the name before trying to print it */
+ err = -EINVAL;
+ len = strlen(servname);
+ if (len > RPC_MAXNETNAMELEN)
+ goto out_no_rpciod;
+ len++;
dprintk("RPC: creating %s client for %s (xprt %p)\n",
program->name, servname, xprt);
@@ -148,7 +155,6 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
clnt->cl_parent = clnt;
clnt->cl_server = clnt->cl_inline_name;
- len = strlen(servname) + 1;
if (len > sizeof(clnt->cl_inline_name)) {
char *buf = kmalloc(len, GFP_KERNEL);
if (buf != 0)
@@ -234,8 +240,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
{
struct rpc_xprt *xprt;
struct rpc_clnt *clnt;
- struct rpc_xprtsock_create xprtargs = {
- .proto = args->protocol,
+ struct xprt_create xprtargs = {
+ .ident = args->protocol,
.srcaddr = args->saddress,
.dstaddr = args->address,
.addrlen = args->addrsize,
@@ -253,7 +259,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
*/
if (args->servername == NULL) {
struct sockaddr_in *addr =
- (struct sockaddr_in *) &args->address;
+ (struct sockaddr_in *) args->address;
snprintf(servername, sizeof(servername), NIPQUAD_FMT,
NIPQUAD(addr->sin_addr.s_addr));
args->servername = servername;
@@ -269,9 +275,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
xprt->resvport = 0;
- dprintk("RPC: creating %s client for %s (xprt %p)\n",
- args->program->name, args->servername, xprt);
-
clnt = rpc_new_client(xprt, args->servername, args->program,
args->version, args->authflavor);
if (IS_ERR(clnt))
@@ -439,7 +442,7 @@ rpc_release_client(struct rpc_clnt *clnt)
*/
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
struct rpc_program *program,
- int vers)
+ u32 vers)
{
struct rpc_clnt *clnt;
struct rpc_version *version;
@@ -843,8 +846,7 @@ call_allocate(struct rpc_task *task)
dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
if (RPC_IS_ASYNC(task) || !signalled()) {
- xprt_release(task);
- task->tk_action = call_reserve;
+ task->tk_action = call_allocate;
rpc_delay(task, HZ>>4);
return;
}
@@ -871,6 +873,7 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
buf->head[0].iov_len = len;
buf->tail[0].iov_len = 0;
buf->page_len = 0;
+ buf->flags = 0;
buf->len = 0;
buf->buflen = len;
}
@@ -937,7 +940,7 @@ call_bind(struct rpc_task *task)
static void
call_bind_status(struct rpc_task *task)
{
- int status = -EACCES;
+ int status = -EIO;
if (task->tk_status >= 0) {
dprint_status(task);
@@ -947,9 +950,20 @@ call_bind_status(struct rpc_task *task)
}
switch (task->tk_status) {
+ case -EAGAIN:
+ dprintk("RPC: %5u rpcbind waiting for another request "
+ "to finish\n", task->tk_pid);
+ /* avoid busy-waiting here -- could be a network outage. */
+ rpc_delay(task, 5*HZ);
+ goto retry_timeout;
case -EACCES:
dprintk("RPC: %5u remote rpcbind: RPC program/version "
"unavailable\n", task->tk_pid);
+ /* fail immediately if this is an RPC ping */
+ if (task->tk_msg.rpc_proc->p_proc == 0) {
+ status = -EOPNOTSUPP;
+ break;
+ }
rpc_delay(task, 3*HZ);
goto retry_timeout;
case -ETIMEDOUT:
@@ -957,6 +971,7 @@ call_bind_status(struct rpc_task *task)
task->tk_pid);
goto retry_timeout;
case -EPFNOSUPPORT:
+ /* server doesn't support any rpcbind version we know of */
dprintk("RPC: %5u remote rpcbind service unavailable\n",
task->tk_pid);
break;
@@ -969,7 +984,6 @@ call_bind_status(struct rpc_task *task)
default:
dprintk("RPC: %5u unrecognized rpcbind error (%d)\n",
task->tk_pid, -task->tk_status);
- status = -EIO;
}
rpc_exit(task, status);
@@ -1257,7 +1271,6 @@ call_refresh(struct rpc_task *task)
{
dprint_status(task);
- xprt_release(task); /* Must do to obtain new XID */
task->tk_action = call_refreshresult;
task->tk_status = 0;
task->tk_client->cl_stats->rpcauthrefresh++;
@@ -1375,6 +1388,8 @@ call_verify(struct rpc_task *task)
dprintk("RPC: %5u %s: retry stale creds\n",
task->tk_pid, __FUNCTION__);
rpcauth_invalcred(task);
+ /* Ensure we obtain a new XID! */
+ xprt_release(task);
task->tk_action = call_refresh;
goto out_retry;
case RPC_AUTH_BADCRED:
@@ -1523,13 +1538,18 @@ void rpc_show_tasks(void)
spin_lock(&clnt->cl_lock);
list_for_each_entry(t, &clnt->cl_tasks, tk_task) {
const char *rpc_waitq = "none";
+ int proc;
+
+ if (t->tk_msg.rpc_proc)
+ proc = t->tk_msg.rpc_proc->p_proc;
+ else
+ proc = -1;
if (RPC_IS_QUEUED(t))
rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
- t->tk_pid,
- (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
+ t->tk_pid, proc,
t->tk_flags, t->tk_status,
t->tk_client,
(t->tk_client ? t->tk_client->cl_prog : 0),
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 669e12a4ed1..c8433e8865a 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -14,7 +14,7 @@
#include <linux/pagemap.h>
#include <linux/mount.h>
#include <linux/namei.h>
-#include <linux/dnotify.h>
+#include <linux/fsnotify.h>
#include <linux/kernel.h>
#include <asm/ioctls.h>
@@ -329,6 +329,7 @@ rpc_show_info(struct seq_file *m, void *v)
clnt->cl_prog, clnt->cl_vers);
seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
+ seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT));
return 0;
}
@@ -585,6 +586,7 @@ rpc_populate(struct dentry *parent,
if (S_ISDIR(mode))
inc_nlink(dir);
d_add(dentry, inode);
+ fsnotify_create(dir, dentry);
}
mutex_unlock(&dir->i_mutex);
return 0;
@@ -606,7 +608,7 @@ __rpc_mkdir(struct inode *dir, struct dentry *dentry)
inode->i_ino = iunique(dir->i_sb, 100);
d_instantiate(dentry, inode);
inc_nlink(dir);
- inode_dir_notify(dir, DN_CREATE);
+ fsnotify_mkdir(dir, dentry);
return 0;
out_err:
printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
@@ -748,7 +750,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi
rpci->flags = flags;
rpci->ops = ops;
rpci->nkern_readwriters = 1;
- inode_dir_notify(dir, DN_CREATE);
+ fsnotify_create(dir, dentry);
dget(dentry);
out:
mutex_unlock(&dir->i_mutex);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index d1740dbab99..a05493aedb6 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -16,11 +16,14 @@
#include <linux/types.h>
#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/in6.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/xprtsock.h>
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_BIND
@@ -91,26 +94,6 @@ enum {
#define RPCB_MAXADDRLEN (128u)
/*
- * r_netid
- *
- * Quoting RFC 3530, section 2.2:
- *
- * For TCP over IPv4 the value of r_netid is the string "tcp". For UDP
- * over IPv4 the value of r_netid is the string "udp".
- *
- * ...
- *
- * For TCP over IPv6 the value of r_netid is the string "tcp6". For UDP
- * over IPv6 the value of r_netid is the string "udp6".
- */
-#define RPCB_NETID_UDP "\165\144\160" /* "udp" */
-#define RPCB_NETID_TCP "\164\143\160" /* "tcp" */
-#define RPCB_NETID_UDP6 "\165\144\160\066" /* "udp6" */
-#define RPCB_NETID_TCP6 "\164\143\160\066" /* "tcp6" */
-
-#define RPCB_MAXNETIDLEN (4u)
-
-/*
* r_owner
*
* The "owner" is allowed to unset a service in the rpcbind database.
@@ -120,7 +103,7 @@ enum {
#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING)
static void rpcb_getport_done(struct rpc_task *, void *);
-extern struct rpc_program rpcb_program;
+static struct rpc_program rpcb_program;
struct rpcbind_args {
struct rpc_xprt * r_xprt;
@@ -137,10 +120,13 @@ struct rpcbind_args {
static struct rpc_procinfo rpcb_procedures2[];
static struct rpc_procinfo rpcb_procedures3[];
-static struct rpcb_info {
+struct rpcb_info {
int rpc_vers;
struct rpc_procinfo * rpc_proc;
-} rpcb_next_version[];
+};
+
+static struct rpcb_info rpcb_next_version[];
+static struct rpcb_info rpcb_next_version6[];
static void rpcb_getport_prepare(struct rpc_task *task, void *calldata)
{
@@ -190,7 +176,17 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
RPC_CLNT_CREATE_INTR),
};
- ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
+ switch (srvaddr->sa_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
+ break;
+ case AF_INET6:
+ ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT);
+ break;
+ default:
+ return NULL;
+ }
+
if (!privileged)
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
return rpc_create(&args);
@@ -234,7 +230,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
prog, vers, prot, port);
rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
- IPPROTO_UDP, 2, 1);
+ XPRT_TRANSPORT_UDP, 2, 1);
if (IS_ERR(rpcb_clnt))
return PTR_ERR(rpcb_clnt);
@@ -316,6 +312,7 @@ void rpcb_getport_async(struct rpc_task *task)
struct rpc_task *child;
struct sockaddr addr;
int status;
+ struct rpcb_info *info;
dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
task->tk_pid, __FUNCTION__,
@@ -325,7 +322,7 @@ void rpcb_getport_async(struct rpc_task *task)
BUG_ON(clnt->cl_parent != clnt);
if (xprt_test_and_set_binding(xprt)) {
- status = -EACCES; /* tell caller to check again */
+ status = -EAGAIN; /* tell caller to check again */
dprintk("RPC: %5u %s: waiting for another binder\n",
task->tk_pid, __FUNCTION__);
goto bailout_nowake;
@@ -343,18 +340,43 @@ void rpcb_getport_async(struct rpc_task *task)
goto bailout_nofree;
}
- if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) {
+ rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
+
+ /* Don't ever use rpcbind v2 for AF_INET6 requests */
+ switch (addr.sa_family) {
+ case AF_INET:
+ info = rpcb_next_version;
+ break;
+ case AF_INET6:
+ info = rpcb_next_version6;
+ break;
+ default:
+ status = -EAFNOSUPPORT;
+ dprintk("RPC: %5u %s: bad address family\n",
+ task->tk_pid, __FUNCTION__);
+ goto bailout_nofree;
+ }
+ if (info[xprt->bind_index].rpc_proc == NULL) {
xprt->bind_index = 0;
- status = -EACCES; /* tell caller to try again later */
+ status = -EPFNOSUPPORT;
dprintk("RPC: %5u %s: no more getport versions available\n",
task->tk_pid, __FUNCTION__);
goto bailout_nofree;
}
- bind_version = rpcb_next_version[xprt->bind_index].rpc_vers;
+ bind_version = info[xprt->bind_index].rpc_vers;
dprintk("RPC: %5u %s: trying rpcbind version %u\n",
task->tk_pid, __FUNCTION__, bind_version);
+ rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot,
+ bind_version, 0);
+ if (IS_ERR(rpcb_clnt)) {
+ status = PTR_ERR(rpcb_clnt);
+ dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
+ task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt));
+ goto bailout_nofree;
+ }
+
map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC);
if (!map) {
status = -ENOMEM;
@@ -367,28 +389,19 @@ void rpcb_getport_async(struct rpc_task *task)
map->r_prot = xprt->prot;
map->r_port = 0;
map->r_xprt = xprt_get(xprt);
- map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP :
- RPCB_NETID_UDP;
- memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR),
- sizeof(map->r_addr));
+ map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
+ memcpy(map->r_addr,
+ rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR),
+ sizeof(map->r_addr));
map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */
- rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
- rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0);
- if (IS_ERR(rpcb_clnt)) {
- status = PTR_ERR(rpcb_clnt);
- dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n",
- task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt));
- goto bailout;
- }
-
child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map);
rpc_release_client(rpcb_clnt);
if (IS_ERR(child)) {
status = -EIO;
dprintk("RPC: %5u %s: rpc_run_task failed\n",
task->tk_pid, __FUNCTION__);
- goto bailout_nofree;
+ goto bailout;
}
rpc_put_task(child);
@@ -403,6 +416,7 @@ bailout_nofree:
bailout_nowake:
task->tk_status = status;
}
+EXPORT_SYMBOL_GPL(rpcb_getport_async);
/*
* Rpcbind child task calls this callback via tk_exit.
@@ -413,6 +427,10 @@ static void rpcb_getport_done(struct rpc_task *child, void *data)
struct rpc_xprt *xprt = map->r_xprt;
int status = child->tk_status;
+ /* Garbage reply: retry with a lesser rpcbind version */
+ if (status == -EIO)
+ status = -EPROTONOSUPPORT;
+
/* rpcbind server doesn't support this rpcbind protocol version */
if (status == -EPROTONOSUPPORT)
xprt->bind_index++;
@@ -490,16 +508,24 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
unsigned short *portp)
{
char *addr;
- int addr_len, c, i, f, first, val;
+ u32 addr_len;
+ int c, i, f, first, val;
*portp = 0;
- addr_len = (unsigned int) ntohl(*p++);
- if (addr_len > RPCB_MAXADDRLEN) /* sanity */
- return -EINVAL;
-
- dprintk("RPC: rpcb_decode_getaddr returned string: '%s'\n",
- (char *) p);
-
+ addr_len = ntohl(*p++);
+
+ /*
+ * Simple sanity check. The smallest possible universal
+ * address is an IPv4 address string containing 11 bytes.
+ */
+ if (addr_len < 11 || addr_len > RPCB_MAXADDRLEN)
+ goto out_err;
+
+ /*
+ * Start at the end and walk backwards until the first dot
+ * is encountered. When the second dot is found, we have
+ * both parts of the port number.
+ */
addr = (char *)p;
val = 0;
first = 1;
@@ -521,8 +547,19 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
}
}
+ /*
+ * Simple sanity check. If we never saw a dot in the reply,
+ * then this was probably just garbage.
+ */
+ if (first)
+ goto out_err;
+
dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp);
return 0;
+
+out_err:
+ dprintk("RPC: rpcbind server returned malformed reply\n");
+ return -EIO;
}
#define RPCB_program_sz (1u)
@@ -531,7 +568,7 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
#define RPCB_port_sz (1u)
#define RPCB_boolean_sz (1u)
-#define RPCB_netid_sz (1+XDR_QUADLEN(RPCB_MAXNETIDLEN))
+#define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN))
#define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN))
#define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN))
@@ -593,6 +630,14 @@ static struct rpcb_info rpcb_next_version[] = {
{ 0, NULL },
};
+static struct rpcb_info rpcb_next_version6[] = {
+#ifdef CONFIG_SUNRPC_BIND34
+ { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] },
+ { 3, &rpcb_procedures3[RPCBPROC_GETADDR] },
+#endif
+ { 0, NULL },
+};
+
static struct rpc_version rpcb_version2 = {
.number = 2,
.nrprocs = RPCB_HIGHPROC_2,
@@ -621,7 +666,7 @@ static struct rpc_version *rpcb_version[] = {
static struct rpc_stat rpcb_stats;
-struct rpc_program rpcb_program = {
+static struct rpc_program rpcb_program = {
.name = "rpcbind",
.number = RPCBIND_PROGRAM,
.nrvers = ARRAY_SIZE(rpcb_version),
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 954d7ec86c7..3c773c53e12 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -777,6 +777,7 @@ void *rpc_malloc(struct rpc_task *task, size_t size)
task->tk_pid, size, buf);
return &buf->data;
}
+EXPORT_SYMBOL_GPL(rpc_malloc);
/**
* rpc_free - free buffer allocated via rpc_malloc
@@ -802,6 +803,7 @@ void rpc_free(void *buffer)
else
kfree(buf);
}
+EXPORT_SYMBOL_GPL(rpc_free);
/*
* Creation and deletion of RPC task structures
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 1d377d1ab7f..97ac45f034d 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -34,6 +34,7 @@ size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
desc->offset += len;
return len;
}
+EXPORT_SYMBOL_GPL(xdr_skb_read_bits);
/**
* xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
@@ -137,6 +138,7 @@ copy_tail:
out:
return copied;
}
+EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb);
/**
* csum_partial_copy_to_xdr - checksum and copy data
@@ -179,3 +181,4 @@ no_checksum:
return -1;
return 0;
}
+EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr);
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 384c4ad5ab8..33d89e842c8 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -20,7 +20,7 @@
#include <linux/sunrpc/auth.h>
#include <linux/workqueue.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
-
+#include <linux/sunrpc/xprtsock.h>
/* RPC scheduler */
EXPORT_SYMBOL(rpc_execute);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 55ea6df069d..a4a6bf7deaa 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -777,6 +777,30 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
}
/*
+ * Printk the given error with the address of the client that caused it.
+ */
+static int
+__attribute__ ((format (printf, 2, 3)))
+svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
+{
+ va_list args;
+ int r;
+ char buf[RPC_MAX_ADDRBUFLEN];
+
+ if (!net_ratelimit())
+ return 0;
+
+ printk(KERN_WARNING "svc: %s: ",
+ svc_print_addr(rqstp, buf, sizeof(buf)));
+
+ va_start(args, fmt);
+ r = vprintk(fmt, args);
+ va_end(args);
+
+ return r;
+}
+
+/*
* Process the RPC request.
*/
int
@@ -963,14 +987,13 @@ svc_process(struct svc_rqst *rqstp)
return 0;
err_short_len:
- if (net_ratelimit())
- printk("svc: short len %Zd, dropping request\n", argv->iov_len);
+ svc_printk(rqstp, "short len %Zd, dropping request\n",
+ argv->iov_len);
goto dropit; /* drop request */
err_bad_dir:
- if (net_ratelimit())
- printk("svc: bad direction %d, dropping request\n", dir);
+ svc_printk(rqstp, "bad direction %d, dropping request\n", dir);
serv->sv_stats->rpcbadfmt++;
goto dropit; /* drop request */
@@ -1000,8 +1023,7 @@ err_bad_prog:
goto sendit;
err_bad_vers:
- if (net_ratelimit())
- printk("svc: unknown version (%d for prog %d, %s)\n",
+ svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n",
vers, prog, progp->pg_name);
serv->sv_stats->rpcbadfmt++;
@@ -1011,16 +1033,14 @@ err_bad_vers:
goto sendit;
err_bad_proc:
- if (net_ratelimit())
- printk("svc: unknown procedure (%d)\n", proc);
+ svc_printk(rqstp, "unknown procedure (%d)\n", proc);
serv->sv_stats->rpcbadfmt++;
svc_putnl(resv, RPC_PROC_UNAVAIL);
goto sendit;
err_garbage:
- if (net_ratelimit())
- printk("svc: failed to decode args\n");
+ svc_printk(rqstp, "failed to decode args\n");
rpc_stat = rpc_garbage_args;
err_bad:
diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c
index 8142fdb8a93..31becbf0926 100644
--- a/net/sunrpc/timer.c
+++ b/net/sunrpc/timer.c
@@ -17,6 +17,7 @@
#include <linux/types.h>
#include <linux/unistd.h>
+#include <linux/module.h>
#include <linux/sunrpc/clnt.h>
@@ -40,6 +41,7 @@ rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo)
rt->ntimeouts[i] = 0;
}
}
+EXPORT_SYMBOL_GPL(rpc_init_rtt);
/*
* NB: When computing the smoothed RTT and standard deviation,
@@ -75,6 +77,7 @@ rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m)
if (*sdrtt < RPC_RTO_MIN)
*sdrtt = RPC_RTO_MIN;
}
+EXPORT_SYMBOL_GPL(rpc_update_rtt);
/*
* Estimate rto for an nfs rpc sent via. an unreliable datagram.
@@ -103,3 +106,4 @@ rpc_calc_rto(struct rpc_rtt *rt, unsigned timer)
return res;
}
+EXPORT_SYMBOL_GPL(rpc_calc_rto);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index c8c2edccad7..282a9a2ec90 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -62,6 +62,9 @@ static inline void do_xprt_reserve(struct rpc_task *);
static void xprt_connect_status(struct rpc_task *task);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
+static spinlock_t xprt_list_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(xprt_list);
+
/*
* The transport code maintains an estimate on the maximum number of out-
* standing RPC requests, using a smoothed version of the congestion
@@ -81,6 +84,78 @@ static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
/**
+ * xprt_register_transport - register a transport implementation
+ * @transport: transport to register
+ *
+ * If a transport implementation is loaded as a kernel module, it can
+ * call this interface to make itself known to the RPC client.
+ *
+ * Returns:
+ * 0: transport successfully registered
+ * -EEXIST: transport already registered
+ * -EINVAL: transport module being unloaded
+ */
+int xprt_register_transport(struct xprt_class *transport)
+{
+ struct xprt_class *t;
+ int result;
+
+ result = -EEXIST;
+ spin_lock(&xprt_list_lock);
+ list_for_each_entry(t, &xprt_list, list) {
+ /* don't register the same transport class twice */
+ if (t->ident == transport->ident)
+ goto out;
+ }
+
+ result = -EINVAL;
+ if (try_module_get(THIS_MODULE)) {
+ list_add_tail(&transport->list, &xprt_list);
+ printk(KERN_INFO "RPC: Registered %s transport module.\n",
+ transport->name);
+ result = 0;
+ }
+
+out:
+ spin_unlock(&xprt_list_lock);
+ return result;
+}
+EXPORT_SYMBOL_GPL(xprt_register_transport);
+
+/**
+ * xprt_unregister_transport - unregister a transport implementation
+ * transport: transport to unregister
+ *
+ * Returns:
+ * 0: transport successfully unregistered
+ * -ENOENT: transport never registered
+ */
+int xprt_unregister_transport(struct xprt_class *transport)
+{
+ struct xprt_class *t;
+ int result;
+
+ result = 0;
+ spin_lock(&xprt_list_lock);
+ list_for_each_entry(t, &xprt_list, list) {
+ if (t == transport) {
+ printk(KERN_INFO
+ "RPC: Unregistered %s transport module.\n",
+ transport->name);
+ list_del_init(&transport->list);
+ module_put(THIS_MODULE);
+ goto out;
+ }
+ }
+ result = -ENOENT;
+
+out:
+ spin_unlock(&xprt_list_lock);
+ return result;
+}
+EXPORT_SYMBOL_GPL(xprt_unregister_transport);
+
+/**
* xprt_reserve_xprt - serialize write access to transports
* @task: task that is requesting access to the transport
*
@@ -118,6 +193,7 @@ out_sleep:
rpc_sleep_on(&xprt->sending, task, NULL, NULL);
return 0;
}
+EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
static void xprt_clear_locked(struct rpc_xprt *xprt)
{
@@ -167,6 +243,7 @@ out_sleep:
rpc_sleep_on(&xprt->sending, task, NULL, NULL);
return 0;
}
+EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
@@ -246,6 +323,7 @@ void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
__xprt_lock_write_next(xprt);
}
}
+EXPORT_SYMBOL_GPL(xprt_release_xprt);
/**
* xprt_release_xprt_cong - allow other requests to use a transport
@@ -262,6 +340,7 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
__xprt_lock_write_next_cong(xprt);
}
}
+EXPORT_SYMBOL_GPL(xprt_release_xprt_cong);
static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
@@ -314,6 +393,7 @@ void xprt_release_rqst_cong(struct rpc_task *task)
{
__xprt_put_cong(task->tk_xprt, task->tk_rqstp);
}
+EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
/**
* xprt_adjust_cwnd - adjust transport congestion window
@@ -345,6 +425,7 @@ void xprt_adjust_cwnd(struct rpc_task *task, int result)
xprt->cwnd = cwnd;
__xprt_put_cong(xprt, req);
}
+EXPORT_SYMBOL_GPL(xprt_adjust_cwnd);
/**
* xprt_wake_pending_tasks - wake all tasks on a transport's pending queue
@@ -359,6 +440,7 @@ void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status)
else
rpc_wake_up(&xprt->pending);
}
+EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks);
/**
* xprt_wait_for_buffer_space - wait for transport output buffer to clear
@@ -373,6 +455,7 @@ void xprt_wait_for_buffer_space(struct rpc_task *task)
task->tk_timeout = req->rq_timeout;
rpc_sleep_on(&xprt->pending, task, NULL, NULL);
}
+EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space);
/**
* xprt_write_space - wake the task waiting for transport output buffer space
@@ -393,6 +476,7 @@ void xprt_write_space(struct rpc_xprt *xprt)
}
spin_unlock_bh(&xprt->transport_lock);
}
+EXPORT_SYMBOL_GPL(xprt_write_space);
/**
* xprt_set_retrans_timeout_def - set a request's retransmit timeout
@@ -406,6 +490,7 @@ void xprt_set_retrans_timeout_def(struct rpc_task *task)
{
task->tk_timeout = task->tk_rqstp->rq_timeout;
}
+EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def);
/*
* xprt_set_retrans_timeout_rtt - set a request's retransmit timeout
@@ -425,6 +510,7 @@ void xprt_set_retrans_timeout_rtt(struct rpc_task *task)
if (task->tk_timeout > max_timeout || task->tk_timeout == 0)
task->tk_timeout = max_timeout;
}
+EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt);
static void xprt_reset_majortimeo(struct rpc_rqst *req)
{
@@ -500,6 +586,7 @@ void xprt_disconnect(struct rpc_xprt *xprt)
xprt_wake_pending_tasks(xprt, -ENOTCONN);
spin_unlock_bh(&xprt->transport_lock);
}
+EXPORT_SYMBOL_GPL(xprt_disconnect);
static void
xprt_init_autodisconnect(unsigned long data)
@@ -610,6 +697,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
xprt->stat.bad_xids++;
return NULL;
}
+EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
/**
* xprt_update_rtt - update an RPC client's RTT state after receiving a reply
@@ -629,6 +717,7 @@ void xprt_update_rtt(struct rpc_task *task)
rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
}
}
+EXPORT_SYMBOL_GPL(xprt_update_rtt);
/**
* xprt_complete_rqst - called when reply processing is complete
@@ -653,6 +742,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
req->rq_received = req->rq_private_buf.len = copied;
rpc_wake_up_task(task);
}
+EXPORT_SYMBOL_GPL(xprt_complete_rqst);
static void xprt_timer(struct rpc_task *task)
{
@@ -889,23 +979,25 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i
* @args: rpc transport creation arguments
*
*/
-struct rpc_xprt *xprt_create_transport(struct rpc_xprtsock_create *args)
+struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
{
struct rpc_xprt *xprt;
struct rpc_rqst *req;
+ struct xprt_class *t;
- switch (args->proto) {
- case IPPROTO_UDP:
- xprt = xs_setup_udp(args);
- break;
- case IPPROTO_TCP:
- xprt = xs_setup_tcp(args);
- break;
- default:
- printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n",
- args->proto);
- return ERR_PTR(-EIO);
+ spin_lock(&xprt_list_lock);
+ list_for_each_entry(t, &xprt_list, list) {
+ if (t->ident == args->ident) {
+ spin_unlock(&xprt_list_lock);
+ goto found;
+ }
}
+ spin_unlock(&xprt_list_lock);
+ printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident);
+ return ERR_PTR(-EIO);
+
+found:
+ xprt = t->setup(args);
if (IS_ERR(xprt)) {
dprintk("RPC: xprt_create_transport: failed, %ld\n",
-PTR_ERR(xprt));
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
new file mode 100644
index 00000000000..264f0feeb51
--- /dev/null
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o
+
+xprtrdma-y := transport.o rpc_rdma.o verbs.o
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
new file mode 100644
index 00000000000..12db6358042
--- /dev/null
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -0,0 +1,868 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * Neither the name of the Network Appliance, Inc. nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * rpc_rdma.c
+ *
+ * This file contains the guts of the RPC RDMA protocol, and
+ * does marshaling/unmarshaling, etc. It is also where interfacing
+ * to the Linux RPC framework lives.
+ */
+
+#include "xprt_rdma.h"
+
+#include <linux/highmem.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY RPCDBG_TRANS
+#endif
+
+enum rpcrdma_chunktype {
+ rpcrdma_noch = 0,
+ rpcrdma_readch,
+ rpcrdma_areadch,
+ rpcrdma_writech,
+ rpcrdma_replych
+};
+
+#ifdef RPC_DEBUG
+static const char transfertypes[][12] = {
+ "pure inline", /* no chunks */
+ " read chunk", /* some argument via rdma read */
+ "*read chunk", /* entire request via rdma read */
+ "write chunk", /* some result via rdma write */
+ "reply chunk" /* entire reply via rdma write */
+};
+#endif
+
+/*
+ * Chunk assembly from upper layer xdr_buf.
+ *
+ * Prepare the passed-in xdr_buf into representation as RPC/RDMA chunk
+ * elements. Segments are then coalesced when registered, if possible
+ * within the selected memreg mode.
+ *
+ * Note, this routine is never called if the connection's memory
+ * registration strategy is 0 (bounce buffers).
+ */
+
+static int
+rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, int pos,
+ enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs)
+{
+ int len, n = 0, p;
+
+ if (pos == 0 && xdrbuf->head[0].iov_len) {
+ seg[n].mr_page = NULL;
+ seg[n].mr_offset = xdrbuf->head[0].iov_base;
+ seg[n].mr_len = xdrbuf->head[0].iov_len;
+ pos += xdrbuf->head[0].iov_len;
+ ++n;
+ }
+
+ if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) {
+ if (n == nsegs)
+ return 0;
+ seg[n].mr_page = xdrbuf->pages[0];
+ seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base;
+ seg[n].mr_len = min_t(u32,
+ PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len);
+ len = xdrbuf->page_len - seg[n].mr_len;
+ pos += len;
+ ++n;
+ p = 1;
+ while (len > 0) {
+ if (n == nsegs)
+ return 0;
+ seg[n].mr_page = xdrbuf->pages[p];
+ seg[n].mr_offset = NULL;
+ seg[n].mr_len = min_t(u32, PAGE_SIZE, len);
+ len -= seg[n].mr_len;
+ ++n;
+ ++p;
+ }
+ }
+
+ if (pos < xdrbuf->len && xdrbuf->tail[0].iov_len) {
+ if (n == nsegs)
+ return 0;
+ seg[n].mr_page = NULL;
+ seg[n].mr_offset = xdrbuf->tail[0].iov_base;
+ seg[n].mr_len = xdrbuf->tail[0].iov_len;
+ pos += xdrbuf->tail[0].iov_len;
+ ++n;
+ }
+
+ if (pos < xdrbuf->len)
+ dprintk("RPC: %s: marshaled only %d of %d\n",
+ __func__, pos, xdrbuf->len);
+
+ return n;
+}
+
+/*
+ * Create read/write chunk lists, and reply chunks, for RDMA
+ *
+ * Assume check against THRESHOLD has been done, and chunks are required.
+ * Assume only encoding one list entry for read|write chunks. The NFSv3
+ * protocol is simple enough to allow this as it only has a single "bulk
+ * result" in each procedure - complicated NFSv4 COMPOUNDs are not. (The
+ * RDMA/Sessions NFSv4 proposal addresses this for future v4 revs.)
+ *
+ * When used for a single reply chunk (which is a special write
+ * chunk used for the entire reply, rather than just the data), it
+ * is used primarily for READDIR and READLINK which would otherwise
+ * be severely size-limited by a small rdma inline read max. The server
+ * response will come back as an RDMA Write, followed by a message
+ * of type RDMA_NOMSG carrying the xid and length. As a result, reply
+ * chunks do not provide data alignment, however they do not require
+ * "fixup" (moving the response to the upper layer buffer) either.
+ *
+ * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64):
+ *
+ * Read chunklist (a linked list):
+ * N elements, position P (same P for all chunks of same arg!):
+ * 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0
+ *
+ * Write chunklist (a list of (one) counted array):
+ * N elements:
+ * 1 - N - HLOO - HLOO - ... - HLOO - 0
+ *
+ * Reply chunk (a counted array):
+ * N elements:
+ * 1 - N - HLOO - HLOO - ... - HLOO
+ */
+
+static unsigned int
+rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
+ struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type)
+{
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_task->tk_xprt);
+ int nsegs, nchunks = 0;
+ int pos;
+ struct rpcrdma_mr_seg *seg = req->rl_segments;
+ struct rpcrdma_read_chunk *cur_rchunk = NULL;
+ struct rpcrdma_write_array *warray = NULL;
+ struct rpcrdma_write_chunk *cur_wchunk = NULL;
+ u32 *iptr = headerp->rm_body.rm_chunks;
+
+ if (type == rpcrdma_readch || type == rpcrdma_areadch) {
+ /* a read chunk - server will RDMA Read our memory */
+ cur_rchunk = (struct rpcrdma_read_chunk *) iptr;
+ } else {
+ /* a write or reply chunk - server will RDMA Write our memory */
+ *iptr++ = xdr_zero; /* encode a NULL read chunk list */
+ if (type == rpcrdma_replych)
+ *iptr++ = xdr_zero; /* a NULL write chunk list */
+ warray = (struct rpcrdma_write_array *) iptr;
+ cur_wchunk = (struct rpcrdma_write_chunk *) (warray + 1);
+ }
+
+ if (type == rpcrdma_replych || type == rpcrdma_areadch)
+ pos = 0;
+ else
+ pos = target->head[0].iov_len;
+
+ nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS);
+ if (nsegs == 0)
+ return 0;
+
+ do {
+ /* bind/register the memory, then build chunk from result. */
+ int n = rpcrdma_register_external(seg, nsegs,
+ cur_wchunk != NULL, r_xprt);
+ if (n <= 0)
+ goto out;
+ if (cur_rchunk) { /* read */
+ cur_rchunk->rc_discrim = xdr_one;
+ /* all read chunks have the same "position" */
+ cur_rchunk->rc_position = htonl(pos);
+ cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey);
+ cur_rchunk->rc_target.rs_length = htonl(seg->mr_len);
+ xdr_encode_hyper(
+ (u32 *)&cur_rchunk->rc_target.rs_offset,
+ seg->mr_base);
+ dprintk("RPC: %s: read chunk "
+ "elem %d@0x%llx:0x%x pos %d (%s)\n", __func__,
+ seg->mr_len, seg->mr_base, seg->mr_rkey, pos,
+ n < nsegs ? "more" : "last");
+ cur_rchunk++;
+ r_xprt->rx_stats.read_chunk_count++;
+ } else { /* write/reply */
+ cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey);
+ cur_wchunk->wc_target.rs_length = htonl(seg->mr_len);
+ xdr_encode_hyper(
+ (u32 *)&cur_wchunk->wc_target.rs_offset,
+ seg->mr_base);
+ dprintk("RPC: %s: %s chunk "
+ "elem %d@0x%llx:0x%x (%s)\n", __func__,
+ (type == rpcrdma_replych) ? "reply" : "write",
+ seg->mr_len, seg->mr_base, seg->mr_rkey,
+ n < nsegs ? "more" : "last");
+ cur_wchunk++;
+ if (type == rpcrdma_replych)
+ r_xprt->rx_stats.reply_chunk_count++;
+ else
+ r_xprt->rx_stats.write_chunk_count++;
+ r_xprt->rx_stats.total_rdma_request += seg->mr_len;
+ }
+ nchunks++;
+ seg += n;
+ nsegs -= n;
+ } while (nsegs);
+
+ /* success. all failures return above */
+ req->rl_nchunks = nchunks;
+
+ BUG_ON(nchunks == 0);
+
+ /*
+ * finish off header. If write, marshal discrim and nchunks.
+ */
+ if (cur_rchunk) {
+ iptr = (u32 *) cur_rchunk;
+ *iptr++ = xdr_zero; /* finish the read chunk list */
+ *iptr++ = xdr_zero; /* encode a NULL write chunk list */
+ *iptr++ = xdr_zero; /* encode a NULL reply chunk */
+ } else {
+ warray->wc_discrim = xdr_one;
+ warray->wc_nchunks = htonl(nchunks);
+ iptr = (u32 *) cur_wchunk;
+ if (type == rpcrdma_writech) {
+ *iptr++ = xdr_zero; /* finish the write chunk list */
+ *iptr++ = xdr_zero; /* encode a NULL reply chunk */
+ }
+ }
+
+ /*
+ * Return header size.
+ */
+ return (unsigned char *)iptr - (unsigned char *)headerp;
+
+out:
+ for (pos = 0; nchunks--;)
+ pos += rpcrdma_deregister_external(
+ &req->rl_segments[pos], r_xprt, NULL);
+ return 0;
+}
+
+/*
+ * Copy write data inline.
+ * This function is used for "small" requests. Data which is passed
+ * to RPC via iovecs (or page list) is copied directly into the
+ * pre-registered memory buffer for this request. For small amounts
+ * of data, this is efficient. The cutoff value is tunable.
+ */
+static int
+rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
+{
+ int i, npages, curlen;
+ int copy_len;
+ unsigned char *srcp, *destp;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+
+ destp = rqst->rq_svec[0].iov_base;
+ curlen = rqst->rq_svec[0].iov_len;
+ destp += curlen;
+ /*
+ * Do optional padding where it makes sense. Alignment of write
+ * payload can help the server, if our setting is accurate.
+ */
+ pad -= (curlen + 36/*sizeof(struct rpcrdma_msg_padded)*/);
+ if (pad < 0 || rqst->rq_slen - curlen < RPCRDMA_INLINE_PAD_THRESH)
+ pad = 0; /* don't pad this request */
+
+ dprintk("RPC: %s: pad %d destp 0x%p len %d hdrlen %d\n",
+ __func__, pad, destp, rqst->rq_slen, curlen);
+
+ copy_len = rqst->rq_snd_buf.page_len;
+ r_xprt->rx_stats.pullup_copy_count += copy_len;
+ npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT;
+ for (i = 0; copy_len && i < npages; i++) {
+ if (i == 0)
+ curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base;
+ else
+ curlen = PAGE_SIZE;
+ if (curlen > copy_len)
+ curlen = copy_len;
+ dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n",
+ __func__, i, destp, copy_len, curlen);
+ srcp = kmap_atomic(rqst->rq_snd_buf.pages[i],
+ KM_SKB_SUNRPC_DATA);
+ if (i == 0)
+ memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen);
+ else
+ memcpy(destp, srcp, curlen);
+ kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA);
+ rqst->rq_svec[0].iov_len += curlen;
+ destp += curlen;
+ copy_len -= curlen;
+ }
+ if (rqst->rq_snd_buf.tail[0].iov_len) {
+ curlen = rqst->rq_snd_buf.tail[0].iov_len;
+ if (destp != rqst->rq_snd_buf.tail[0].iov_base) {
+ memcpy(destp,
+ rqst->rq_snd_buf.tail[0].iov_base, curlen);
+ r_xprt->rx_stats.pullup_copy_count += curlen;
+ }
+ dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n",
+ __func__, destp, copy_len, curlen);
+ rqst->rq_svec[0].iov_len += curlen;
+ }
+ /* header now contains entire send message */
+ return pad;
+}
+
+/*
+ * Marshal a request: the primary job of this routine is to choose
+ * the transfer modes. See comments below.
+ *
+ * Uses multiple RDMA IOVs for a request:
+ * [0] -- RPC RDMA header, which uses memory from the *start* of the
+ * preregistered buffer that already holds the RPC data in
+ * its middle.
+ * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol.
+ * [2] -- optional padding.
+ * [3] -- if padded, header only in [1] and data here.
+ */
+
+int
+rpcrdma_marshal_req(struct rpc_rqst *rqst)
+{
+ struct rpc_xprt *xprt = rqst->rq_task->tk_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ char *base;
+ size_t hdrlen, rpclen, padlen;
+ enum rpcrdma_chunktype rtype, wtype;
+ struct rpcrdma_msg *headerp;
+
+ /*
+ * rpclen gets amount of data in first buffer, which is the
+ * pre-registered buffer.
+ */
+ base = rqst->rq_svec[0].iov_base;
+ rpclen = rqst->rq_svec[0].iov_len;
+
+ /* build RDMA header in private area at front */
+ headerp = (struct rpcrdma_msg *) req->rl_base;
+ /* don't htonl XID, it's already done in request */
+ headerp->rm_xid = rqst->rq_xid;
+ headerp->rm_vers = xdr_one;
+ headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests);
+ headerp->rm_type = __constant_htonl(RDMA_MSG);
+
+ /*
+ * Chunks needed for results?
+ *
+ * o If the expected result is under the inline threshold, all ops
+ * return as inline (but see later).
+ * o Large non-read ops return as a single reply chunk.
+ * o Large read ops return data as write chunk(s), header as inline.
+ *
+ * Note: the NFS code sending down multiple result segments implies
+ * the op is one of read, readdir[plus], readlink or NFSv4 getacl.
+ */
+
+ /*
+ * This code can handle read chunks, write chunks OR reply
+ * chunks -- only one type. If the request is too big to fit
+ * inline, then we will choose read chunks. If the request is
+ * a READ, then use write chunks to separate the file data
+ * into pages; otherwise use reply chunks.
+ */
+ if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
+ wtype = rpcrdma_noch;
+ else if (rqst->rq_rcv_buf.page_len == 0)
+ wtype = rpcrdma_replych;
+ else if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
+ wtype = rpcrdma_writech;
+ else
+ wtype = rpcrdma_replych;
+
+ /*
+ * Chunks needed for arguments?
+ *
+ * o If the total request is under the inline threshold, all ops
+ * are sent as inline.
+ * o Large non-write ops are sent with the entire message as a
+ * single read chunk (protocol 0-position special case).
+ * o Large write ops transmit data as read chunk(s), header as
+ * inline.
+ *
+ * Note: the NFS code sending down multiple argument segments
+ * implies the op is a write.
+ * TBD check NFSv4 setacl
+ */
+ if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
+ rtype = rpcrdma_noch;
+ else if (rqst->rq_snd_buf.page_len == 0)
+ rtype = rpcrdma_areadch;
+ else
+ rtype = rpcrdma_readch;
+
+ /* The following simplification is not true forever */
+ if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
+ wtype = rpcrdma_noch;
+ BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch);
+
+ if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS &&
+ (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) {
+ /* forced to "pure inline"? */
+ dprintk("RPC: %s: too much data (%d/%d) for inline\n",
+ __func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len);
+ return -1;
+ }
+
+ hdrlen = 28; /*sizeof *headerp;*/
+ padlen = 0;
+
+ /*
+ * Pull up any extra send data into the preregistered buffer.
+ * When padding is in use and applies to the transfer, insert
+ * it and change the message type.
+ */
+ if (rtype == rpcrdma_noch) {
+
+ padlen = rpcrdma_inline_pullup(rqst,
+ RPCRDMA_INLINE_PAD_VALUE(rqst));
+
+ if (padlen) {
+ headerp->rm_type = __constant_htonl(RDMA_MSGP);
+ headerp->rm_body.rm_padded.rm_align =
+ htonl(RPCRDMA_INLINE_PAD_VALUE(rqst));
+ headerp->rm_body.rm_padded.rm_thresh =
+ __constant_htonl(RPCRDMA_INLINE_PAD_THRESH);
+ headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero;
+ headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
+ headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
+ hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
+ BUG_ON(wtype != rpcrdma_noch);
+
+ } else {
+ headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero;
+ headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero;
+ headerp->rm_body.rm_nochunks.rm_empty[2] = xdr_zero;
+ /* new length after pullup */
+ rpclen = rqst->rq_svec[0].iov_len;
+ /*
+ * Currently we try to not actually use read inline.
+ * Reply chunks have the desirable property that
+ * they land, packed, directly in the target buffers
+ * without headers, so they require no fixup. The
+ * additional RDMA Write op sends the same amount
+ * of data, streams on-the-wire and adds no overhead
+ * on receive. Therefore, we request a reply chunk
+ * for non-writes wherever feasible and efficient.
+ */
+ if (wtype == rpcrdma_noch &&
+ r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER)
+ wtype = rpcrdma_replych;
+ }
+ }
+
+ /*
+ * Marshal chunks. This routine will return the header length
+ * consumed by marshaling.
+ */
+ if (rtype != rpcrdma_noch) {
+ hdrlen = rpcrdma_create_chunks(rqst,
+ &rqst->rq_snd_buf, headerp, rtype);
+ wtype = rtype; /* simplify dprintk */
+
+ } else if (wtype != rpcrdma_noch) {
+ hdrlen = rpcrdma_create_chunks(rqst,
+ &rqst->rq_rcv_buf, headerp, wtype);
+ }
+
+ if (hdrlen == 0)
+ return -1;
+
+ dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n"
+ " headerp 0x%p base 0x%p lkey 0x%x\n",
+ __func__, transfertypes[wtype], hdrlen, rpclen, padlen,
+ headerp, base, req->rl_iov.lkey);
+
+ /*
+ * initialize send_iov's - normally only two: rdma chunk header and
+ * single preregistered RPC header buffer, but if padding is present,
+ * then use a preregistered (and zeroed) pad buffer between the RPC
+ * header and any write data. In all non-rdma cases, any following
+ * data has been copied into the RPC header buffer.
+ */
+ req->rl_send_iov[0].addr = req->rl_iov.addr;
+ req->rl_send_iov[0].length = hdrlen;
+ req->rl_send_iov[0].lkey = req->rl_iov.lkey;
+
+ req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base);
+ req->rl_send_iov[1].length = rpclen;
+ req->rl_send_iov[1].lkey = req->rl_iov.lkey;
+
+ req->rl_niovs = 2;
+
+ if (padlen) {
+ struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+
+ req->rl_send_iov[2].addr = ep->rep_pad.addr;
+ req->rl_send_iov[2].length = padlen;
+ req->rl_send_iov[2].lkey = ep->rep_pad.lkey;
+
+ req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen;
+ req->rl_send_iov[3].length = rqst->rq_slen - rpclen;
+ req->rl_send_iov[3].lkey = req->rl_iov.lkey;
+
+ req->rl_niovs = 4;
+ }
+
+ return 0;
+}
+
+/*
+ * Chase down a received write or reply chunklist to get length
+ * RDMA'd by server. See map at rpcrdma_create_chunks()! :-)
+ */
+static int
+rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, u32 **iptrp)
+{
+ unsigned int i, total_len;
+ struct rpcrdma_write_chunk *cur_wchunk;
+
+ i = ntohl(**iptrp); /* get array count */
+ if (i > max)
+ return -1;
+ cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1);
+ total_len = 0;
+ while (i--) {
+ struct rpcrdma_segment *seg = &cur_wchunk->wc_target;
+ ifdebug(FACILITY) {
+ u64 off;
+ xdr_decode_hyper((u32 *)&seg->rs_offset, &off);
+ dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n",
+ __func__,
+ ntohl(seg->rs_length),
+ off,
+ ntohl(seg->rs_handle));
+ }
+ total_len += ntohl(seg->rs_length);
+ ++cur_wchunk;
+ }
+ /* check and adjust for properly terminated write chunk */
+ if (wrchunk) {
+ u32 *w = (u32 *) cur_wchunk;
+ if (*w++ != xdr_zero)
+ return -1;
+ cur_wchunk = (struct rpcrdma_write_chunk *) w;
+ }
+ if ((char *) cur_wchunk > rep->rr_base + rep->rr_len)
+ return -1;
+
+ *iptrp = (u32 *) cur_wchunk;
+ return total_len;
+}
+
+/*
+ * Scatter inline received data back into provided iov's.
+ */
+static void
+rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
+{
+ int i, npages, curlen, olen;
+ char *destp;
+
+ curlen = rqst->rq_rcv_buf.head[0].iov_len;
+ if (curlen > copy_len) { /* write chunk header fixup */
+ curlen = copy_len;
+ rqst->rq_rcv_buf.head[0].iov_len = curlen;
+ }
+
+ dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n",
+ __func__, srcp, copy_len, curlen);
+
+ /* Shift pointer for first receive segment only */
+ rqst->rq_rcv_buf.head[0].iov_base = srcp;
+ srcp += curlen;
+ copy_len -= curlen;
+
+ olen = copy_len;
+ i = 0;
+ rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen;
+ if (copy_len && rqst->rq_rcv_buf.page_len) {
+ npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base +
+ rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT;
+ for (; i < npages; i++) {
+ if (i == 0)
+ curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base;
+ else
+ curlen = PAGE_SIZE;
+ if (curlen > copy_len)
+ curlen = copy_len;
+ dprintk("RPC: %s: page %d"
+ " srcp 0x%p len %d curlen %d\n",
+ __func__, i, srcp, copy_len, curlen);
+ destp = kmap_atomic(rqst->rq_rcv_buf.pages[i],
+ KM_SKB_SUNRPC_DATA);
+ if (i == 0)
+ memcpy(destp + rqst->rq_rcv_buf.page_base,
+ srcp, curlen);
+ else
+ memcpy(destp, srcp, curlen);
+ flush_dcache_page(rqst->rq_rcv_buf.pages[i]);
+ kunmap_atomic(destp, KM_SKB_SUNRPC_DATA);
+ srcp += curlen;
+ copy_len -= curlen;
+ if (copy_len == 0)
+ break;
+ }
+ rqst->rq_rcv_buf.page_len = olen - copy_len;
+ } else
+ rqst->rq_rcv_buf.page_len = 0;
+
+ if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) {
+ curlen = copy_len;
+ if (curlen > rqst->rq_rcv_buf.tail[0].iov_len)
+ curlen = rqst->rq_rcv_buf.tail[0].iov_len;
+ if (rqst->rq_rcv_buf.tail[0].iov_base != srcp)
+ memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
+ dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n",
+ __func__, srcp, copy_len, curlen);
+ rqst->rq_rcv_buf.tail[0].iov_len = curlen;
+ copy_len -= curlen; ++i;
+ } else
+ rqst->rq_rcv_buf.tail[0].iov_len = 0;
+
+ if (copy_len)
+ dprintk("RPC: %s: %d bytes in"
+ " %d extra segments (%d lost)\n",
+ __func__, olen, i, copy_len);
+
+ /* TBD avoid a warning from call_decode() */
+ rqst->rq_private_buf = rqst->rq_rcv_buf;
+}
+
+/*
+ * This function is called when an async event is posted to
+ * the connection which changes the connection state. All it
+ * does at this point is mark the connection up/down, the rpc
+ * timers do the rest.
+ */
+void
+rpcrdma_conn_func(struct rpcrdma_ep *ep)
+{
+ struct rpc_xprt *xprt = ep->rep_xprt;
+
+ spin_lock_bh(&xprt->transport_lock);
+ if (ep->rep_connected > 0) {
+ if (!xprt_test_and_set_connected(xprt))
+ xprt_wake_pending_tasks(xprt, 0);
+ } else {
+ if (xprt_test_and_clear_connected(xprt))
+ xprt_wake_pending_tasks(xprt, ep->rep_connected);
+ }
+ spin_unlock_bh(&xprt->transport_lock);
+}
+
+/*
+ * This function is called when memory window unbind which we are waiting
+ * for completes. Just use rr_func (zeroed by upcall) to signal completion.
+ */
+static void
+rpcrdma_unbind_func(struct rpcrdma_rep *rep)
+{
+ wake_up(&rep->rr_unbind);
+}
+
+/*
+ * Called as a tasklet to do req/reply match and complete a request
+ * Errors must result in the RPC task either being awakened, or
+ * allowed to timeout, to discover the errors at that time.
+ */
+void
+rpcrdma_reply_handler(struct rpcrdma_rep *rep)
+{
+ struct rpcrdma_msg *headerp;
+ struct rpcrdma_req *req;
+ struct rpc_rqst *rqst;
+ struct rpc_xprt *xprt = rep->rr_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ u32 *iptr;
+ int i, rdmalen, status;
+
+ /* Check status. If bad, signal disconnect and return rep to pool */
+ if (rep->rr_len == ~0U) {
+ rpcrdma_recv_buffer_put(rep);
+ if (r_xprt->rx_ep.rep_connected == 1) {
+ r_xprt->rx_ep.rep_connected = -EIO;
+ rpcrdma_conn_func(&r_xprt->rx_ep);
+ }
+ return;
+ }
+ if (rep->rr_len < 28) {
+ dprintk("RPC: %s: short/invalid reply\n", __func__);
+ goto repost;
+ }
+ headerp = (struct rpcrdma_msg *) rep->rr_base;
+ if (headerp->rm_vers != xdr_one) {
+ dprintk("RPC: %s: invalid version %d\n",
+ __func__, ntohl(headerp->rm_vers));
+ goto repost;
+ }
+
+ /* Get XID and try for a match. */
+ spin_lock(&xprt->transport_lock);
+ rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
+ if (rqst == NULL) {
+ spin_unlock(&xprt->transport_lock);
+ dprintk("RPC: %s: reply 0x%p failed "
+ "to match any request xid 0x%08x len %d\n",
+ __func__, rep, headerp->rm_xid, rep->rr_len);
+repost:
+ r_xprt->rx_stats.bad_reply_count++;
+ rep->rr_func = rpcrdma_reply_handler;
+ if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
+ rpcrdma_recv_buffer_put(rep);
+
+ return;
+ }
+
+ /* get request object */
+ req = rpcr_to_rdmar(rqst);
+
+ dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
+ " RPC request 0x%p xid 0x%08x\n",
+ __func__, rep, req, rqst, headerp->rm_xid);
+
+ BUG_ON(!req || req->rl_reply);
+
+ /* from here on, the reply is no longer an orphan */
+ req->rl_reply = rep;
+
+ /* check for expected message types */
+ /* The order of some of these tests is important. */
+ switch (headerp->rm_type) {
+ case __constant_htonl(RDMA_MSG):
+ /* never expect read chunks */
+ /* never expect reply chunks (two ways to check) */
+ /* never expect write chunks without having offered RDMA */
+ if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
+ (headerp->rm_body.rm_chunks[1] == xdr_zero &&
+ headerp->rm_body.rm_chunks[2] != xdr_zero) ||
+ (headerp->rm_body.rm_chunks[1] != xdr_zero &&
+ req->rl_nchunks == 0))
+ goto badheader;
+ if (headerp->rm_body.rm_chunks[1] != xdr_zero) {
+ /* count any expected write chunks in read reply */
+ /* start at write chunk array count */
+ iptr = &headerp->rm_body.rm_chunks[2];
+ rdmalen = rpcrdma_count_chunks(rep,
+ req->rl_nchunks, 1, &iptr);
+ /* check for validity, and no reply chunk after */
+ if (rdmalen < 0 || *iptr++ != xdr_zero)
+ goto badheader;
+ rep->rr_len -=
+ ((unsigned char *)iptr - (unsigned char *)headerp);
+ status = rep->rr_len + rdmalen;
+ r_xprt->rx_stats.total_rdma_reply += rdmalen;
+ } else {
+ /* else ordinary inline */
+ iptr = (u32 *)((unsigned char *)headerp + 28);
+ rep->rr_len -= 28; /*sizeof *headerp;*/
+ status = rep->rr_len;
+ }
+ /* Fix up the rpc results for upper layer */
+ rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len);
+ break;
+
+ case __constant_htonl(RDMA_NOMSG):
+ /* never expect read or write chunks, always reply chunks */
+ if (headerp->rm_body.rm_chunks[0] != xdr_zero ||
+ headerp->rm_body.rm_chunks[1] != xdr_zero ||
+ headerp->rm_body.rm_chunks[2] != xdr_one ||
+ req->rl_nchunks == 0)
+ goto badheader;
+ iptr = (u32 *)((unsigned char *)headerp + 28);
+ rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr);
+ if (rdmalen < 0)
+ goto badheader;
+ r_xprt->rx_stats.total_rdma_reply += rdmalen;
+ /* Reply chunk buffer already is the reply vector - no fixup. */
+ status = rdmalen;
+ break;
+
+badheader:
+ default:
+ dprintk("%s: invalid rpcrdma reply header (type %d):"
+ " chunks[012] == %d %d %d"
+ " expected chunks <= %d\n",
+ __func__, ntohl(headerp->rm_type),
+ headerp->rm_body.rm_chunks[0],
+ headerp->rm_body.rm_chunks[1],
+ headerp->rm_body.rm_chunks[2],
+ req->rl_nchunks);
+ status = -EIO;
+ r_xprt->rx_stats.bad_reply_count++;
+ break;
+ }
+
+ /* If using mw bind, start the deregister process now. */
+ /* (Note: if mr_free(), cannot perform it here, in tasklet context) */
+ if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) {
+ case RPCRDMA_MEMWINDOWS:
+ for (i = 0; req->rl_nchunks-- > 1;)
+ i += rpcrdma_deregister_external(
+ &req->rl_segments[i], r_xprt, NULL);
+ /* Optionally wait (not here) for unbinds to complete */
+ rep->rr_func = rpcrdma_unbind_func;
+ (void) rpcrdma_deregister_external(&req->rl_segments[i],
+ r_xprt, rep);
+ break;
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ for (i = 0; req->rl_nchunks--;)
+ i += rpcrdma_deregister_external(&req->rl_segments[i],
+ r_xprt, NULL);
+ break;
+ default:
+ break;
+ }
+
+ dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
+ __func__, xprt, rqst, status);
+ xprt_complete_rqst(rqst->rq_task, status);
+ spin_unlock(&xprt->transport_lock);
+}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
new file mode 100644
index 00000000000..dc55cc974c9
--- /dev/null
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -0,0 +1,800 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * Neither the name of the Network Appliance, Inc. nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * transport.c
+ *
+ * This file contains the top-level implementation of an RPC RDMA
+ * transport.
+ *
+ * Naming convention: functions beginning with xprt_ are part of the
+ * transport switch. All others are RPC RDMA internal.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+
+#include "xprt_rdma.h"
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY RPCDBG_TRANS
+#endif
+
+MODULE_LICENSE("Dual BSD/GPL");
+
+MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS");
+MODULE_AUTHOR("Network Appliance, Inc.");
+
+/*
+ * tunables
+ */
+
+static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
+static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
+static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
+static unsigned int xprt_rdma_inline_write_padding;
+#if !RPCRDMA_PERSISTENT_REGISTRATION
+static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */
+#else
+static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL;
+#endif
+
+#ifdef RPC_DEBUG
+
+static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE;
+static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE;
+static unsigned int zero;
+static unsigned int max_padding = PAGE_SIZE;
+static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
+static unsigned int max_memreg = RPCRDMA_LAST - 1;
+
+static struct ctl_table_header *sunrpc_table_header;
+
+static ctl_table xr_tunables_table[] = {
+ {
+ .ctl_name = CTL_SLOTTABLE_RDMA,
+ .procname = "rdma_slot_table_entries",
+ .data = &xprt_rdma_slot_table_entries,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &min_slot_table_size,
+ .extra2 = &max_slot_table_size
+ },
+ {
+ .ctl_name = CTL_RDMA_MAXINLINEREAD,
+ .procname = "rdma_max_inline_read",
+ .data = &xprt_rdma_max_inline_read,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = CTL_RDMA_MAXINLINEWRITE,
+ .procname = "rdma_max_inline_write",
+ .data = &xprt_rdma_max_inline_write,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ .strategy = &sysctl_intvec,
+ },
+ {
+ .ctl_name = CTL_RDMA_WRITEPADDING,
+ .procname = "rdma_inline_write_padding",
+ .data = &xprt_rdma_inline_write_padding,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &max_padding,
+ },
+ {
+ .ctl_name = CTL_RDMA_MEMREG,
+ .procname = "rdma_memreg_strategy",
+ .data = &xprt_rdma_memreg_strategy,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &min_memreg,
+ .extra2 = &max_memreg,
+ },
+ {
+ .ctl_name = 0,
+ },
+};
+
+static ctl_table sunrpc_table[] = {
+ {
+ .ctl_name = CTL_SUNRPC,
+ .procname = "sunrpc",
+ .mode = 0555,
+ .child = xr_tunables_table
+ },
+ {
+ .ctl_name = 0,
+ },
+};
+
+#endif
+
+static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */
+
+static void
+xprt_rdma_format_addresses(struct rpc_xprt *xprt)
+{
+ struct sockaddr_in *addr = (struct sockaddr_in *)
+ &rpcx_to_rdmad(xprt).addr;
+ char *buf;
+
+ buf = kzalloc(20, GFP_KERNEL);
+ if (buf)
+ snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr));
+ xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
+
+ buf = kzalloc(8, GFP_KERNEL);
+ if (buf)
+ snprintf(buf, 8, "%u", ntohs(addr->sin_port));
+ xprt->address_strings[RPC_DISPLAY_PORT] = buf;
+
+ xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
+
+ buf = kzalloc(48, GFP_KERNEL);
+ if (buf)
+ snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s",
+ NIPQUAD(addr->sin_addr.s_addr),
+ ntohs(addr->sin_port), "rdma");
+ xprt->address_strings[RPC_DISPLAY_ALL] = buf;
+
+ buf = kzalloc(10, GFP_KERNEL);
+ if (buf)
+ snprintf(buf, 10, "%02x%02x%02x%02x",
+ NIPQUAD(addr->sin_addr.s_addr));
+ xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
+
+ buf = kzalloc(8, GFP_KERNEL);
+ if (buf)
+ snprintf(buf, 8, "%4hx", ntohs(addr->sin_port));
+ xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
+
+ buf = kzalloc(30, GFP_KERNEL);
+ if (buf)
+ snprintf(buf, 30, NIPQUAD_FMT".%u.%u",
+ NIPQUAD(addr->sin_addr.s_addr),
+ ntohs(addr->sin_port) >> 8,
+ ntohs(addr->sin_port) & 0xff);
+ xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
+
+ /* netid */
+ xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
+}
+
+static void
+xprt_rdma_free_addresses(struct rpc_xprt *xprt)
+{
+ kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
+ kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
+ kfree(xprt->address_strings[RPC_DISPLAY_ALL]);
+ kfree(xprt->address_strings[RPC_DISPLAY_HEX_ADDR]);
+ kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
+ kfree(xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR]);
+}
+
+static void
+xprt_rdma_connect_worker(struct work_struct *work)
+{
+ struct rpcrdma_xprt *r_xprt =
+ container_of(work, struct rpcrdma_xprt, rdma_connect.work);
+ struct rpc_xprt *xprt = &r_xprt->xprt;
+ int rc = 0;
+
+ if (!xprt->shutdown) {
+ xprt_clear_connected(xprt);
+
+ dprintk("RPC: %s: %sconnect\n", __func__,
+ r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
+ rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+ if (rc)
+ goto out;
+ }
+ goto out_clear;
+
+out:
+ xprt_wake_pending_tasks(xprt, rc);
+
+out_clear:
+ dprintk("RPC: %s: exit\n", __func__);
+ xprt_clear_connecting(xprt);
+}
+
+/*
+ * xprt_rdma_destroy
+ *
+ * Destroy the xprt.
+ * Free all memory associated with the object, including its own.
+ * NOTE: none of the *destroy methods free memory for their top-level
+ * objects, even though they may have allocated it (they do free
+ * private memory). It's up to the caller to handle it. In this
+ * case (RDMA transport), all structure memory is inlined with the
+ * struct rpcrdma_xprt.
+ */
+static void
+xprt_rdma_destroy(struct rpc_xprt *xprt)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ int rc;
+
+ dprintk("RPC: %s: called\n", __func__);
+
+ cancel_delayed_work(&r_xprt->rdma_connect);
+ flush_scheduled_work();
+
+ xprt_clear_connected(xprt);
+
+ rpcrdma_buffer_destroy(&r_xprt->rx_buf);
+ rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
+ if (rc)
+ dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n",
+ __func__, rc);
+ rpcrdma_ia_close(&r_xprt->rx_ia);
+
+ xprt_rdma_free_addresses(xprt);
+
+ kfree(xprt->slot);
+ xprt->slot = NULL;
+ kfree(xprt);
+
+ dprintk("RPC: %s: returning\n", __func__);
+
+ module_put(THIS_MODULE);
+}
+
+/**
+ * xprt_setup_rdma - Set up transport to use RDMA
+ *
+ * @args: rpc transport arguments
+ */
+static struct rpc_xprt *
+xprt_setup_rdma(struct xprt_create *args)
+{
+ struct rpcrdma_create_data_internal cdata;
+ struct rpc_xprt *xprt;
+ struct rpcrdma_xprt *new_xprt;
+ struct rpcrdma_ep *new_ep;
+ struct sockaddr_in *sin;
+ int rc;
+
+ if (args->addrlen > sizeof(xprt->addr)) {
+ dprintk("RPC: %s: address too large\n", __func__);
+ return ERR_PTR(-EBADF);
+ }
+
+ xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL);
+ if (xprt == NULL) {
+ dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
+ __func__);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ xprt->max_reqs = xprt_rdma_slot_table_entries;
+ xprt->slot = kcalloc(xprt->max_reqs,
+ sizeof(struct rpc_rqst), GFP_KERNEL);
+ if (xprt->slot == NULL) {
+ kfree(xprt);
+ dprintk("RPC: %s: couldn't allocate %d slots\n",
+ __func__, xprt->max_reqs);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* 60 second timeout, no retries */
+ xprt_set_timeout(&xprt->timeout, 0, 60UL * HZ);
+ xprt->bind_timeout = (60U * HZ);
+ xprt->connect_timeout = (60U * HZ);
+ xprt->reestablish_timeout = (5U * HZ);
+ xprt->idle_timeout = (5U * 60 * HZ);
+
+ xprt->resvport = 0; /* privileged port not needed */
+ xprt->tsh_size = 0; /* RPC-RDMA handles framing */
+ xprt->max_payload = RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE;
+ xprt->ops = &xprt_rdma_procs;
+
+ /*
+ * Set up RDMA-specific connect data.
+ */
+
+ /* Put server RDMA address in local cdata */
+ memcpy(&cdata.addr, args->dstaddr, args->addrlen);
+
+ /* Ensure xprt->addr holds valid server TCP (not RDMA)
+ * address, for any side protocols which peek at it */
+ xprt->prot = IPPROTO_TCP;
+ xprt->addrlen = args->addrlen;
+ memcpy(&xprt->addr, &cdata.addr, xprt->addrlen);
+
+ sin = (struct sockaddr_in *)&cdata.addr;
+ if (ntohs(sin->sin_port) != 0)
+ xprt_set_bound(xprt);
+
+ dprintk("RPC: %s: %u.%u.%u.%u:%u\n", __func__,
+ NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port));
+
+ /* Set max requests */
+ cdata.max_requests = xprt->max_reqs;
+
+ /* Set some length limits */
+ cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */
+ cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */
+
+ cdata.inline_wsize = xprt_rdma_max_inline_write;
+ if (cdata.inline_wsize > cdata.wsize)
+ cdata.inline_wsize = cdata.wsize;
+
+ cdata.inline_rsize = xprt_rdma_max_inline_read;
+ if (cdata.inline_rsize > cdata.rsize)
+ cdata.inline_rsize = cdata.rsize;
+
+ cdata.padding = xprt_rdma_inline_write_padding;
+
+ /*
+ * Create new transport instance, which includes initialized
+ * o ia
+ * o endpoint
+ * o buffers
+ */
+
+ new_xprt = rpcx_to_rdmax(xprt);
+
+ rc = rpcrdma_ia_open(new_xprt, (struct sockaddr *) &cdata.addr,
+ xprt_rdma_memreg_strategy);
+ if (rc)
+ goto out1;
+
+ /*
+ * initialize and create ep
+ */
+ new_xprt->rx_data = cdata;
+ new_ep = &new_xprt->rx_ep;
+ new_ep->rep_remote_addr = cdata.addr;
+
+ rc = rpcrdma_ep_create(&new_xprt->rx_ep,
+ &new_xprt->rx_ia, &new_xprt->rx_data);
+ if (rc)
+ goto out2;
+
+ /*
+ * Allocate pre-registered send and receive buffers for headers and
+ * any inline data. Also specify any padding which will be provided
+ * from a preregistered zero buffer.
+ */
+ rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia,
+ &new_xprt->rx_data);
+ if (rc)
+ goto out3;
+
+ /*
+ * Register a callback for connection events. This is necessary because
+ * connection loss notification is async. We also catch connection loss
+ * when reaping receives.
+ */
+ INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker);
+ new_ep->rep_func = rpcrdma_conn_func;
+ new_ep->rep_xprt = xprt;
+
+ xprt_rdma_format_addresses(xprt);
+
+ if (!try_module_get(THIS_MODULE))
+ goto out4;
+
+ return xprt;
+
+out4:
+ xprt_rdma_free_addresses(xprt);
+ rc = -EINVAL;
+out3:
+ (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
+out2:
+ rpcrdma_ia_close(&new_xprt->rx_ia);
+out1:
+ kfree(xprt->slot);
+ kfree(xprt);
+ return ERR_PTR(rc);
+}
+
+/*
+ * Close a connection, during shutdown or timeout/reconnect
+ */
+static void
+xprt_rdma_close(struct rpc_xprt *xprt)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+ dprintk("RPC: %s: closing\n", __func__);
+ xprt_disconnect(xprt);
+ (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+}
+
+static void
+xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
+{
+ struct sockaddr_in *sap;
+
+ sap = (struct sockaddr_in *)&xprt->addr;
+ sap->sin_port = htons(port);
+ sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr;
+ sap->sin_port = htons(port);
+ dprintk("RPC: %s: %u\n", __func__, port);
+}
+
+static void
+xprt_rdma_connect(struct rpc_task *task)
+{
+ struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+ if (!xprt_test_and_set_connecting(xprt)) {
+ if (r_xprt->rx_ep.rep_connected != 0) {
+ /* Reconnect */
+ schedule_delayed_work(&r_xprt->rdma_connect,
+ xprt->reestablish_timeout);
+ } else {
+ schedule_delayed_work(&r_xprt->rdma_connect, 0);
+ if (!RPC_IS_ASYNC(task))
+ flush_scheduled_work();
+ }
+ }
+}
+
+static int
+xprt_rdma_reserve_xprt(struct rpc_task *task)
+{
+ struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
+
+ /* == RPC_CWNDSCALE @ init, but *after* setup */
+ if (r_xprt->rx_buf.rb_cwndscale == 0UL) {
+ r_xprt->rx_buf.rb_cwndscale = xprt->cwnd;
+ dprintk("RPC: %s: cwndscale %lu\n", __func__,
+ r_xprt->rx_buf.rb_cwndscale);
+ BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);
+ }
+ xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;
+ return xprt_reserve_xprt_cong(task);
+}
+
+/*
+ * The RDMA allocate/free functions need the task structure as a place
+ * to hide the struct rpcrdma_req, which is necessary for the actual send/recv
+ * sequence. For this reason, the recv buffers are attached to send
+ * buffers for portions of the RPC. Note that the RPC layer allocates
+ * both send and receive buffers in the same call. We may register
+ * the receive buffer portion when using reply chunks.
+ */
+static void *
+xprt_rdma_allocate(struct rpc_task *task, size_t size)
+{
+ struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpcrdma_req *req, *nreq;
+
+ req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf);
+ BUG_ON(NULL == req);
+
+ if (size > req->rl_size) {
+ dprintk("RPC: %s: size %zd too large for buffer[%zd]: "
+ "prog %d vers %d proc %d\n",
+ __func__, size, req->rl_size,
+ task->tk_client->cl_prog, task->tk_client->cl_vers,
+ task->tk_msg.rpc_proc->p_proc);
+ /*
+ * Outgoing length shortage. Our inline write max must have
+ * been configured to perform direct i/o.
+ *
+ * This is therefore a large metadata operation, and the
+ * allocate call was made on the maximum possible message,
+ * e.g. containing long filename(s) or symlink data. In
+ * fact, while these metadata operations *might* carry
+ * large outgoing payloads, they rarely *do*. However, we
+ * have to commit to the request here, so reallocate and
+ * register it now. The data path will never require this
+ * reallocation.
+ *
+ * If the allocation or registration fails, the RPC framework
+ * will (doggedly) retry.
+ */
+ if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy ==
+ RPCRDMA_BOUNCEBUFFERS) {
+ /* forced to "pure inline" */
+ dprintk("RPC: %s: too much data (%zd) for inline "
+ "(r/w max %d/%d)\n", __func__, size,
+ rpcx_to_rdmad(xprt).inline_rsize,
+ rpcx_to_rdmad(xprt).inline_wsize);
+ size = req->rl_size;
+ rpc_exit(task, -EIO); /* fail the operation */
+ rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
+ goto out;
+ }
+ if (task->tk_flags & RPC_TASK_SWAPPER)
+ nreq = kmalloc(sizeof *req + size, GFP_ATOMIC);
+ else
+ nreq = kmalloc(sizeof *req + size, GFP_NOFS);
+ if (nreq == NULL)
+ goto outfail;
+
+ if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia,
+ nreq->rl_base, size + sizeof(struct rpcrdma_req)
+ - offsetof(struct rpcrdma_req, rl_base),
+ &nreq->rl_handle, &nreq->rl_iov)) {
+ kfree(nreq);
+ goto outfail;
+ }
+ rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size;
+ nreq->rl_size = size;
+ nreq->rl_niovs = 0;
+ nreq->rl_nchunks = 0;
+ nreq->rl_buffer = (struct rpcrdma_buffer *)req;
+ nreq->rl_reply = req->rl_reply;
+ memcpy(nreq->rl_segments,
+ req->rl_segments, sizeof nreq->rl_segments);
+ /* flag the swap with an unused field */
+ nreq->rl_iov.length = 0;
+ req->rl_reply = NULL;
+ req = nreq;
+ }
+ dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
+out:
+ return req->rl_xdr_buf;
+
+outfail:
+ rpcrdma_buffer_put(req);
+ rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
+ return NULL;
+}
+
+/*
+ * This function returns all RDMA resources to the pool.
+ */
+static void
+xprt_rdma_free(void *buffer)
+{
+ struct rpcrdma_req *req;
+ struct rpcrdma_xprt *r_xprt;
+ struct rpcrdma_rep *rep;
+ int i;
+
+ if (buffer == NULL)
+ return;
+
+ req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]);
+ r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
+ rep = req->rl_reply;
+
+ dprintk("RPC: %s: called on 0x%p%s\n",
+ __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : "");
+
+ /*
+ * Finish the deregistration. When using mw bind, this was
+ * begun in rpcrdma_reply_handler(). In all other modes, we
+ * do it here, in thread context. The process is considered
+ * complete when the rr_func vector becomes NULL - this
+ * was put in place during rpcrdma_reply_handler() - the wait
+ * call below will not block if the dereg is "done". If
+ * interrupted, our framework will clean up.
+ */
+ for (i = 0; req->rl_nchunks;) {
+ --req->rl_nchunks;
+ i += rpcrdma_deregister_external(
+ &req->rl_segments[i], r_xprt, NULL);
+ }
+
+ if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) {
+ rep->rr_func = NULL; /* abandon the callback */
+ req->rl_reply = NULL;
+ }
+
+ if (req->rl_iov.length == 0) { /* see allocate above */
+ struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer;
+ oreq->rl_reply = req->rl_reply;
+ (void) rpcrdma_deregister_internal(&r_xprt->rx_ia,
+ req->rl_handle,
+ &req->rl_iov);
+ kfree(req);
+ req = oreq;
+ }
+
+ /* Put back request+reply buffers */
+ rpcrdma_buffer_put(req);
+}
+
+/*
+ * send_request invokes the meat of RPC RDMA. It must do the following:
+ * 1. Marshal the RPC request into an RPC RDMA request, which means
+ * putting a header in front of data, and creating IOVs for RDMA
+ * from those in the request.
+ * 2. In marshaling, detect opportunities for RDMA, and use them.
+ * 3. Post a recv message to set up asynch completion, then send
+ * the request (rpcrdma_ep_post).
+ * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
+ */
+
+static int
+xprt_rdma_send_request(struct rpc_task *task)
+{
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+
+ /* marshal the send itself */
+ if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) {
+ r_xprt->rx_stats.failed_marshal_count++;
+ dprintk("RPC: %s: rpcrdma_marshal_req failed\n",
+ __func__);
+ return -EIO;
+ }
+
+ if (req->rl_reply == NULL) /* e.g. reconnection */
+ rpcrdma_recv_buffer_get(req);
+
+ if (req->rl_reply) {
+ req->rl_reply->rr_func = rpcrdma_reply_handler;
+ /* this need only be done once, but... */
+ req->rl_reply->rr_xprt = xprt;
+ }
+
+ if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) {
+ xprt_disconnect(xprt);
+ return -ENOTCONN; /* implies disconnect */
+ }
+
+ rqst->rq_bytes_sent = 0;
+ return 0;
+}
+
+static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ long idle_time = 0;
+
+ if (xprt_connected(xprt))
+ idle_time = (long)(jiffies - xprt->last_used) / HZ;
+
+ seq_printf(seq,
+ "\txprt:\trdma %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu "
+ "%lu %lu %lu %Lu %Lu %Lu %Lu %lu %lu %lu\n",
+
+ 0, /* need a local port? */
+ xprt->stat.bind_count,
+ xprt->stat.connect_count,
+ xprt->stat.connect_time,
+ idle_time,
+ xprt->stat.sends,
+ xprt->stat.recvs,
+ xprt->stat.bad_xids,
+ xprt->stat.req_u,
+ xprt->stat.bklog_u,
+
+ r_xprt->rx_stats.read_chunk_count,
+ r_xprt->rx_stats.write_chunk_count,
+ r_xprt->rx_stats.reply_chunk_count,
+ r_xprt->rx_stats.total_rdma_request,
+ r_xprt->rx_stats.total_rdma_reply,
+ r_xprt->rx_stats.pullup_copy_count,
+ r_xprt->rx_stats.fixup_copy_count,
+ r_xprt->rx_stats.hardway_register_count,
+ r_xprt->rx_stats.failed_marshal_count,
+ r_xprt->rx_stats.bad_reply_count);
+}
+
+/*
+ * Plumbing for rpc transport switch and kernel module
+ */
+
+static struct rpc_xprt_ops xprt_rdma_procs = {
+ .reserve_xprt = xprt_rdma_reserve_xprt,
+ .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
+ .release_request = xprt_release_rqst_cong, /* ditto */
+ .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
+ .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
+ .set_port = xprt_rdma_set_port,
+ .connect = xprt_rdma_connect,
+ .buf_alloc = xprt_rdma_allocate,
+ .buf_free = xprt_rdma_free,
+ .send_request = xprt_rdma_send_request,
+ .close = xprt_rdma_close,
+ .destroy = xprt_rdma_destroy,
+ .print_stats = xprt_rdma_print_stats
+};
+
+static struct xprt_class xprt_rdma = {
+ .list = LIST_HEAD_INIT(xprt_rdma.list),
+ .name = "rdma",
+ .owner = THIS_MODULE,
+ .ident = XPRT_TRANSPORT_RDMA,
+ .setup = xprt_setup_rdma,
+};
+
+static void __exit xprt_rdma_cleanup(void)
+{
+ int rc;
+
+ dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
+#ifdef RPC_DEBUG
+ if (sunrpc_table_header) {
+ unregister_sysctl_table(sunrpc_table_header);
+ sunrpc_table_header = NULL;
+ }
+#endif
+ rc = xprt_unregister_transport(&xprt_rdma);
+ if (rc)
+ dprintk("RPC: %s: xprt_unregister returned %i\n",
+ __func__, rc);
+}
+
+static int __init xprt_rdma_init(void)
+{
+ int rc;
+
+ rc = xprt_register_transport(&xprt_rdma);
+
+ if (rc)
+ return rc;
+
+ dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n");
+
+ dprintk(KERN_INFO "Defaults:\n");
+ dprintk(KERN_INFO "\tSlots %d\n"
+ "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
+ xprt_rdma_slot_table_entries,
+ xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
+ dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n",
+ xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
+
+#ifdef RPC_DEBUG
+ if (!sunrpc_table_header)
+ sunrpc_table_header = register_sysctl_table(sunrpc_table);
+#endif
+ return 0;
+}
+
+module_init(xprt_rdma_init);
+module_exit(xprt_rdma_cleanup);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
new file mode 100644
index 00000000000..44b0fb942e8
--- /dev/null
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -0,0 +1,1627 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * Neither the name of the Network Appliance, Inc. nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * verbs.c
+ *
+ * Encapsulates the major functions managing:
+ * o adapters
+ * o endpoints
+ * o connections
+ * o buffer memory
+ */
+
+#include <linux/pci.h> /* for Tavor hack below */
+
+#include "xprt_rdma.h"
+
+/*
+ * Globals/Macros
+ */
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY RPCDBG_TRANS
+#endif
+
+/*
+ * internal functions
+ */
+
+/*
+ * handle replies in tasklet context, using a single, global list
+ * rdma tasklet function -- just turn around and call the func
+ * for all replies on the list
+ */
+
+static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
+static LIST_HEAD(rpcrdma_tasklets_g);
+
+static void
+rpcrdma_run_tasklet(unsigned long data)
+{
+ struct rpcrdma_rep *rep;
+ void (*func)(struct rpcrdma_rep *);
+ unsigned long flags;
+
+ data = data;
+ spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+ while (!list_empty(&rpcrdma_tasklets_g)) {
+ rep = list_entry(rpcrdma_tasklets_g.next,
+ struct rpcrdma_rep, rr_list);
+ list_del(&rep->rr_list);
+ func = rep->rr_func;
+ rep->rr_func = NULL;
+ spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+
+ if (func)
+ func(rep);
+ else
+ rpcrdma_recv_buffer_put(rep);
+
+ spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+ }
+ spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+}
+
+static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
+
+static inline void
+rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+ list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
+ spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+ tasklet_schedule(&rpcrdma_tasklet_g);
+}
+
+static void
+rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
+{
+ struct rpcrdma_ep *ep = context;
+
+ dprintk("RPC: %s: QP error %X on device %s ep %p\n",
+ __func__, event->event, event->device->name, context);
+ if (ep->rep_connected == 1) {
+ ep->rep_connected = -EIO;
+ ep->rep_func(ep);
+ wake_up_all(&ep->rep_connect_wait);
+ }
+}
+
+static void
+rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
+{
+ struct rpcrdma_ep *ep = context;
+
+ dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
+ __func__, event->event, event->device->name, context);
+ if (ep->rep_connected == 1) {
+ ep->rep_connected = -EIO;
+ ep->rep_func(ep);
+ wake_up_all(&ep->rep_connect_wait);
+ }
+}
+
+static inline
+void rpcrdma_event_process(struct ib_wc *wc)
+{
+ struct rpcrdma_rep *rep =
+ (struct rpcrdma_rep *)(unsigned long) wc->wr_id;
+
+ dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n",
+ __func__, rep, wc->status, wc->opcode, wc->byte_len);
+
+ if (!rep) /* send or bind completion that we don't care about */
+ return;
+
+ if (IB_WC_SUCCESS != wc->status) {
+ dprintk("RPC: %s: %s WC status %X, connection lost\n",
+ __func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send",
+ wc->status);
+ rep->rr_len = ~0U;
+ rpcrdma_schedule_tasklet(rep);
+ return;
+ }
+
+ switch (wc->opcode) {
+ case IB_WC_RECV:
+ rep->rr_len = wc->byte_len;
+ ib_dma_sync_single_for_cpu(
+ rdmab_to_ia(rep->rr_buffer)->ri_id->device,
+ rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
+ /* Keep (only) the most recent credits, after check validity */
+ if (rep->rr_len >= 16) {
+ struct rpcrdma_msg *p =
+ (struct rpcrdma_msg *) rep->rr_base;
+ unsigned int credits = ntohl(p->rm_credit);
+ if (credits == 0) {
+ dprintk("RPC: %s: server"
+ " dropped credits to 0!\n", __func__);
+ /* don't deadlock */
+ credits = 1;
+ } else if (credits > rep->rr_buffer->rb_max_requests) {
+ dprintk("RPC: %s: server"
+ " over-crediting: %d (%d)\n",
+ __func__, credits,
+ rep->rr_buffer->rb_max_requests);
+ credits = rep->rr_buffer->rb_max_requests;
+ }
+ atomic_set(&rep->rr_buffer->rb_credits, credits);
+ }
+ /* fall through */
+ case IB_WC_BIND_MW:
+ rpcrdma_schedule_tasklet(rep);
+ break;
+ default:
+ dprintk("RPC: %s: unexpected WC event %X\n",
+ __func__, wc->opcode);
+ break;
+ }
+}
+
+static inline int
+rpcrdma_cq_poll(struct ib_cq *cq)
+{
+ struct ib_wc wc;
+ int rc;
+
+ for (;;) {
+ rc = ib_poll_cq(cq, 1, &wc);
+ if (rc < 0) {
+ dprintk("RPC: %s: ib_poll_cq failed %i\n",
+ __func__, rc);
+ return rc;
+ }
+ if (rc == 0)
+ break;
+
+ rpcrdma_event_process(&wc);
+ }
+
+ return 0;
+}
+
+/*
+ * rpcrdma_cq_event_upcall
+ *
+ * This upcall handles recv, send, bind and unbind events.
+ * It is reentrant but processes single events in order to maintain
+ * ordering of receives to keep server credits.
+ *
+ * It is the responsibility of the scheduled tasklet to return
+ * recv buffers to the pool. NOTE: this affects synchronization of
+ * connection shutdown. That is, the structures required for
+ * the completion of the reply handler must remain intact until
+ * all memory has been reclaimed.
+ *
+ * Note that send events are suppressed and do not result in an upcall.
+ */
+static void
+rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context)
+{
+ int rc;
+
+ rc = rpcrdma_cq_poll(cq);
+ if (rc)
+ return;
+
+ rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+ if (rc) {
+ dprintk("RPC: %s: ib_req_notify_cq failed %i\n",
+ __func__, rc);
+ return;
+ }
+
+ rpcrdma_cq_poll(cq);
+}
+
+#ifdef RPC_DEBUG
+static const char * const conn[] = {
+ "address resolved",
+ "address error",
+ "route resolved",
+ "route error",
+ "connect request",
+ "connect response",
+ "connect error",
+ "unreachable",
+ "rejected",
+ "established",
+ "disconnected",
+ "device removal"
+};
+#endif
+
+static int
+rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
+{
+ struct rpcrdma_xprt *xprt = id->context;
+ struct rpcrdma_ia *ia = &xprt->rx_ia;
+ struct rpcrdma_ep *ep = &xprt->rx_ep;
+ struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
+ struct ib_qp_attr attr;
+ struct ib_qp_init_attr iattr;
+ int connstate = 0;
+
+ switch (event->event) {
+ case RDMA_CM_EVENT_ADDR_RESOLVED:
+ case RDMA_CM_EVENT_ROUTE_RESOLVED:
+ complete(&ia->ri_done);
+ break;
+ case RDMA_CM_EVENT_ADDR_ERROR:
+ ia->ri_async_rc = -EHOSTUNREACH;
+ dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
+ __func__, ep);
+ complete(&ia->ri_done);
+ break;
+ case RDMA_CM_EVENT_ROUTE_ERROR:
+ ia->ri_async_rc = -ENETUNREACH;
+ dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
+ __func__, ep);
+ complete(&ia->ri_done);
+ break;
+ case RDMA_CM_EVENT_ESTABLISHED:
+ connstate = 1;
+ ib_query_qp(ia->ri_id->qp, &attr,
+ IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
+ &iattr);
+ dprintk("RPC: %s: %d responder resources"
+ " (%d initiator)\n",
+ __func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
+ goto connected;
+ case RDMA_CM_EVENT_CONNECT_ERROR:
+ connstate = -ENOTCONN;
+ goto connected;
+ case RDMA_CM_EVENT_UNREACHABLE:
+ connstate = -ENETDOWN;
+ goto connected;
+ case RDMA_CM_EVENT_REJECTED:
+ connstate = -ECONNREFUSED;
+ goto connected;
+ case RDMA_CM_EVENT_DISCONNECTED:
+ connstate = -ECONNABORTED;
+ goto connected;
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ connstate = -ENODEV;
+connected:
+ dprintk("RPC: %s: %s: %u.%u.%u.%u:%u"
+ " (ep 0x%p event 0x%x)\n",
+ __func__,
+ (event->event <= 11) ? conn[event->event] :
+ "unknown connection error",
+ NIPQUAD(addr->sin_addr.s_addr),
+ ntohs(addr->sin_port),
+ ep, event->event);
+ atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
+ dprintk("RPC: %s: %sconnected\n",
+ __func__, connstate > 0 ? "" : "dis");
+ ep->rep_connected = connstate;
+ ep->rep_func(ep);
+ wake_up_all(&ep->rep_connect_wait);
+ break;
+ default:
+ ia->ri_async_rc = -EINVAL;
+ dprintk("RPC: %s: unexpected CM event %X\n",
+ __func__, event->event);
+ complete(&ia->ri_done);
+ break;
+ }
+
+ return 0;
+}
+
+static struct rdma_cm_id *
+rpcrdma_create_id(struct rpcrdma_xprt *xprt,
+ struct rpcrdma_ia *ia, struct sockaddr *addr)
+{
+ struct rdma_cm_id *id;
+ int rc;
+
+ id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP);
+ if (IS_ERR(id)) {
+ rc = PTR_ERR(id);
+ dprintk("RPC: %s: rdma_create_id() failed %i\n",
+ __func__, rc);
+ return id;
+ }
+
+ ia->ri_async_rc = 0;
+ rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
+ if (rc) {
+ dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
+ __func__, rc);
+ goto out;
+ }
+ wait_for_completion(&ia->ri_done);
+ rc = ia->ri_async_rc;
+ if (rc)
+ goto out;
+
+ ia->ri_async_rc = 0;
+ rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
+ if (rc) {
+ dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
+ __func__, rc);
+ goto out;
+ }
+ wait_for_completion(&ia->ri_done);
+ rc = ia->ri_async_rc;
+ if (rc)
+ goto out;
+
+ return id;
+
+out:
+ rdma_destroy_id(id);
+ return ERR_PTR(rc);
+}
+
+/*
+ * Drain any cq, prior to teardown.
+ */
+static void
+rpcrdma_clean_cq(struct ib_cq *cq)
+{
+ struct ib_wc wc;
+ int count = 0;
+
+ while (1 == ib_poll_cq(cq, 1, &wc))
+ ++count;
+
+ if (count)
+ dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
+ __func__, count, wc.opcode);
+}
+
+/*
+ * Exported functions.
+ */
+
+/*
+ * Open and initialize an Interface Adapter.
+ * o initializes fields of struct rpcrdma_ia, including
+ * interface and provider attributes and protection zone.
+ */
+int
+rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
+{
+ int rc;
+ struct rpcrdma_ia *ia = &xprt->rx_ia;
+
+ init_completion(&ia->ri_done);
+
+ ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
+ if (IS_ERR(ia->ri_id)) {
+ rc = PTR_ERR(ia->ri_id);
+ goto out1;
+ }
+
+ ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
+ if (IS_ERR(ia->ri_pd)) {
+ rc = PTR_ERR(ia->ri_pd);
+ dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
+ __func__, rc);
+ goto out2;
+ }
+
+ /*
+ * Optionally obtain an underlying physical identity mapping in
+ * order to do a memory window-based bind. This base registration
+ * is protected from remote access - that is enabled only by binding
+ * for the specific bytes targeted during each RPC operation, and
+ * revoked after the corresponding completion similar to a storage
+ * adapter.
+ */
+ if (memreg > RPCRDMA_REGISTER) {
+ int mem_priv = IB_ACCESS_LOCAL_WRITE;
+ switch (memreg) {
+#if RPCRDMA_PERSISTENT_REGISTRATION
+ case RPCRDMA_ALLPHYSICAL:
+ mem_priv |= IB_ACCESS_REMOTE_WRITE;
+ mem_priv |= IB_ACCESS_REMOTE_READ;
+ break;
+#endif
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ mem_priv |= IB_ACCESS_MW_BIND;
+ break;
+ default:
+ break;
+ }
+ ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
+ if (IS_ERR(ia->ri_bind_mem)) {
+ printk(KERN_ALERT "%s: ib_get_dma_mr for "
+ "phys register failed with %lX\n\t"
+ "Will continue with degraded performance\n",
+ __func__, PTR_ERR(ia->ri_bind_mem));
+ memreg = RPCRDMA_REGISTER;
+ ia->ri_bind_mem = NULL;
+ }
+ }
+
+ /* Else will do memory reg/dereg for each chunk */
+ ia->ri_memreg_strategy = memreg;
+
+ return 0;
+out2:
+ rdma_destroy_id(ia->ri_id);
+out1:
+ return rc;
+}
+
+/*
+ * Clean up/close an IA.
+ * o if event handles and PD have been initialized, free them.
+ * o close the IA
+ */
+void
+rpcrdma_ia_close(struct rpcrdma_ia *ia)
+{
+ int rc;
+
+ dprintk("RPC: %s: entering\n", __func__);
+ if (ia->ri_bind_mem != NULL) {
+ rc = ib_dereg_mr(ia->ri_bind_mem);
+ dprintk("RPC: %s: ib_dereg_mr returned %i\n",
+ __func__, rc);
+ }
+ if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp)
+ rdma_destroy_qp(ia->ri_id);
+ if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
+ rc = ib_dealloc_pd(ia->ri_pd);
+ dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
+ __func__, rc);
+ }
+ if (ia->ri_id != NULL && !IS_ERR(ia->ri_id))
+ rdma_destroy_id(ia->ri_id);
+}
+
+/*
+ * Create unconnected endpoint.
+ */
+int
+rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
+ struct rpcrdma_create_data_internal *cdata)
+{
+ struct ib_device_attr devattr;
+ int rc;
+
+ rc = ib_query_device(ia->ri_id->device, &devattr);
+ if (rc) {
+ dprintk("RPC: %s: ib_query_device failed %d\n",
+ __func__, rc);
+ return rc;
+ }
+
+ /* check provider's send/recv wr limits */
+ if (cdata->max_requests > devattr.max_qp_wr)
+ cdata->max_requests = devattr.max_qp_wr;
+
+ ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
+ ep->rep_attr.qp_context = ep;
+ /* send_cq and recv_cq initialized below */
+ ep->rep_attr.srq = NULL;
+ ep->rep_attr.cap.max_send_wr = cdata->max_requests;
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ /* Add room for mw_binds+unbinds - overkill! */
+ ep->rep_attr.cap.max_send_wr++;
+ ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS);
+ if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
+ return -EINVAL;
+ break;
+ default:
+ break;
+ }
+ ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
+ ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
+ ep->rep_attr.cap.max_recv_sge = 1;
+ ep->rep_attr.cap.max_inline_data = 0;
+ ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+ ep->rep_attr.qp_type = IB_QPT_RC;
+ ep->rep_attr.port_num = ~0;
+
+ dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
+ "iovs: send %d recv %d\n",
+ __func__,
+ ep->rep_attr.cap.max_send_wr,
+ ep->rep_attr.cap.max_recv_wr,
+ ep->rep_attr.cap.max_send_sge,
+ ep->rep_attr.cap.max_recv_sge);
+
+ /* set trigger for requesting send completion */
+ ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/;
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ ep->rep_cqinit -= RPCRDMA_MAX_SEGS;
+ break;
+ default:
+ break;
+ }
+ if (ep->rep_cqinit <= 2)
+ ep->rep_cqinit = 0;
+ INIT_CQCOUNT(ep);
+ ep->rep_ia = ia;
+ init_waitqueue_head(&ep->rep_connect_wait);
+
+ /*
+ * Create a single cq for receive dto and mw_bind (only ever
+ * care about unbind, really). Send completions are suppressed.
+ * Use single threaded tasklet upcalls to maintain ordering.
+ */
+ ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall,
+ rpcrdma_cq_async_error_upcall, NULL,
+ ep->rep_attr.cap.max_recv_wr +
+ ep->rep_attr.cap.max_send_wr + 1, 0);
+ if (IS_ERR(ep->rep_cq)) {
+ rc = PTR_ERR(ep->rep_cq);
+ dprintk("RPC: %s: ib_create_cq failed: %i\n",
+ __func__, rc);
+ goto out1;
+ }
+
+ rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP);
+ if (rc) {
+ dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
+ __func__, rc);
+ goto out2;
+ }
+
+ ep->rep_attr.send_cq = ep->rep_cq;
+ ep->rep_attr.recv_cq = ep->rep_cq;
+
+ /* Initialize cma parameters */
+
+ /* RPC/RDMA does not use private data */
+ ep->rep_remote_cma.private_data = NULL;
+ ep->rep_remote_cma.private_data_len = 0;
+
+ /* Client offers RDMA Read but does not initiate */
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_BOUNCEBUFFERS:
+ ep->rep_remote_cma.responder_resources = 0;
+ break;
+ case RPCRDMA_MTHCAFMR:
+ case RPCRDMA_REGISTER:
+ ep->rep_remote_cma.responder_resources = cdata->max_requests *
+ (RPCRDMA_MAX_DATA_SEGS / 8);
+ break;
+ case RPCRDMA_MEMWINDOWS:
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+#if RPCRDMA_PERSISTENT_REGISTRATION
+ case RPCRDMA_ALLPHYSICAL:
+#endif
+ ep->rep_remote_cma.responder_resources = cdata->max_requests *
+ (RPCRDMA_MAX_DATA_SEGS / 2);
+ break;
+ default:
+ break;
+ }
+ if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom)
+ ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
+ ep->rep_remote_cma.initiator_depth = 0;
+
+ ep->rep_remote_cma.retry_count = 7;
+ ep->rep_remote_cma.flow_control = 0;
+ ep->rep_remote_cma.rnr_retry_count = 0;
+
+ return 0;
+
+out2:
+ if (ib_destroy_cq(ep->rep_cq))
+ ;
+out1:
+ return rc;
+}
+
+/*
+ * rpcrdma_ep_destroy
+ *
+ * Disconnect and destroy endpoint. After this, the only
+ * valid operations on the ep are to free it (if dynamically
+ * allocated) or re-create it.
+ *
+ * The caller's error handling must be sure to not leak the endpoint
+ * if this function fails.
+ */
+int
+rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+{
+ int rc;
+
+ dprintk("RPC: %s: entering, connected is %d\n",
+ __func__, ep->rep_connected);
+
+ if (ia->ri_id->qp) {
+ rc = rpcrdma_ep_disconnect(ep, ia);
+ if (rc)
+ dprintk("RPC: %s: rpcrdma_ep_disconnect"
+ " returned %i\n", __func__, rc);
+ }
+
+ ep->rep_func = NULL;
+
+ /* padding - could be done in rpcrdma_buffer_destroy... */
+ if (ep->rep_pad_mr) {
+ rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
+ ep->rep_pad_mr = NULL;
+ }
+
+ if (ia->ri_id->qp) {
+ rdma_destroy_qp(ia->ri_id);
+ ia->ri_id->qp = NULL;
+ }
+
+ rpcrdma_clean_cq(ep->rep_cq);
+ rc = ib_destroy_cq(ep->rep_cq);
+ if (rc)
+ dprintk("RPC: %s: ib_destroy_cq returned %i\n",
+ __func__, rc);
+
+ return rc;
+}
+
+/*
+ * Connect unconnected endpoint.
+ */
+int
+rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+{
+ struct rdma_cm_id *id;
+ int rc = 0;
+ int retry_count = 0;
+ int reconnect = (ep->rep_connected != 0);
+
+ if (reconnect) {
+ struct rpcrdma_xprt *xprt;
+retry:
+ rc = rpcrdma_ep_disconnect(ep, ia);
+ if (rc && rc != -ENOTCONN)
+ dprintk("RPC: %s: rpcrdma_ep_disconnect"
+ " status %i\n", __func__, rc);
+ rpcrdma_clean_cq(ep->rep_cq);
+
+ xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
+ id = rpcrdma_create_id(xprt, ia,
+ (struct sockaddr *)&xprt->rx_data.addr);
+ if (IS_ERR(id)) {
+ rc = PTR_ERR(id);
+ goto out;
+ }
+ /* TEMP TEMP TEMP - fail if new device:
+ * Deregister/remarshal *all* requests!
+ * Close and recreate adapter, pd, etc!
+ * Re-determine all attributes still sane!
+ * More stuff I haven't thought of!
+ * Rrrgh!
+ */
+ if (ia->ri_id->device != id->device) {
+ printk("RPC: %s: can't reconnect on "
+ "different device!\n", __func__);
+ rdma_destroy_id(id);
+ rc = -ENETDOWN;
+ goto out;
+ }
+ /* END TEMP */
+ rdma_destroy_id(ia->ri_id);
+ ia->ri_id = id;
+ }
+
+ rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+ if (rc) {
+ dprintk("RPC: %s: rdma_create_qp failed %i\n",
+ __func__, rc);
+ goto out;
+ }
+
+/* XXX Tavor device performs badly with 2K MTU! */
+if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
+ struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device);
+ if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR &&
+ (pcid->vendor == PCI_VENDOR_ID_MELLANOX ||
+ pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) {
+ struct ib_qp_attr attr = {
+ .path_mtu = IB_MTU_1024
+ };
+ rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU);
+ }
+}
+
+ /* Theoretically a client initiator_depth > 0 is not needed,
+ * but many peers fail to complete the connection unless they
+ * == responder_resources! */
+ if (ep->rep_remote_cma.initiator_depth !=
+ ep->rep_remote_cma.responder_resources)
+ ep->rep_remote_cma.initiator_depth =
+ ep->rep_remote_cma.responder_resources;
+
+ ep->rep_connected = 0;
+
+ rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
+ if (rc) {
+ dprintk("RPC: %s: rdma_connect() failed with %i\n",
+ __func__, rc);
+ goto out;
+ }
+
+ if (reconnect)
+ return 0;
+
+ wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
+
+ /*
+ * Check state. A non-peer reject indicates no listener
+ * (ECONNREFUSED), which may be a transient state. All
+ * others indicate a transport condition which has already
+ * undergone a best-effort.
+ */
+ if (ep->rep_connected == -ECONNREFUSED
+ && ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
+ dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
+ goto retry;
+ }
+ if (ep->rep_connected <= 0) {
+ /* Sometimes, the only way to reliably connect to remote
+ * CMs is to use same nonzero values for ORD and IRD. */
+ ep->rep_remote_cma.initiator_depth =
+ ep->rep_remote_cma.responder_resources;
+ if (ep->rep_remote_cma.initiator_depth == 0)
+ ++ep->rep_remote_cma.initiator_depth;
+ if (ep->rep_remote_cma.responder_resources == 0)
+ ++ep->rep_remote_cma.responder_resources;
+ if (retry_count++ == 0)
+ goto retry;
+ rc = ep->rep_connected;
+ } else {
+ dprintk("RPC: %s: connected\n", __func__);
+ }
+
+out:
+ if (rc)
+ ep->rep_connected = rc;
+ return rc;
+}
+
+/*
+ * rpcrdma_ep_disconnect
+ *
+ * This is separate from destroy to facilitate the ability
+ * to reconnect without recreating the endpoint.
+ *
+ * This call is not reentrant, and must not be made in parallel
+ * on the same endpoint.
+ */
+int
+rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
+{
+ int rc;
+
+ rpcrdma_clean_cq(ep->rep_cq);
+ rc = rdma_disconnect(ia->ri_id);
+ if (!rc) {
+ /* returns without wait if not connected */
+ wait_event_interruptible(ep->rep_connect_wait,
+ ep->rep_connected != 1);
+ dprintk("RPC: %s: after wait, %sconnected\n", __func__,
+ (ep->rep_connected == 1) ? "still " : "dis");
+ } else {
+ dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
+ ep->rep_connected = rc;
+ }
+ return rc;
+}
+
+/*
+ * Initialize buffer memory
+ */
+int
+rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
+ struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
+{
+ char *p;
+ size_t len;
+ int i, rc;
+
+ buf->rb_max_requests = cdata->max_requests;
+ spin_lock_init(&buf->rb_lock);
+ atomic_set(&buf->rb_credits, 1);
+
+ /* Need to allocate:
+ * 1. arrays for send and recv pointers
+ * 2. arrays of struct rpcrdma_req to fill in pointers
+ * 3. array of struct rpcrdma_rep for replies
+ * 4. padding, if any
+ * 5. mw's, if any
+ * Send/recv buffers in req/rep need to be registered
+ */
+
+ len = buf->rb_max_requests *
+ (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
+ len += cdata->padding;
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_MTHCAFMR:
+ /* TBD we are perhaps overallocating here */
+ len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
+ sizeof(struct rpcrdma_mw);
+ break;
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
+ sizeof(struct rpcrdma_mw);
+ break;
+ default:
+ break;
+ }
+
+ /* allocate 1, 4 and 5 in one shot */
+ p = kzalloc(len, GFP_KERNEL);
+ if (p == NULL) {
+ dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
+ __func__, len);
+ rc = -ENOMEM;
+ goto out;
+ }
+ buf->rb_pool = p; /* for freeing it later */
+
+ buf->rb_send_bufs = (struct rpcrdma_req **) p;
+ p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
+ buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
+ p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
+
+ /*
+ * Register the zeroed pad buffer, if any.
+ */
+ if (cdata->padding) {
+ rc = rpcrdma_register_internal(ia, p, cdata->padding,
+ &ep->rep_pad_mr, &ep->rep_pad);
+ if (rc)
+ goto out;
+ }
+ p += cdata->padding;
+
+ /*
+ * Allocate the fmr's, or mw's for mw_bind chunk registration.
+ * We "cycle" the mw's in order to minimize rkey reuse,
+ * and also reduce unbind-to-bind collision.
+ */
+ INIT_LIST_HEAD(&buf->rb_mws);
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_MTHCAFMR:
+ {
+ struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
+ struct ib_fmr_attr fa = {
+ RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT
+ };
+ /* TBD we are perhaps overallocating here */
+ for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
+ r->r.fmr = ib_alloc_fmr(ia->ri_pd,
+ IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
+ &fa);
+ if (IS_ERR(r->r.fmr)) {
+ rc = PTR_ERR(r->r.fmr);
+ dprintk("RPC: %s: ib_alloc_fmr"
+ " failed %i\n", __func__, rc);
+ goto out;
+ }
+ list_add(&r->mw_list, &buf->rb_mws);
+ ++r;
+ }
+ }
+ break;
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ {
+ struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
+ /* Allocate one extra request's worth, for full cycling */
+ for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
+ r->r.mw = ib_alloc_mw(ia->ri_pd);
+ if (IS_ERR(r->r.mw)) {
+ rc = PTR_ERR(r->r.mw);
+ dprintk("RPC: %s: ib_alloc_mw"
+ " failed %i\n", __func__, rc);
+ goto out;
+ }
+ list_add(&r->mw_list, &buf->rb_mws);
+ ++r;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * Allocate/init the request/reply buffers. Doing this
+ * using kmalloc for now -- one for each buf.
+ */
+ for (i = 0; i < buf->rb_max_requests; i++) {
+ struct rpcrdma_req *req;
+ struct rpcrdma_rep *rep;
+
+ len = cdata->inline_wsize + sizeof(struct rpcrdma_req);
+ /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */
+ /* Typical ~2400b, so rounding up saves work later */
+ if (len < 4096)
+ len = 4096;
+ req = kmalloc(len, GFP_KERNEL);
+ if (req == NULL) {
+ dprintk("RPC: %s: request buffer %d alloc"
+ " failed\n", __func__, i);
+ rc = -ENOMEM;
+ goto out;
+ }
+ memset(req, 0, sizeof(struct rpcrdma_req));
+ buf->rb_send_bufs[i] = req;
+ buf->rb_send_bufs[i]->rl_buffer = buf;
+
+ rc = rpcrdma_register_internal(ia, req->rl_base,
+ len - offsetof(struct rpcrdma_req, rl_base),
+ &buf->rb_send_bufs[i]->rl_handle,
+ &buf->rb_send_bufs[i]->rl_iov);
+ if (rc)
+ goto out;
+
+ buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req);
+
+ len = cdata->inline_rsize + sizeof(struct rpcrdma_rep);
+ rep = kmalloc(len, GFP_KERNEL);
+ if (rep == NULL) {
+ dprintk("RPC: %s: reply buffer %d alloc failed\n",
+ __func__, i);
+ rc = -ENOMEM;
+ goto out;
+ }
+ memset(rep, 0, sizeof(struct rpcrdma_rep));
+ buf->rb_recv_bufs[i] = rep;
+ buf->rb_recv_bufs[i]->rr_buffer = buf;
+ init_waitqueue_head(&rep->rr_unbind);
+
+ rc = rpcrdma_register_internal(ia, rep->rr_base,
+ len - offsetof(struct rpcrdma_rep, rr_base),
+ &buf->rb_recv_bufs[i]->rr_handle,
+ &buf->rb_recv_bufs[i]->rr_iov);
+ if (rc)
+ goto out;
+
+ }
+ dprintk("RPC: %s: max_requests %d\n",
+ __func__, buf->rb_max_requests);
+ /* done */
+ return 0;
+out:
+ rpcrdma_buffer_destroy(buf);
+ return rc;
+}
+
+/*
+ * Unregister and destroy buffer memory. Need to deal with
+ * partial initialization, so it's callable from failed create.
+ * Must be called before destroying endpoint, as registrations
+ * reference it.
+ */
+void
+rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
+{
+ int rc, i;
+ struct rpcrdma_ia *ia = rdmab_to_ia(buf);
+
+ /* clean up in reverse order from create
+ * 1. recv mr memory (mr free, then kfree)
+ * 1a. bind mw memory
+ * 2. send mr memory (mr free, then kfree)
+ * 3. padding (if any) [moved to rpcrdma_ep_destroy]
+ * 4. arrays
+ */
+ dprintk("RPC: %s: entering\n", __func__);
+
+ for (i = 0; i < buf->rb_max_requests; i++) {
+ if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
+ rpcrdma_deregister_internal(ia,
+ buf->rb_recv_bufs[i]->rr_handle,
+ &buf->rb_recv_bufs[i]->rr_iov);
+ kfree(buf->rb_recv_bufs[i]);
+ }
+ if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
+ while (!list_empty(&buf->rb_mws)) {
+ struct rpcrdma_mw *r;
+ r = list_entry(buf->rb_mws.next,
+ struct rpcrdma_mw, mw_list);
+ list_del(&r->mw_list);
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_MTHCAFMR:
+ rc = ib_dealloc_fmr(r->r.fmr);
+ if (rc)
+ dprintk("RPC: %s:"
+ " ib_dealloc_fmr"
+ " failed %i\n",
+ __func__, rc);
+ break;
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ rc = ib_dealloc_mw(r->r.mw);
+ if (rc)
+ dprintk("RPC: %s:"
+ " ib_dealloc_mw"
+ " failed %i\n",
+ __func__, rc);
+ break;
+ default:
+ break;
+ }
+ }
+ rpcrdma_deregister_internal(ia,
+ buf->rb_send_bufs[i]->rl_handle,
+ &buf->rb_send_bufs[i]->rl_iov);
+ kfree(buf->rb_send_bufs[i]);
+ }
+ }
+
+ kfree(buf->rb_pool);
+}
+
+/*
+ * Get a set of request/reply buffers.
+ *
+ * Reply buffer (if needed) is attached to send buffer upon return.
+ * Rule:
+ * rb_send_index and rb_recv_index MUST always be pointing to the
+ * *next* available buffer (non-NULL). They are incremented after
+ * removing buffers, and decremented *before* returning them.
+ */
+struct rpcrdma_req *
+rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
+{
+ struct rpcrdma_req *req;
+ unsigned long flags;
+
+ spin_lock_irqsave(&buffers->rb_lock, flags);
+ if (buffers->rb_send_index == buffers->rb_max_requests) {
+ spin_unlock_irqrestore(&buffers->rb_lock, flags);
+ dprintk("RPC: %s: out of request buffers\n", __func__);
+ return ((struct rpcrdma_req *)NULL);
+ }
+
+ req = buffers->rb_send_bufs[buffers->rb_send_index];
+ if (buffers->rb_send_index < buffers->rb_recv_index) {
+ dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
+ __func__,
+ buffers->rb_recv_index - buffers->rb_send_index);
+ req->rl_reply = NULL;
+ } else {
+ req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
+ buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
+ }
+ buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
+ if (!list_empty(&buffers->rb_mws)) {
+ int i = RPCRDMA_MAX_SEGS - 1;
+ do {
+ struct rpcrdma_mw *r;
+ r = list_entry(buffers->rb_mws.next,
+ struct rpcrdma_mw, mw_list);
+ list_del(&r->mw_list);
+ req->rl_segments[i].mr_chunk.rl_mw = r;
+ } while (--i >= 0);
+ }
+ spin_unlock_irqrestore(&buffers->rb_lock, flags);
+ return req;
+}
+
+/*
+ * Put request/reply buffers back into pool.
+ * Pre-decrement counter/array index.
+ */
+void
+rpcrdma_buffer_put(struct rpcrdma_req *req)
+{
+ struct rpcrdma_buffer *buffers = req->rl_buffer;
+ struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
+ int i;
+ unsigned long flags;
+
+ BUG_ON(req->rl_nchunks != 0);
+ spin_lock_irqsave(&buffers->rb_lock, flags);
+ buffers->rb_send_bufs[--buffers->rb_send_index] = req;
+ req->rl_niovs = 0;
+ if (req->rl_reply) {
+ buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
+ init_waitqueue_head(&req->rl_reply->rr_unbind);
+ req->rl_reply->rr_func = NULL;
+ req->rl_reply = NULL;
+ }
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_MTHCAFMR:
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ /*
+ * Cycle mw's back in reverse order, and "spin" them.
+ * This delays and scrambles reuse as much as possible.
+ */
+ i = 1;
+ do {
+ struct rpcrdma_mw **mw;
+ mw = &req->rl_segments[i].mr_chunk.rl_mw;
+ list_add_tail(&(*mw)->mw_list, &buffers->rb_mws);
+ *mw = NULL;
+ } while (++i < RPCRDMA_MAX_SEGS);
+ list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list,
+ &buffers->rb_mws);
+ req->rl_segments[0].mr_chunk.rl_mw = NULL;
+ break;
+ default:
+ break;
+ }
+ spin_unlock_irqrestore(&buffers->rb_lock, flags);
+}
+
+/*
+ * Recover reply buffers from pool.
+ * This happens when recovering from error conditions.
+ * Post-increment counter/array index.
+ */
+void
+rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
+{
+ struct rpcrdma_buffer *buffers = req->rl_buffer;
+ unsigned long flags;
+
+ if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */
+ buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
+ spin_lock_irqsave(&buffers->rb_lock, flags);
+ if (buffers->rb_recv_index < buffers->rb_max_requests) {
+ req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
+ buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
+ }
+ spin_unlock_irqrestore(&buffers->rb_lock, flags);
+}
+
+/*
+ * Put reply buffers back into pool when not attached to
+ * request. This happens in error conditions, and when
+ * aborting unbinds. Pre-decrement counter/array index.
+ */
+void
+rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
+{
+ struct rpcrdma_buffer *buffers = rep->rr_buffer;
+ unsigned long flags;
+
+ rep->rr_func = NULL;
+ spin_lock_irqsave(&buffers->rb_lock, flags);
+ buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
+ spin_unlock_irqrestore(&buffers->rb_lock, flags);
+}
+
+/*
+ * Wrappers for internal-use kmalloc memory registration, used by buffer code.
+ */
+
+int
+rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
+ struct ib_mr **mrp, struct ib_sge *iov)
+{
+ struct ib_phys_buf ipb;
+ struct ib_mr *mr;
+ int rc;
+
+ /*
+ * All memory passed here was kmalloc'ed, therefore phys-contiguous.
+ */
+ iov->addr = ib_dma_map_single(ia->ri_id->device,
+ va, len, DMA_BIDIRECTIONAL);
+ iov->length = len;
+
+ if (ia->ri_bind_mem != NULL) {
+ *mrp = NULL;
+ iov->lkey = ia->ri_bind_mem->lkey;
+ return 0;
+ }
+
+ ipb.addr = iov->addr;
+ ipb.size = iov->length;
+ mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
+ IB_ACCESS_LOCAL_WRITE, &iov->addr);
+
+ dprintk("RPC: %s: phys convert: 0x%llx "
+ "registered 0x%llx length %d\n",
+ __func__, (unsigned long long)ipb.addr,
+ (unsigned long long)iov->addr, len);
+
+ if (IS_ERR(mr)) {
+ *mrp = NULL;
+ rc = PTR_ERR(mr);
+ dprintk("RPC: %s: failed with %i\n", __func__, rc);
+ } else {
+ *mrp = mr;
+ iov->lkey = mr->lkey;
+ rc = 0;
+ }
+
+ return rc;
+}
+
+int
+rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
+ struct ib_mr *mr, struct ib_sge *iov)
+{
+ int rc;
+
+ ib_dma_unmap_single(ia->ri_id->device,
+ iov->addr, iov->length, DMA_BIDIRECTIONAL);
+
+ if (NULL == mr)
+ return 0;
+
+ rc = ib_dereg_mr(mr);
+ if (rc)
+ dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
+ return rc;
+}
+
+/*
+ * Wrappers for chunk registration, shared by read/write chunk code.
+ */
+
+static void
+rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
+{
+ seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+ seg->mr_dmalen = seg->mr_len;
+ if (seg->mr_page)
+ seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
+ seg->mr_page, offset_in_page(seg->mr_offset),
+ seg->mr_dmalen, seg->mr_dir);
+ else
+ seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
+ seg->mr_offset,
+ seg->mr_dmalen, seg->mr_dir);
+}
+
+static void
+rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
+{
+ if (seg->mr_page)
+ ib_dma_unmap_page(ia->ri_id->device,
+ seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
+ else
+ ib_dma_unmap_single(ia->ri_id->device,
+ seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
+}
+
+int
+rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
+ int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
+{
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
+ IB_ACCESS_REMOTE_READ);
+ struct rpcrdma_mr_seg *seg1 = seg;
+ int i;
+ int rc = 0;
+
+ switch (ia->ri_memreg_strategy) {
+
+#if RPCRDMA_PERSISTENT_REGISTRATION
+ case RPCRDMA_ALLPHYSICAL:
+ rpcrdma_map_one(ia, seg, writing);
+ seg->mr_rkey = ia->ri_bind_mem->rkey;
+ seg->mr_base = seg->mr_dma;
+ seg->mr_nsegs = 1;
+ nsegs = 1;
+ break;
+#endif
+
+ /* Registration using fast memory registration */
+ case RPCRDMA_MTHCAFMR:
+ {
+ u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
+ int len, pageoff = offset_in_page(seg->mr_offset);
+ seg1->mr_offset -= pageoff; /* start of page */
+ seg1->mr_len += pageoff;
+ len = -pageoff;
+ if (nsegs > RPCRDMA_MAX_DATA_SEGS)
+ nsegs = RPCRDMA_MAX_DATA_SEGS;
+ for (i = 0; i < nsegs;) {
+ rpcrdma_map_one(ia, seg, writing);
+ physaddrs[i] = seg->mr_dma;
+ len += seg->mr_len;
+ ++seg;
+ ++i;
+ /* Check for holes */
+ if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+ offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
+ break;
+ }
+ nsegs = i;
+ rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
+ physaddrs, nsegs, seg1->mr_dma);
+ if (rc) {
+ dprintk("RPC: %s: failed ib_map_phys_fmr "
+ "%u@0x%llx+%i (%d)... status %i\n", __func__,
+ len, (unsigned long long)seg1->mr_dma,
+ pageoff, nsegs, rc);
+ while (nsegs--)
+ rpcrdma_unmap_one(ia, --seg);
+ } else {
+ seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
+ seg1->mr_base = seg1->mr_dma + pageoff;
+ seg1->mr_nsegs = nsegs;
+ seg1->mr_len = len;
+ }
+ }
+ break;
+
+ /* Registration using memory windows */
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ {
+ struct ib_mw_bind param;
+ rpcrdma_map_one(ia, seg, writing);
+ param.mr = ia->ri_bind_mem;
+ param.wr_id = 0ULL; /* no send cookie */
+ param.addr = seg->mr_dma;
+ param.length = seg->mr_len;
+ param.send_flags = 0;
+ param.mw_access_flags = mem_priv;
+
+ DECR_CQCOUNT(&r_xprt->rx_ep);
+ rc = ib_bind_mw(ia->ri_id->qp,
+ seg->mr_chunk.rl_mw->r.mw, &param);
+ if (rc) {
+ dprintk("RPC: %s: failed ib_bind_mw "
+ "%u@0x%llx status %i\n",
+ __func__, seg->mr_len,
+ (unsigned long long)seg->mr_dma, rc);
+ rpcrdma_unmap_one(ia, seg);
+ } else {
+ seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
+ seg->mr_base = param.addr;
+ seg->mr_nsegs = 1;
+ nsegs = 1;
+ }
+ }
+ break;
+
+ /* Default registration each time */
+ default:
+ {
+ struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
+ int len = 0;
+ if (nsegs > RPCRDMA_MAX_DATA_SEGS)
+ nsegs = RPCRDMA_MAX_DATA_SEGS;
+ for (i = 0; i < nsegs;) {
+ rpcrdma_map_one(ia, seg, writing);
+ ipb[i].addr = seg->mr_dma;
+ ipb[i].size = seg->mr_len;
+ len += seg->mr_len;
+ ++seg;
+ ++i;
+ /* Check for holes */
+ if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+ offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
+ break;
+ }
+ nsegs = i;
+ seg1->mr_base = seg1->mr_dma;
+ seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
+ ipb, nsegs, mem_priv, &seg1->mr_base);
+ if (IS_ERR(seg1->mr_chunk.rl_mr)) {
+ rc = PTR_ERR(seg1->mr_chunk.rl_mr);
+ dprintk("RPC: %s: failed ib_reg_phys_mr "
+ "%u@0x%llx (%d)... status %i\n",
+ __func__, len,
+ (unsigned long long)seg1->mr_dma, nsegs, rc);
+ while (nsegs--)
+ rpcrdma_unmap_one(ia, --seg);
+ } else {
+ seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
+ seg1->mr_nsegs = nsegs;
+ seg1->mr_len = len;
+ }
+ }
+ break;
+ }
+ if (rc)
+ return -1;
+
+ return nsegs;
+}
+
+int
+rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
+ struct rpcrdma_xprt *r_xprt, void *r)
+{
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ struct rpcrdma_mr_seg *seg1 = seg;
+ int nsegs = seg->mr_nsegs, rc;
+
+ switch (ia->ri_memreg_strategy) {
+
+#if RPCRDMA_PERSISTENT_REGISTRATION
+ case RPCRDMA_ALLPHYSICAL:
+ BUG_ON(nsegs != 1);
+ rpcrdma_unmap_one(ia, seg);
+ rc = 0;
+ break;
+#endif
+
+ case RPCRDMA_MTHCAFMR:
+ {
+ LIST_HEAD(l);
+ list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l);
+ rc = ib_unmap_fmr(&l);
+ while (seg1->mr_nsegs--)
+ rpcrdma_unmap_one(ia, seg++);
+ }
+ if (rc)
+ dprintk("RPC: %s: failed ib_unmap_fmr,"
+ " status %i\n", __func__, rc);
+ break;
+
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ {
+ struct ib_mw_bind param;
+ BUG_ON(nsegs != 1);
+ param.mr = ia->ri_bind_mem;
+ param.addr = 0ULL; /* unbind */
+ param.length = 0;
+ param.mw_access_flags = 0;
+ if (r) {
+ param.wr_id = (u64) (unsigned long) r;
+ param.send_flags = IB_SEND_SIGNALED;
+ INIT_CQCOUNT(&r_xprt->rx_ep);
+ } else {
+ param.wr_id = 0ULL;
+ param.send_flags = 0;
+ DECR_CQCOUNT(&r_xprt->rx_ep);
+ }
+ rc = ib_bind_mw(ia->ri_id->qp,
+ seg->mr_chunk.rl_mw->r.mw, &param);
+ rpcrdma_unmap_one(ia, seg);
+ }
+ if (rc)
+ dprintk("RPC: %s: failed ib_(un)bind_mw,"
+ " status %i\n", __func__, rc);
+ else
+ r = NULL; /* will upcall on completion */
+ break;
+
+ default:
+ rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
+ seg1->mr_chunk.rl_mr = NULL;
+ while (seg1->mr_nsegs--)
+ rpcrdma_unmap_one(ia, seg++);
+ if (rc)
+ dprintk("RPC: %s: failed ib_dereg_mr,"
+ " status %i\n", __func__, rc);
+ break;
+ }
+ if (r) {
+ struct rpcrdma_rep *rep = r;
+ void (*func)(struct rpcrdma_rep *) = rep->rr_func;
+ rep->rr_func = NULL;
+ func(rep); /* dereg done, callback now */
+ }
+ return nsegs;
+}
+
+/*
+ * Prepost any receive buffer, then post send.
+ *
+ * Receive buffer is donated to hardware, reclaimed upon recv completion.
+ */
+int
+rpcrdma_ep_post(struct rpcrdma_ia *ia,
+ struct rpcrdma_ep *ep,
+ struct rpcrdma_req *req)
+{
+ struct ib_send_wr send_wr, *send_wr_fail;
+ struct rpcrdma_rep *rep = req->rl_reply;
+ int rc;
+
+ if (rep) {
+ rc = rpcrdma_ep_post_recv(ia, ep, rep);
+ if (rc)
+ goto out;
+ req->rl_reply = NULL;
+ }
+
+ send_wr.next = NULL;
+ send_wr.wr_id = 0ULL; /* no send cookie */
+ send_wr.sg_list = req->rl_send_iov;
+ send_wr.num_sge = req->rl_niovs;
+ send_wr.opcode = IB_WR_SEND;
+ send_wr.imm_data = 0;
+ if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
+ ib_dma_sync_single_for_device(ia->ri_id->device,
+ req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
+ DMA_TO_DEVICE);
+ ib_dma_sync_single_for_device(ia->ri_id->device,
+ req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
+ DMA_TO_DEVICE);
+ ib_dma_sync_single_for_device(ia->ri_id->device,
+ req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
+ DMA_TO_DEVICE);
+
+ if (DECR_CQCOUNT(ep) > 0)
+ send_wr.send_flags = 0;
+ else { /* Provider must take a send completion every now and then */
+ INIT_CQCOUNT(ep);
+ send_wr.send_flags = IB_SEND_SIGNALED;
+ }
+
+ rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
+ if (rc)
+ dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
+ rc);
+out:
+ return rc;
+}
+
+/*
+ * (Re)post a receive buffer.
+ */
+int
+rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
+ struct rpcrdma_ep *ep,
+ struct rpcrdma_rep *rep)
+{
+ struct ib_recv_wr recv_wr, *recv_wr_fail;
+ int rc;
+
+ recv_wr.next = NULL;
+ recv_wr.wr_id = (u64) (unsigned long) rep;
+ recv_wr.sg_list = &rep->rr_iov;
+ recv_wr.num_sge = 1;
+
+ ib_dma_sync_single_for_cpu(ia->ri_id->device,
+ rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
+
+ DECR_CQCOUNT(ep);
+ rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
+
+ if (rc)
+ dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
+ rc);
+ return rc;
+}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
new file mode 100644
index 00000000000..2427822f8bd
--- /dev/null
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the BSD-type
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * Neither the name of the Network Appliance, Inc. nor the names of
+ * its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LINUX_SUNRPC_XPRT_RDMA_H
+#define _LINUX_SUNRPC_XPRT_RDMA_H
+
+#include <linux/wait.h> /* wait_queue_head_t, etc */
+#include <linux/spinlock.h> /* spinlock_t, etc */
+#include <asm/atomic.h> /* atomic_t, etc */
+
+#include <rdma/rdma_cm.h> /* RDMA connection api */
+#include <rdma/ib_verbs.h> /* RDMA verbs api */
+
+#include <linux/sunrpc/clnt.h> /* rpc_xprt */
+#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
+#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
+
+/*
+ * Interface Adapter -- one per transport instance
+ */
+struct rpcrdma_ia {
+ struct rdma_cm_id *ri_id;
+ struct ib_pd *ri_pd;
+ struct ib_mr *ri_bind_mem;
+ struct completion ri_done;
+ int ri_async_rc;
+ enum rpcrdma_memreg ri_memreg_strategy;
+};
+
+/*
+ * RDMA Endpoint -- one per transport instance
+ */
+
+struct rpcrdma_ep {
+ atomic_t rep_cqcount;
+ int rep_cqinit;
+ int rep_connected;
+ struct rpcrdma_ia *rep_ia;
+ struct ib_cq *rep_cq;
+ struct ib_qp_init_attr rep_attr;
+ wait_queue_head_t rep_connect_wait;
+ struct ib_sge rep_pad; /* holds zeroed pad */
+ struct ib_mr *rep_pad_mr; /* holds zeroed pad */
+ void (*rep_func)(struct rpcrdma_ep *);
+ struct rpc_xprt *rep_xprt; /* for rep_func */
+ struct rdma_conn_param rep_remote_cma;
+ struct sockaddr_storage rep_remote_addr;
+};
+
+#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
+#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
+
+/*
+ * struct rpcrdma_rep -- this structure encapsulates state required to recv
+ * and complete a reply, asychronously. It needs several pieces of
+ * state:
+ * o recv buffer (posted to provider)
+ * o ib_sge (also donated to provider)
+ * o status of reply (length, success or not)
+ * o bookkeeping state to get run by tasklet (list, etc)
+ *
+ * These are allocated during initialization, per-transport instance;
+ * however, the tasklet execution list itself is global, as it should
+ * always be pretty short.
+ *
+ * N of these are associated with a transport instance, and stored in
+ * struct rpcrdma_buffer. N is the max number of outstanding requests.
+ */
+
+/* temporary static scatter/gather max */
+#define RPCRDMA_MAX_DATA_SEGS (8) /* max scatter/gather */
+#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */
+#define MAX_RPCRDMAHDR (\
+ /* max supported RPC/RDMA header */ \
+ sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \
+ (sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32))
+
+struct rpcrdma_buffer;
+
+struct rpcrdma_rep {
+ unsigned int rr_len; /* actual received reply length */
+ struct rpcrdma_buffer *rr_buffer; /* home base for this structure */
+ struct rpc_xprt *rr_xprt; /* needed for request/reply matching */
+ void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */
+ struct list_head rr_list; /* tasklet list */
+ wait_queue_head_t rr_unbind; /* optional unbind wait */
+ struct ib_sge rr_iov; /* for posting */
+ struct ib_mr *rr_handle; /* handle for mem in rr_iov */
+ char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */
+};
+
+/*
+ * struct rpcrdma_req -- structure central to the request/reply sequence.
+ *
+ * N of these are associated with a transport instance, and stored in
+ * struct rpcrdma_buffer. N is the max number of outstanding requests.
+ *
+ * It includes pre-registered buffer memory for send AND recv.
+ * The recv buffer, however, is not owned by this structure, and
+ * is "donated" to the hardware when a recv is posted. When a
+ * reply is handled, the recv buffer used is given back to the
+ * struct rpcrdma_req associated with the request.
+ *
+ * In addition to the basic memory, this structure includes an array
+ * of iovs for send operations. The reason is that the iovs passed to
+ * ib_post_{send,recv} must not be modified until the work request
+ * completes.
+ *
+ * NOTES:
+ * o RPCRDMA_MAX_SEGS is the max number of addressible chunk elements we
+ * marshal. The number needed varies depending on the iov lists that
+ * are passed to us, the memory registration mode we are in, and if
+ * physical addressing is used, the layout.
+ */
+
+struct rpcrdma_mr_seg { /* chunk descriptors */
+ union { /* chunk memory handles */
+ struct ib_mr *rl_mr; /* if registered directly */
+ struct rpcrdma_mw { /* if registered from region */
+ union {
+ struct ib_mw *mw;
+ struct ib_fmr *fmr;
+ } r;
+ struct list_head mw_list;
+ } *rl_mw;
+ } mr_chunk;
+ u64 mr_base; /* registration result */
+ u32 mr_rkey; /* registration result */
+ u32 mr_len; /* length of chunk or segment */
+ int mr_nsegs; /* number of segments in chunk or 0 */
+ enum dma_data_direction mr_dir; /* segment mapping direction */
+ dma_addr_t mr_dma; /* segment mapping address */
+ size_t mr_dmalen; /* segment mapping length */
+ struct page *mr_page; /* owning page, if any */
+ char *mr_offset; /* kva if no page, else offset */
+};
+
+struct rpcrdma_req {
+ size_t rl_size; /* actual length of buffer */
+ unsigned int rl_niovs; /* 0, 2 or 4 */
+ unsigned int rl_nchunks; /* non-zero if chunks */
+ struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
+ struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
+ struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
+ struct ib_sge rl_send_iov[4]; /* for active requests */
+ struct ib_sge rl_iov; /* for posting */
+ struct ib_mr *rl_handle; /* handle for mem in rl_iov */
+ char rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */
+ __u32 rl_xdr_buf[0]; /* start of returned rpc rq_buffer */
+};
+#define rpcr_to_rdmar(r) \
+ container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0])
+
+/*
+ * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
+ * inline requests/replies, and client/server credits.
+ *
+ * One of these is associated with a transport instance
+ */
+struct rpcrdma_buffer {
+ spinlock_t rb_lock; /* protects indexes */
+ atomic_t rb_credits; /* most recent server credits */
+ unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */
+ int rb_max_requests;/* client max requests */
+ struct list_head rb_mws; /* optional memory windows/fmrs */
+ int rb_send_index;
+ struct rpcrdma_req **rb_send_bufs;
+ int rb_recv_index;
+ struct rpcrdma_rep **rb_recv_bufs;
+ char *rb_pool;
+};
+#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
+
+/*
+ * Internal structure for transport instance creation. This
+ * exists primarily for modularity.
+ *
+ * This data should be set with mount options
+ */
+struct rpcrdma_create_data_internal {
+ struct sockaddr_storage addr; /* RDMA server address */
+ unsigned int max_requests; /* max requests (slots) in flight */
+ unsigned int rsize; /* mount rsize - max read hdr+data */
+ unsigned int wsize; /* mount wsize - max write hdr+data */
+ unsigned int inline_rsize; /* max non-rdma read data payload */
+ unsigned int inline_wsize; /* max non-rdma write data payload */
+ unsigned int padding; /* non-rdma write header padding */
+};
+
+#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \
+ (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_rsize)
+
+#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\
+ (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_wsize)
+
+#define RPCRDMA_INLINE_PAD_VALUE(rq)\
+ rpcx_to_rdmad(rq->rq_task->tk_xprt).padding
+
+/*
+ * Statistics for RPCRDMA
+ */
+struct rpcrdma_stats {
+ unsigned long read_chunk_count;
+ unsigned long write_chunk_count;
+ unsigned long reply_chunk_count;
+
+ unsigned long long total_rdma_request;
+ unsigned long long total_rdma_reply;
+
+ unsigned long long pullup_copy_count;
+ unsigned long long fixup_copy_count;
+ unsigned long hardway_register_count;
+ unsigned long failed_marshal_count;
+ unsigned long bad_reply_count;
+};
+
+/*
+ * RPCRDMA transport -- encapsulates the structures above for
+ * integration with RPC.
+ *
+ * The contained structures are embedded, not pointers,
+ * for convenience. This structure need not be visible externally.
+ *
+ * It is allocated and initialized during mount, and released
+ * during unmount.
+ */
+struct rpcrdma_xprt {
+ struct rpc_xprt xprt;
+ struct rpcrdma_ia rx_ia;
+ struct rpcrdma_ep rx_ep;
+ struct rpcrdma_buffer rx_buf;
+ struct rpcrdma_create_data_internal rx_data;
+ struct delayed_work rdma_connect;
+ struct rpcrdma_stats rx_stats;
+};
+
+#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
+#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
+
+/*
+ * Interface Adapter calls - xprtrdma/verbs.c
+ */
+int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int);
+void rpcrdma_ia_close(struct rpcrdma_ia *);
+
+/*
+ * Endpoint calls - xprtrdma/verbs.c
+ */
+int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
+ struct rpcrdma_create_data_internal *);
+int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
+int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
+int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
+
+int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
+ struct rpcrdma_req *);
+int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
+ struct rpcrdma_rep *);
+
+/*
+ * Buffer calls - xprtrdma/verbs.c
+ */
+int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *,
+ struct rpcrdma_ia *,
+ struct rpcrdma_create_data_internal *);
+void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
+
+struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
+void rpcrdma_buffer_put(struct rpcrdma_req *);
+void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
+void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
+
+int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int,
+ struct ib_mr **, struct ib_sge *);
+int rpcrdma_deregister_internal(struct rpcrdma_ia *,
+ struct ib_mr *, struct ib_sge *);
+
+int rpcrdma_register_external(struct rpcrdma_mr_seg *,
+ int, int, struct rpcrdma_xprt *);
+int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
+ struct rpcrdma_xprt *, void *);
+
+/*
+ * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
+ */
+void rpcrdma_conn_func(struct rpcrdma_ep *);
+void rpcrdma_reply_handler(struct rpcrdma_rep *);
+
+/*
+ * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
+ */
+int rpcrdma_marshal_req(struct rpc_rqst *);
+
+#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 282efd447a6..02298f529da 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -13,10 +13,14 @@
* (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no>
*
* IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com>
+ *
+ * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005.
+ * <gilles.quillard@bull.net>
*/
#include <linux/types.h>
#include <linux/slab.h>
+#include <linux/module.h>
#include <linux/capability.h>
#include <linux/pagemap.h>
#include <linux/errno.h>
@@ -28,6 +32,7 @@
#include <linux/tcp.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/xprtsock.h>
#include <linux/file.h>
#include <net/sock.h>
@@ -260,14 +265,29 @@ struct sock_xprt {
#define TCP_RCV_COPY_XID (1UL << 2)
#define TCP_RCV_COPY_DATA (1UL << 3)
-static void xs_format_peer_addresses(struct rpc_xprt *xprt)
+static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt)
+{
+ return (struct sockaddr *) &xprt->addr;
+}
+
+static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt)
{
- struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
+ return (struct sockaddr_in *) &xprt->addr;
+}
+
+static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt)
+{
+ return (struct sockaddr_in6 *) &xprt->addr;
+}
+
+static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt)
+{
+ struct sockaddr_in *addr = xs_addr_in(xprt);
char *buf;
buf = kzalloc(20, GFP_KERNEL);
if (buf) {
- snprintf(buf, 20, "%u.%u.%u.%u",
+ snprintf(buf, 20, NIPQUAD_FMT,
NIPQUAD(addr->sin_addr.s_addr));
}
xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
@@ -279,26 +299,123 @@ static void xs_format_peer_addresses(struct rpc_xprt *xprt)
}
xprt->address_strings[RPC_DISPLAY_PORT] = buf;
- if (xprt->prot == IPPROTO_UDP)
- xprt->address_strings[RPC_DISPLAY_PROTO] = "udp";
- else
- xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp";
+ buf = kzalloc(8, GFP_KERNEL);
+ if (buf) {
+ if (xprt->prot == IPPROTO_UDP)
+ snprintf(buf, 8, "udp");
+ else
+ snprintf(buf, 8, "tcp");
+ }
+ xprt->address_strings[RPC_DISPLAY_PROTO] = buf;
buf = kzalloc(48, GFP_KERNEL);
if (buf) {
- snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s",
+ snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s",
NIPQUAD(addr->sin_addr.s_addr),
ntohs(addr->sin_port),
xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
}
xprt->address_strings[RPC_DISPLAY_ALL] = buf;
+
+ buf = kzalloc(10, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 10, "%02x%02x%02x%02x",
+ NIPQUAD(addr->sin_addr.s_addr));
+ }
+ xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
+
+ buf = kzalloc(8, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 8, "%4hx",
+ ntohs(addr->sin_port));
+ }
+ xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
+
+ buf = kzalloc(30, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 30, NIPQUAD_FMT".%u.%u",
+ NIPQUAD(addr->sin_addr.s_addr),
+ ntohs(addr->sin_port) >> 8,
+ ntohs(addr->sin_port) & 0xff);
+ }
+ xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
+
+ xprt->address_strings[RPC_DISPLAY_NETID] =
+ kstrdup(xprt->prot == IPPROTO_UDP ?
+ RPCBIND_NETID_UDP : RPCBIND_NETID_TCP, GFP_KERNEL);
+}
+
+static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt)
+{
+ struct sockaddr_in6 *addr = xs_addr_in6(xprt);
+ char *buf;
+
+ buf = kzalloc(40, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 40, NIP6_FMT,
+ NIP6(addr->sin6_addr));
+ }
+ xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
+
+ buf = kzalloc(8, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 8, "%u",
+ ntohs(addr->sin6_port));
+ }
+ xprt->address_strings[RPC_DISPLAY_PORT] = buf;
+
+ buf = kzalloc(8, GFP_KERNEL);
+ if (buf) {
+ if (xprt->prot == IPPROTO_UDP)
+ snprintf(buf, 8, "udp");
+ else
+ snprintf(buf, 8, "tcp");
+ }
+ xprt->address_strings[RPC_DISPLAY_PROTO] = buf;
+
+ buf = kzalloc(64, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s",
+ NIP6(addr->sin6_addr),
+ ntohs(addr->sin6_port),
+ xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
+ }
+ xprt->address_strings[RPC_DISPLAY_ALL] = buf;
+
+ buf = kzalloc(36, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 36, NIP6_SEQFMT,
+ NIP6(addr->sin6_addr));
+ }
+ xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
+
+ buf = kzalloc(8, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 8, "%4hx",
+ ntohs(addr->sin6_port));
+ }
+ xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf;
+
+ buf = kzalloc(50, GFP_KERNEL);
+ if (buf) {
+ snprintf(buf, 50, NIP6_FMT".%u.%u",
+ NIP6(addr->sin6_addr),
+ ntohs(addr->sin6_port) >> 8,
+ ntohs(addr->sin6_port) & 0xff);
+ }
+ xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
+
+ xprt->address_strings[RPC_DISPLAY_NETID] =
+ kstrdup(xprt->prot == IPPROTO_UDP ?
+ RPCBIND_NETID_UDP6 : RPCBIND_NETID_TCP6, GFP_KERNEL);
}
static void xs_free_peer_addresses(struct rpc_xprt *xprt)
{
- kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
- kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
- kfree(xprt->address_strings[RPC_DISPLAY_ALL]);
+ int i;
+
+ for (i = 0; i < RPC_DISPLAY_MAX; i++)
+ kfree(xprt->address_strings[i]);
}
#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
@@ -463,19 +580,20 @@ static int xs_udp_send_request(struct rpc_task *task)
req->rq_xtime = jiffies;
status = xs_sendpages(transport->sock,
- (struct sockaddr *) &xprt->addr,
+ xs_addr(xprt),
xprt->addrlen, xdr,
req->rq_bytes_sent);
dprintk("RPC: xs_udp_send_request(%u) = %d\n",
xdr->len - req->rq_bytes_sent, status);
- if (likely(status >= (int) req->rq_slen))
- return 0;
-
- /* Still some bytes left; set up for a retry later. */
- if (status > 0)
+ if (status >= 0) {
+ task->tk_bytes_sent += status;
+ if (status >= req->rq_slen)
+ return 0;
+ /* Still some bytes left; set up for a retry later. */
status = -EAGAIN;
+ }
switch (status) {
case -ENETUNREACH:
@@ -523,7 +641,8 @@ static int xs_tcp_send_request(struct rpc_task *task)
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
- int status, retry = 0;
+ int status;
+ unsigned int retry = 0;
xs_encode_tcp_record_marker(&req->rq_snd_buf);
@@ -661,6 +780,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
xs_free_peer_addresses(xprt);
kfree(xprt->slot);
kfree(xprt);
+ module_put(THIS_MODULE);
}
static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
@@ -1139,14 +1259,23 @@ static unsigned short xs_get_random_port(void)
*/
static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
{
- struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr;
+ struct sockaddr *addr = xs_addr(xprt);
dprintk("RPC: setting port for xprt %p to %u\n", xprt, port);
- sap->sin_port = htons(port);
+ switch (addr->sa_family) {
+ case AF_INET:
+ ((struct sockaddr_in *)addr)->sin_port = htons(port);
+ break;
+ case AF_INET6:
+ ((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
+ break;
+ default:
+ BUG();
+ }
}
-static int xs_bind(struct sock_xprt *transport, struct socket *sock)
+static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
{
struct sockaddr_in myaddr = {
.sin_family = AF_INET,
@@ -1174,8 +1303,42 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
else
port--;
} while (err == -EADDRINUSE && port != transport->port);
- dprintk("RPC: xs_bind "NIPQUAD_FMT":%u: %s (%d)\n",
- NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err);
+ dprintk("RPC: %s "NIPQUAD_FMT":%u: %s (%d)\n",
+ __FUNCTION__, NIPQUAD(myaddr.sin_addr),
+ port, err ? "failed" : "ok", err);
+ return err;
+}
+
+static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
+{
+ struct sockaddr_in6 myaddr = {
+ .sin6_family = AF_INET6,
+ };
+ struct sockaddr_in6 *sa;
+ int err;
+ unsigned short port = transport->port;
+
+ if (!transport->xprt.resvport)
+ port = 0;
+ sa = (struct sockaddr_in6 *)&transport->addr;
+ myaddr.sin6_addr = sa->sin6_addr;
+ do {
+ myaddr.sin6_port = htons(port);
+ err = kernel_bind(sock, (struct sockaddr *) &myaddr,
+ sizeof(myaddr));
+ if (!transport->xprt.resvport)
+ break;
+ if (err == 0) {
+ transport->port = port;
+ break;
+ }
+ if (port <= xprt_min_resvport)
+ port = xprt_max_resvport;
+ else
+ port--;
+ } while (err == -EADDRINUSE && port != transport->port);
+ dprintk("RPC: xs_bind6 "NIP6_FMT":%u: %s (%d)\n",
+ NIP6(myaddr.sin6_addr), port, err ? "failed" : "ok", err);
return err;
}
@@ -1183,38 +1346,69 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
static struct lock_class_key xs_key[2];
static struct lock_class_key xs_slock_key[2];
-static inline void xs_reclassify_socket(struct socket *sock)
+static inline void xs_reclassify_socket4(struct socket *sock)
{
struct sock *sk = sock->sk;
+
BUG_ON(sock_owned_by_user(sk));
- switch (sk->sk_family) {
- case AF_INET:
- sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS",
- &xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]);
- break;
+ sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC",
+ &xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]);
+}
- case AF_INET6:
- sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS",
- &xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]);
- break;
+static inline void xs_reclassify_socket6(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
- default:
- BUG();
- }
+ BUG_ON(sock_owned_by_user(sk));
+ sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC",
+ &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]);
}
#else
-static inline void xs_reclassify_socket(struct socket *sock)
+static inline void xs_reclassify_socket4(struct socket *sock)
+{
+}
+
+static inline void xs_reclassify_socket6(struct socket *sock)
{
}
#endif
+static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
+{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+
+ if (!transport->inet) {
+ struct sock *sk = sock->sk;
+
+ write_lock_bh(&sk->sk_callback_lock);
+
+ sk->sk_user_data = xprt;
+ transport->old_data_ready = sk->sk_data_ready;
+ transport->old_state_change = sk->sk_state_change;
+ transport->old_write_space = sk->sk_write_space;
+ sk->sk_data_ready = xs_udp_data_ready;
+ sk->sk_write_space = xs_udp_write_space;
+ sk->sk_no_check = UDP_CSUM_NORCV;
+ sk->sk_allocation = GFP_ATOMIC;
+
+ xprt_set_connected(xprt);
+
+ /* Reset to new socket */
+ transport->sock = sock;
+ transport->inet = sk;
+
+ write_unlock_bh(&sk->sk_callback_lock);
+ }
+ xs_udp_do_set_buffer_size(xprt);
+}
+
/**
- * xs_udp_connect_worker - set up a UDP socket
+ * xs_udp_connect_worker4 - set up a UDP socket
* @work: RPC transport to connect
*
* Invoked by a work queue tasklet.
*/
-static void xs_udp_connect_worker(struct work_struct *work)
+static void xs_udp_connect_worker4(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, connect_worker.work);
@@ -1232,9 +1426,9 @@ static void xs_udp_connect_worker(struct work_struct *work)
dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
goto out;
}
- xs_reclassify_socket(sock);
+ xs_reclassify_socket4(sock);
- if (xs_bind(transport, sock)) {
+ if (xs_bind4(transport, sock)) {
sock_release(sock);
goto out;
}
@@ -1242,29 +1436,48 @@ static void xs_udp_connect_worker(struct work_struct *work)
dprintk("RPC: worker connecting xprt %p to address: %s\n",
xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
- if (!transport->inet) {
- struct sock *sk = sock->sk;
+ xs_udp_finish_connecting(xprt, sock);
+ status = 0;
+out:
+ xprt_wake_pending_tasks(xprt, status);
+ xprt_clear_connecting(xprt);
+}
- write_lock_bh(&sk->sk_callback_lock);
+/**
+ * xs_udp_connect_worker6 - set up a UDP socket
+ * @work: RPC transport to connect
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_udp_connect_worker6(struct work_struct *work)
+{
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, connect_worker.work);
+ struct rpc_xprt *xprt = &transport->xprt;
+ struct socket *sock = transport->sock;
+ int err, status = -EIO;
- sk->sk_user_data = xprt;
- transport->old_data_ready = sk->sk_data_ready;
- transport->old_state_change = sk->sk_state_change;
- transport->old_write_space = sk->sk_write_space;
- sk->sk_data_ready = xs_udp_data_ready;
- sk->sk_write_space = xs_udp_write_space;
- sk->sk_no_check = UDP_CSUM_NORCV;
- sk->sk_allocation = GFP_ATOMIC;
+ if (xprt->shutdown || !xprt_bound(xprt))
+ goto out;
- xprt_set_connected(xprt);
+ /* Start by resetting any existing state */
+ xs_close(xprt);
- /* Reset to new socket */
- transport->sock = sock;
- transport->inet = sk;
+ if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+ dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
+ goto out;
+ }
+ xs_reclassify_socket6(sock);
- write_unlock_bh(&sk->sk_callback_lock);
+ if (xs_bind6(transport, sock) < 0) {
+ sock_release(sock);
+ goto out;
}
- xs_udp_do_set_buffer_size(xprt);
+
+ dprintk("RPC: worker connecting xprt %p to address: %s\n",
+ xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+
+ xs_udp_finish_connecting(xprt, sock);
status = 0;
out:
xprt_wake_pending_tasks(xprt, status);
@@ -1295,13 +1508,52 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
result);
}
+static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
+{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+
+ if (!transport->inet) {
+ struct sock *sk = sock->sk;
+
+ write_lock_bh(&sk->sk_callback_lock);
+
+ sk->sk_user_data = xprt;
+ transport->old_data_ready = sk->sk_data_ready;
+ transport->old_state_change = sk->sk_state_change;
+ transport->old_write_space = sk->sk_write_space;
+ sk->sk_data_ready = xs_tcp_data_ready;
+ sk->sk_state_change = xs_tcp_state_change;
+ sk->sk_write_space = xs_tcp_write_space;
+ sk->sk_allocation = GFP_ATOMIC;
+
+ /* socket options */
+ sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
+ sock_reset_flag(sk, SOCK_LINGER);
+ tcp_sk(sk)->linger2 = 0;
+ tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
+
+ xprt_clear_connected(xprt);
+
+ /* Reset to new socket */
+ transport->sock = sock;
+ transport->inet = sk;
+
+ write_unlock_bh(&sk->sk_callback_lock);
+ }
+
+ /* Tell the socket layer to start connecting... */
+ xprt->stat.connect_count++;
+ xprt->stat.connect_start = jiffies;
+ return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
+}
+
/**
- * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint
+ * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
* @work: RPC transport to connect
*
* Invoked by a work queue tasklet.
*/
-static void xs_tcp_connect_worker(struct work_struct *work)
+static void xs_tcp_connect_worker4(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, connect_worker.work);
@@ -1315,13 +1567,12 @@ static void xs_tcp_connect_worker(struct work_struct *work)
if (!sock) {
/* start from scratch */
if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
- dprintk("RPC: can't create TCP transport "
- "socket (%d).\n", -err);
+ dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
goto out;
}
- xs_reclassify_socket(sock);
+ xs_reclassify_socket4(sock);
- if (xs_bind(transport, sock)) {
+ if (xs_bind4(transport, sock) < 0) {
sock_release(sock);
goto out;
}
@@ -1332,43 +1583,70 @@ static void xs_tcp_connect_worker(struct work_struct *work)
dprintk("RPC: worker connecting xprt %p to address: %s\n",
xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
- if (!transport->inet) {
- struct sock *sk = sock->sk;
-
- write_lock_bh(&sk->sk_callback_lock);
+ status = xs_tcp_finish_connecting(xprt, sock);
+ dprintk("RPC: %p connect status %d connected %d sock state %d\n",
+ xprt, -status, xprt_connected(xprt),
+ sock->sk->sk_state);
+ if (status < 0) {
+ switch (status) {
+ case -EINPROGRESS:
+ case -EALREADY:
+ goto out_clear;
+ case -ECONNREFUSED:
+ case -ECONNRESET:
+ /* retry with existing socket, after a delay */
+ break;
+ default:
+ /* get rid of existing socket, and retry */
+ xs_close(xprt);
+ break;
+ }
+ }
+out:
+ xprt_wake_pending_tasks(xprt, status);
+out_clear:
+ xprt_clear_connecting(xprt);
+}
- sk->sk_user_data = xprt;
- transport->old_data_ready = sk->sk_data_ready;
- transport->old_state_change = sk->sk_state_change;
- transport->old_write_space = sk->sk_write_space;
- sk->sk_data_ready = xs_tcp_data_ready;
- sk->sk_state_change = xs_tcp_state_change;
- sk->sk_write_space = xs_tcp_write_space;
- sk->sk_allocation = GFP_ATOMIC;
+/**
+ * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
+ * @work: RPC transport to connect
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_tcp_connect_worker6(struct work_struct *work)
+{
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, connect_worker.work);
+ struct rpc_xprt *xprt = &transport->xprt;
+ struct socket *sock = transport->sock;
+ int err, status = -EIO;
- /* socket options */
- sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
- sock_reset_flag(sk, SOCK_LINGER);
- tcp_sk(sk)->linger2 = 0;
- tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
+ if (xprt->shutdown || !xprt_bound(xprt))
+ goto out;
- xprt_clear_connected(xprt);
+ if (!sock) {
+ /* start from scratch */
+ if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
+ dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
+ goto out;
+ }
+ xs_reclassify_socket6(sock);
- /* Reset to new socket */
- transport->sock = sock;
- transport->inet = sk;
+ if (xs_bind6(transport, sock) < 0) {
+ sock_release(sock);
+ goto out;
+ }
+ } else
+ /* "close" the socket, preserving the local port */
+ xs_tcp_reuse_connection(xprt);
- write_unlock_bh(&sk->sk_callback_lock);
- }
+ dprintk("RPC: worker connecting xprt %p to address: %s\n",
+ xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
- /* Tell the socket layer to start connecting... */
- xprt->stat.connect_count++;
- xprt->stat.connect_start = jiffies;
- status = kernel_connect(sock, (struct sockaddr *) &xprt->addr,
- xprt->addrlen, O_NONBLOCK);
+ status = xs_tcp_finish_connecting(xprt, sock);
dprintk("RPC: %p connect status %d connected %d sock state %d\n",
- xprt, -status, xprt_connected(xprt),
- sock->sk->sk_state);
+ xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
if (status < 0) {
switch (status) {
case -EINPROGRESS:
@@ -1508,7 +1786,8 @@ static struct rpc_xprt_ops xs_tcp_ops = {
.print_stats = xs_tcp_print_stats,
};
-static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned int slot_table_size)
+static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
+ unsigned int slot_table_size)
{
struct rpc_xprt *xprt;
struct sock_xprt *new;
@@ -1549,8 +1828,9 @@ static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned
* @args: rpc transport creation arguments
*
*/
-struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args)
+struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
{
+ struct sockaddr *addr = args->dstaddr;
struct rpc_xprt *xprt;
struct sock_xprt *transport;
@@ -1559,15 +1839,11 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args)
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
- if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0)
- xprt_set_bound(xprt);
-
xprt->prot = IPPROTO_UDP;
xprt->tsh_size = 0;
/* XXX: header size can vary due to auth type, IPv6, etc. */
xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
- INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker);
xprt->bind_timeout = XS_BIND_TO;
xprt->connect_timeout = XS_UDP_CONN_TO;
xprt->reestablish_timeout = XS_UDP_REEST_TO;
@@ -1580,11 +1856,37 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args)
else
xprt_set_timeout(&xprt->timeout, 5, 5 * HZ);
- xs_format_peer_addresses(xprt);
+ switch (addr->sa_family) {
+ case AF_INET:
+ if (((struct sockaddr_in *)addr)->sin_port != htons(0))
+ xprt_set_bound(xprt);
+
+ INIT_DELAYED_WORK(&transport->connect_worker,
+ xs_udp_connect_worker4);
+ xs_format_ipv4_peer_addresses(xprt);
+ break;
+ case AF_INET6:
+ if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
+ xprt_set_bound(xprt);
+
+ INIT_DELAYED_WORK(&transport->connect_worker,
+ xs_udp_connect_worker6);
+ xs_format_ipv6_peer_addresses(xprt);
+ break;
+ default:
+ kfree(xprt);
+ return ERR_PTR(-EAFNOSUPPORT);
+ }
+
dprintk("RPC: set up transport to address %s\n",
xprt->address_strings[RPC_DISPLAY_ALL]);
- return xprt;
+ if (try_module_get(THIS_MODULE))
+ return xprt;
+
+ kfree(xprt->slot);
+ kfree(xprt);
+ return ERR_PTR(-EINVAL);
}
/**
@@ -1592,8 +1894,9 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args)
* @args: rpc transport creation arguments
*
*/
-struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args)
+struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
{
+ struct sockaddr *addr = args->dstaddr;
struct rpc_xprt *xprt;
struct sock_xprt *transport;
@@ -1602,14 +1905,10 @@ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args)
return xprt;
transport = container_of(xprt, struct sock_xprt, xprt);
- if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0)
- xprt_set_bound(xprt);
-
xprt->prot = IPPROTO_TCP;
xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
- INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker);
xprt->bind_timeout = XS_BIND_TO;
xprt->connect_timeout = XS_TCP_CONN_TO;
xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
@@ -1622,15 +1921,55 @@ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args)
else
xprt_set_timeout(&xprt->timeout, 2, 60 * HZ);
- xs_format_peer_addresses(xprt);
+ switch (addr->sa_family) {
+ case AF_INET:
+ if (((struct sockaddr_in *)addr)->sin_port != htons(0))
+ xprt_set_bound(xprt);
+
+ INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4);
+ xs_format_ipv4_peer_addresses(xprt);
+ break;
+ case AF_INET6:
+ if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
+ xprt_set_bound(xprt);
+
+ INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6);
+ xs_format_ipv6_peer_addresses(xprt);
+ break;
+ default:
+ kfree(xprt);
+ return ERR_PTR(-EAFNOSUPPORT);
+ }
+
dprintk("RPC: set up transport to address %s\n",
xprt->address_strings[RPC_DISPLAY_ALL]);
- return xprt;
+ if (try_module_get(THIS_MODULE))
+ return xprt;
+
+ kfree(xprt->slot);
+ kfree(xprt);
+ return ERR_PTR(-EINVAL);
}
+static struct xprt_class xs_udp_transport = {
+ .list = LIST_HEAD_INIT(xs_udp_transport.list),
+ .name = "udp",
+ .owner = THIS_MODULE,
+ .ident = IPPROTO_UDP,
+ .setup = xs_setup_udp,
+};
+
+static struct xprt_class xs_tcp_transport = {
+ .list = LIST_HEAD_INIT(xs_tcp_transport.list),
+ .name = "tcp",
+ .owner = THIS_MODULE,
+ .ident = IPPROTO_TCP,
+ .setup = xs_setup_tcp,
+};
+
/**
- * init_socket_xprt - set up xprtsock's sysctls
+ * init_socket_xprt - set up xprtsock's sysctls, register with RPC client
*
*/
int init_socket_xprt(void)
@@ -1640,11 +1979,14 @@ int init_socket_xprt(void)
sunrpc_table_header = register_sysctl_table(sunrpc_table);
#endif
+ xprt_register_transport(&xs_udp_transport);
+ xprt_register_transport(&xs_tcp_transport);
+
return 0;
}
/**
- * cleanup_socket_xprt - remove xprtsock's sysctls
+ * cleanup_socket_xprt - remove xprtsock's sysctls, unregister
*
*/
void cleanup_socket_xprt(void)
@@ -1655,4 +1997,7 @@ void cleanup_socket_xprt(void)
sunrpc_table_header = NULL;
}
#endif
+
+ xprt_unregister_transport(&xs_udp_transport);
+ xprt_unregister_transport(&xs_tcp_transport);
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2b57eaf66ab..6996cba5aa9 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -334,7 +334,7 @@ static void unix_write_space(struct sock *sk)
read_lock(&sk->sk_callback_lock);
if (unix_writable(sk)) {
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- wake_up_interruptible(sk->sk_sleep);
+ wake_up_interruptible_sync(sk->sk_sleep);
sk_wake_async(sk, 2, POLL_OUT);
}
read_unlock(&sk->sk_callback_lock);
@@ -1639,7 +1639,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
if (!skb)
goto out_unlock;
- wake_up_interruptible(&u->peer_wait);
+ wake_up_interruptible_sync(&u->peer_wait);
if (msg->msg_name)
unix_copy_addr(msg, skb->sk);
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 2d5d2255a27..29f820e1825 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -53,8 +53,7 @@ static void wiphy_dev_release(struct device *dev)
}
#ifdef CONFIG_HOTPLUG
-static int wiphy_uevent(struct device *dev, char **envp,
- int num_envp, char *buf, int size)
+static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env)
{
/* TODO, we probably need stuff here */
return 0;