about | summary | refs | log | tree | commit | diff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r-- net/8021q/vlan.c 28
-rw-r--r-- net/8021q/vlan_dev.c 2
-rw-r--r-- net/core/dev.c 8
-rw-r--r-- net/core/pktgen.c 4
-rw-r--r-- net/ipv4/arp.c 5
-rw-r--r-- net/ipv4/ip_gre.c 146
-rw-r--r-- net/ipv4/ipip.c 130
-rw-r--r-- net/ipv4/route.c 2
-rw-r--r-- net/ipv4/tcp_output.c 10
-rw-r--r-- net/ipv6/addrconf.c 75
-rw-r--r-- net/ipv6/ndisc.c 8
-rw-r--r-- net/ipv6/route.c 12
-rw-r--r-- net/ipv6/sit.c 89
-rw-r--r-- net/key/af_key.c 2
-rw-r--r-- net/mac80211/mlme.c 29
-rw-r--r-- net/mac80211/util.c 37
-rw-r--r-- net/mac80211/wext.c 1
-rw-r--r-- net/sched/cls_api.c 2
-rw-r--r-- net/sunrpc/auth_generic.c 8
-rw-r--r-- net/sunrpc/svc_xprt.c 23
-rw-r--r-- net/sunrpc/svcauth_unix.c 4
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c 102
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_sendto.c 11
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_transport.c 290
-rw-r--r-- net/xfrm/xfrm_user.c 11
25 files changed, 387 insertions, 652 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 2a739adaa92..ab2225da0ee 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -382,6 +382,18 @@ static void vlan_sync_address(struct net_device *dev,
memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN);
}
+static void vlan_transfer_features(struct net_device *dev, /* Re-derive vlandev's feature bits from dev; dev is the device read from. */
+ struct net_device *vlandev) /* Device whose ->features field is rewritten. */
+{
+ unsigned long old_features = vlandev->features; /* Snapshot so a change can be detected below. */
+
+ vlandev->features &= ~dev->vlan_features; /* Clear every bit that dev->vlan_features covers... */
+ vlandev->features |= dev->features & dev->vlan_features; /* ...then set the covered bits that are currently on in dev->features. */
+
+ if (old_features != vlandev->features) /* Call netdev_features_change() only if the value actually changed. */
+ netdev_features_change(vlandev);
+}
+
static void __vlan_device_event(struct net_device *dev, unsigned long event)
{
switch (event) {
@@ -410,10 +422,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
int i, flgs;
struct net_device *vlandev;
- if (is_vlan_dev(dev)) {
+ if (is_vlan_dev(dev))
__vlan_device_event(dev, event);
- goto out;
- }
grp = __vlan_find_group(dev);
if (!grp)
@@ -450,6 +460,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
}
break;
+ case NETDEV_FEAT_CHANGE:
+ /* Propagate the real device's features to its VLAN devices */
+ for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+ vlandev = vlan_group_get_device(grp, i);
+ if (!vlandev)
+ continue;
+
+ vlan_transfer_features(dev, vlandev);
+ }
+
+ break;
+
case NETDEV_DOWN:
/* Put all VLANs for this dev in the down state too. */
for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index c961f082600..5d055c242ed 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -663,6 +663,8 @@ static int vlan_dev_init(struct net_device *dev)
(1<<__LINK_STATE_DORMANT))) |
(1<<__LINK_STATE_PRESENT);
+ dev->features |= real_dev->features & real_dev->vlan_features;
+
/* ipv6 shared card related stuff */
dev->dev_id = real_dev->dev_id;
diff --git a/net/core/dev.c b/net/core/dev.c
index a1607bc0cd4..58296307787 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -903,7 +903,11 @@ int dev_change_name(struct net_device *dev, char *newname)
strlcpy(dev->name, newname, IFNAMSIZ);
rollback:
- device_rename(&dev->dev, dev->name);
+ err = device_rename(&dev->dev, dev->name);
+ if (err) {
+ memcpy(dev->name, oldname, IFNAMSIZ);
+ return err;
+ }
write_lock_bh(&dev_base_lock);
hlist_del(&dev->name_hlist);
@@ -3137,7 +3141,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
* Load in the correct multicast list now the flags have changed.
*/
- if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST)
+ if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST)
dev->change_rx_flags(dev, IFF_MULTICAST);
dev_set_rx_mode(dev);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8dca2111049..fdf537707e5 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -390,6 +390,7 @@ struct pktgen_thread {
int cpu;
wait_queue_head_t queue;
+ struct completion start_done;
};
#define REMOVE 1
@@ -3414,6 +3415,7 @@ static int pktgen_thread_worker(void *arg)
BUG_ON(smp_processor_id() != cpu);
init_waitqueue_head(&t->queue);
+ complete(&t->start_done);
pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current));
@@ -3615,6 +3617,7 @@ static int __init pktgen_create_thread(int cpu)
INIT_LIST_HEAD(&t->if_list);
list_add_tail(&t->th_list, &pktgen_threads);
+ init_completion(&t->start_done);
p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
if (IS_ERR(p)) {
@@ -3639,6 +3642,7 @@ static int __init pktgen_create_thread(int cpu)
}
wake_up_process(p);
+ wait_for_completion(&t->start_done);
return 0;
}
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 418862f1bf2..9b539fa9fe1 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1288,7 +1288,6 @@ static void arp_format_neigh_entry(struct seq_file *seq,
struct neighbour *n)
{
char hbuffer[HBUFFERLEN];
- const char hexbuf[] = "0123456789ABCDEF";
int k, j;
char tbuf[16];
struct net_device *dev = n->dev;
@@ -1302,8 +1301,8 @@ static void arp_format_neigh_entry(struct seq_file *seq,
else {
#endif
for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) {
- hbuffer[k++] = hexbuf[(n->ha[j] >> 4) & 15];
- hbuffer[k++] = hexbuf[n->ha[j] & 15];
+ hbuffer[k++] = hex_asc_hi(n->ha[j]);
+ hbuffer[k++] = hex_asc_lo(n->ha[j]);
hbuffer[k++] = ':';
}
hbuffer[--k] = 0;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2ada033406d..4342cba4ff8 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -313,9 +313,8 @@ static void ipgre_tunnel_uninit(struct net_device *dev)
static void ipgre_err(struct sk_buff *skb, u32 info)
{
-#ifndef I_WISH_WORLD_WERE_PERFECT
-/* It is not :-( All the routers (except for Linux) return only
+/* All the routers (except for Linux) return only
8 bytes of packet payload. It means, that precise relaying of
ICMP in the real Internet is absolutely infeasible.
@@ -398,149 +397,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
out:
read_unlock(&ipgre_lock);
return;
-#else
- struct iphdr *iph = (struct iphdr*)dp;
- struct iphdr *eiph;
- __be16 *p = (__be16*)(dp+(iph->ihl<<2));
- const int type = icmp_hdr(skb)->type;
- const int code = icmp_hdr(skb)->code;
- int rel_type = 0;
- int rel_code = 0;
- __be32 rel_info = 0;
- __u32 n = 0;
- __be16 flags;
- int grehlen = (iph->ihl<<2) + 4;
- struct sk_buff *skb2;
- struct flowi fl;
- struct rtable *rt;
-
- if (p[1] != htons(ETH_P_IP))
- return;
-
- flags = p[0];
- if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
- if (flags&(GRE_VERSION|GRE_ROUTING))
- return;
- if (flags&GRE_CSUM)
- grehlen += 4;
- if (flags&GRE_KEY)
- grehlen += 4;
- if (flags&GRE_SEQ)
- grehlen += 4;
- }
- if (len < grehlen + sizeof(struct iphdr))
- return;
- eiph = (struct iphdr*)(dp + grehlen);
-
- switch (type) {
- default:
- return;
- case ICMP_PARAMETERPROB:
- n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
- if (n < (iph->ihl<<2))
- return;
-
- /* So... This guy found something strange INSIDE encapsulated
- packet. Well, he is fool, but what can we do ?
- */
- rel_type = ICMP_PARAMETERPROB;
- n -= grehlen;
- rel_info = htonl(n << 24);
- break;
-
- case ICMP_DEST_UNREACH:
- switch (code) {
- case ICMP_SR_FAILED:
- case ICMP_PORT_UNREACH:
- /* Impossible event. */
- return;
- case ICMP_FRAG_NEEDED:
- /* And it is the only really necessary thing :-) */
- n = ntohs(icmp_hdr(skb)->un.frag.mtu);
- if (n < grehlen+68)
- return;
- n -= grehlen;
- /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
- if (n > ntohs(eiph->tot_len))
- return;
- rel_info = htonl(n);
- break;
- default:
- /* All others are translated to HOST_UNREACH.
- rfc2003 contains "deep thoughts" about NET_UNREACH,
- I believe, it is just ether pollution. --ANK
- */
- rel_type = ICMP_DEST_UNREACH;
- rel_code = ICMP_HOST_UNREACH;
- break;
- }
- break;
- case ICMP_TIME_EXCEEDED:
- if (code != ICMP_EXC_TTL)
- return;
- break;
- }
-
- /* Prepare fake skb to feed it to icmp_send */
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (skb2 == NULL)
- return;
- dst_release(skb2->dst);
- skb2->dst = NULL;
- skb_pull(skb2, skb->data - (u8*)eiph);
- skb_reset_network_header(skb2);
-
- /* Try to guess incoming interface */
- memset(&fl, 0, sizeof(fl));
- fl.fl4_dst = eiph->saddr;
- fl.fl4_tos = RT_TOS(eiph->tos);
- fl.proto = IPPROTO_GRE;
- if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) {
- kfree_skb(skb2);
- return;
- }
- skb2->dev = rt->u.dst.dev;
-
- /* route "incoming" packet */
- if (rt->rt_flags&RTCF_LOCAL) {
- ip_rt_put(rt);
- rt = NULL;
- fl.fl4_dst = eiph->daddr;
- fl.fl4_src = eiph->saddr;
- fl.fl4_tos = eiph->tos;
- if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
- rt->u.dst.dev->type != ARPHRD_IPGRE) {
- ip_rt_put(rt);
- kfree_skb(skb2);
- return;
- }
- } else {
- ip_rt_put(rt);
- if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
- skb2->dst->dev->type != ARPHRD_IPGRE) {
- kfree_skb(skb2);
- return;
- }
- }
-
- /* change mtu on this route */
- if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- if (n > dst_mtu(skb2->dst)) {
- kfree_skb(skb2);
- return;
- }
- skb2->dst->ops->update_pmtu(skb2->dst, n);
- } else if (type == ICMP_TIME_EXCEEDED) {
- struct ip_tunnel *t = netdev_priv(skb2->dev);
- if (t->parms.iph.ttl) {
- rel_type = ICMP_DEST_UNREACH;
- rel_code = ICMP_HOST_UNREACH;
- }
- }
-
- icmp_send(skb2, rel_type, rel_code, rel_info);
- kfree_skb(skb2);
-#endif
}
static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 149111f08e8..af5cb53da5c 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -278,9 +278,8 @@ static void ipip_tunnel_uninit(struct net_device *dev)
static int ipip_err(struct sk_buff *skb, u32 info)
{
-#ifndef I_WISH_WORLD_WERE_PERFECT
-/* It is not :-( All the routers (except for Linux) return only
+/* All the routers (except for Linux) return only
8 bytes of packet payload. It means, that precise relaying of
ICMP in the real Internet is absolutely infeasible.
*/
@@ -337,133 +336,6 @@ static int ipip_err(struct sk_buff *skb, u32 info)
out:
read_unlock(&ipip_lock);
return err;
-#else
- struct iphdr *iph = (struct iphdr*)dp;
- int hlen = iph->ihl<<2;
- struct iphdr *eiph;
- const int type = icmp_hdr(skb)->type;
- const int code = icmp_hdr(skb)->code;
- int rel_type = 0;
- int rel_code = 0;
- __be32 rel_info = 0;
- __u32 n = 0;
- struct sk_buff *skb2;
- struct flowi fl;
- struct rtable *rt;
-
- if (len < hlen + sizeof(struct iphdr))
- return 0;
- eiph = (struct iphdr*)(dp + hlen);
-
- switch (type) {
- default:
- return 0;
- case ICMP_PARAMETERPROB:
- n = ntohl(icmp_hdr(skb)->un.gateway) >> 24;
- if (n < hlen)
- return 0;
-
- /* So... This guy found something strange INSIDE encapsulated
- packet. Well, he is fool, but what can we do ?
- */
- rel_type = ICMP_PARAMETERPROB;
- rel_info = htonl((n - hlen) << 24);
- break;
-
- case ICMP_DEST_UNREACH:
- switch (code) {
- case ICMP_SR_FAILED:
- case ICMP_PORT_UNREACH:
- /* Impossible event. */
- return 0;
- case ICMP_FRAG_NEEDED:
- /* And it is the only really necessary thing :-) */
- n = ntohs(icmp_hdr(skb)->un.frag.mtu);
- if (n < hlen+68)
- return 0;
- n -= hlen;
- /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
- if (n > ntohs(eiph->tot_len))
- return 0;
- rel_info = htonl(n);
- break;
- default:
- /* All others are translated to HOST_UNREACH.
- rfc2003 contains "deep thoughts" about NET_UNREACH,
- I believe, it is just ether pollution. --ANK
- */
- rel_type = ICMP_DEST_UNREACH;
- rel_code = ICMP_HOST_UNREACH;
- break;
- }
- break;
- case ICMP_TIME_EXCEEDED:
- if (code != ICMP_EXC_TTL)
- return 0;
- break;
- }
-
- /* Prepare fake skb to feed it to icmp_send */
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (skb2 == NULL)
- return 0;
- dst_release(skb2->dst);
- skb2->dst = NULL;
- skb_pull(skb2, skb->data - (u8*)eiph);
- skb_reset_network_header(skb2);
-
- /* Try to guess incoming interface */
- memset(&fl, 0, sizeof(fl));
- fl.fl4_daddr = eiph->saddr;
- fl.fl4_tos = RT_TOS(eiph->tos);
- fl.proto = IPPROTO_IPIP;
- if (ip_route_output_key(dev_net(skb->dev), &rt, &key)) {
- kfree_skb(skb2);
- return 0;
- }
- skb2->dev = rt->u.dst.dev;
-
- /* route "incoming" packet */
- if (rt->rt_flags&RTCF_LOCAL) {
- ip_rt_put(rt);
- rt = NULL;
- fl.fl4_daddr = eiph->daddr;
- fl.fl4_src = eiph->saddr;
- fl.fl4_tos = eiph->tos;
- if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
- rt->u.dst.dev->type != ARPHRD_TUNNEL) {
- ip_rt_put(rt);
- kfree_skb(skb2);
- return 0;
- }
- } else {
- ip_rt_put(rt);
- if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
- skb2->dst->dev->type != ARPHRD_TUNNEL) {
- kfree_skb(skb2);
- return 0;
- }
- }
-
- /* change mtu on this route */
- if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- if (n > dst_mtu(skb2->dst)) {
- kfree_skb(skb2);
- return 0;
- }
- skb2->dst->ops->update_pmtu(skb2->dst, n);
- } else if (type == ICMP_TIME_EXCEEDED) {
- struct ip_tunnel *t = netdev_priv(skb2->dev);
- if (t->parms.iph.ttl) {
- rel_type = ICMP_DEST_UNREACH;
- rel_code = ICMP_HOST_UNREACH;
- }
- }
-
- icmp_send(skb2, rel_type, rel_code, rel_info);
- kfree_skb(skb2);
- return 0;
-#endif
}
static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 92f90ae46f4..df41026b60d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -160,7 +160,7 @@ static struct dst_ops ipv4_dst_ops = {
.negative_advice = ipv4_negative_advice,
.link_failure = ipv4_link_failure,
.update_pmtu = ip_rt_update_pmtu,
- .local_out = ip_local_out,
+ .local_out = __ip_local_out,
.entry_size = sizeof(struct rtable),
.entries = ATOMIC_INIT(0),
};
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index debf2358160..e399bde7813 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1836,7 +1836,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
- unsigned int cur_mss = tcp_current_mss(sk, 0);
+ unsigned int cur_mss;
int err;
/* Inconslusive MTU probe */
@@ -1858,6 +1858,11 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
return -ENOMEM;
}
+ if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
+ return -EHOSTUNREACH; /* Routing failure or similar. */
+
+ cur_mss = tcp_current_mss(sk, 0);
+
/* If receiver has shrunk his window, and skb is out of
* new window, do not retransmit it. The exception is the
* case, when window is shrunk to zero. In this case
@@ -1884,9 +1889,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
(sysctl_tcp_retrans_collapse != 0))
tcp_retrans_try_collapse(sk, skb, cur_mss);
- if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
- return -EHOSTUNREACH; /* Routing failure or similar. */
-
/* Some Solaris stacks overoptimize and ignore the FIN on a
* retransmit when old data is attached. So strip it off
* since it is cheap to do so and saves bytes on the network.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e591e09e5e4..3a835578fd1 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1764,14 +1764,16 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
* 2) Configure prefixes with the auto flag set
*/
- /* Avoid arithmetic overflow. Really, we could
- save rt_expires in seconds, likely valid_lft,
- but it would require division in fib gc, that it
- not good.
- */
- if (valid_lft >= 0x7FFFFFFF/HZ)
+ if (valid_lft == INFINITY_LIFE_TIME)
+ rt_expires = ~0UL;
+ else if (valid_lft >= 0x7FFFFFFF/HZ) {
+ /* Avoid arithmetic overflow. Really, we could
+ * save rt_expires in seconds, like valid_lft,
+ * but it would require division in fib gc, which is
+ * not good.
+ */
rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ);
- else
+ } else
rt_expires = valid_lft * HZ;
/*
@@ -1779,7 +1781,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
* Avoid arithmetic overflow there as well.
* Overflow can happen only if HZ < USER_HZ.
*/
- if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ)
+ if (HZ < USER_HZ && ~rt_expires && rt_expires > 0x7FFFFFFF / USER_HZ)
rt_expires = 0x7FFFFFFF / USER_HZ;
if (pinfo->onlink) {
@@ -1788,17 +1790,28 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
dev->ifindex, 1);
if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
- if (rt->rt6i_flags&RTF_EXPIRES) {
- if (valid_lft == 0) {
- ip6_del_rt(rt);
- rt = NULL;
- } else {
- rt->rt6i_expires = jiffies + rt_expires;
- }
+ /* Autoconf prefix route */
+ if (valid_lft == 0) {
+ ip6_del_rt(rt);
+ rt = NULL;
+ } else if (~rt_expires) {
+ /* not infinity */
+ rt->rt6i_expires = jiffies + rt_expires;
+ rt->rt6i_flags |= RTF_EXPIRES;
+ } else {
+ rt->rt6i_flags &= ~RTF_EXPIRES;
+ rt->rt6i_expires = 0;
}
} else if (valid_lft) {
+ int flags = RTF_ADDRCONF | RTF_PREFIX_RT;
+ clock_t expires = 0;
+ if (~rt_expires) {
+ /* not infinity */
+ flags |= RTF_EXPIRES;
+ expires = jiffies_to_clock_t(rt_expires);
+ }
addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
- dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
+ dev, expires, flags);
}
if (rt)
dst_release(&rt->u.dst);
@@ -2021,7 +2034,8 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
struct inet6_dev *idev;
struct net_device *dev;
int scope;
- u32 flags = RTF_EXPIRES;
+ u32 flags;
+ clock_t expires;
ASSERT_RTNL();
@@ -2041,8 +2055,13 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
if (valid_lft == INFINITY_LIFE_TIME) {
ifa_flags |= IFA_F_PERMANENT;
flags = 0;
- } else if (valid_lft >= 0x7FFFFFFF/HZ)
- valid_lft = 0x7FFFFFFF/HZ;
+ expires = 0;
+ } else {
+ if (valid_lft >= 0x7FFFFFFF/HZ)
+ valid_lft = 0x7FFFFFFF/HZ;
+ flags = RTF_EXPIRES;
+ expires = jiffies_to_clock_t(valid_lft * HZ);
+ }
if (prefered_lft == 0)
ifa_flags |= IFA_F_DEPRECATED;
@@ -2060,7 +2079,7 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
spin_unlock_bh(&ifp->lock);
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
- jiffies_to_clock_t(valid_lft * HZ), flags);
+ expires, flags);
/*
* Note that section 3.1 of RFC 4429 indicates
* that the Optimistic flag should not be set for
@@ -3148,7 +3167,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
u32 prefered_lft, u32 valid_lft)
{
- u32 flags = RTF_EXPIRES;
+ u32 flags;
+ clock_t expires;
if (!valid_lft || (prefered_lft > valid_lft))
return -EINVAL;
@@ -3156,8 +3176,13 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
if (valid_lft == INFINITY_LIFE_TIME) {
ifa_flags |= IFA_F_PERMANENT;
flags = 0;
- } else if (valid_lft >= 0x7FFFFFFF/HZ)
- valid_lft = 0x7FFFFFFF/HZ;
+ expires = 0;
+ } else {
+ if (valid_lft >= 0x7FFFFFFF/HZ)
+ valid_lft = 0x7FFFFFFF/HZ;
+ flags = RTF_EXPIRES;
+ expires = jiffies_to_clock_t(valid_lft * HZ);
+ }
if (prefered_lft == 0)
ifa_flags |= IFA_F_DEPRECATED;
@@ -3176,7 +3201,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
ipv6_ifa_notify(0, ifp);
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
- jiffies_to_clock_t(valid_lft * HZ), flags);
+ expires, flags);
addrconf_verify(0);
return 0;
@@ -4242,7 +4267,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev)
neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6,
NET_IPV6_NEIGH, "ipv6",
&ndisc_ifinfo_sysctl_change,
- NULL);
+ ndisc_ifinfo_sysctl_strategy);
__addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
idev->dev->ifindex, idev, &idev->cnf);
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a55fc05b812..282fdb31f8e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1727,10 +1727,10 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
return ret;
}
-static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
- int nlen, void __user *oldval,
- size_t __user *oldlenp,
- void __user *newval, size_t newlen)
+int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
+ int nlen, void __user *oldval,
+ size_t __user *oldlenp,
+ void __user *newval, size_t newlen)
{
struct net_device *dev = ctl->extra1;
struct inet6_dev *idev;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 12bba088034..48534c6c073 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -109,7 +109,7 @@ static struct dst_ops ip6_dst_ops_template = {
.negative_advice = ip6_negative_advice,
.link_failure = ip6_link_failure,
.update_pmtu = ip6_rt_update_pmtu,
- .local_out = ip6_local_out,
+ .local_out = __ip6_local_out,
.entry_size = sizeof(struct rt6_info),
.entries = ATOMIC_INIT(0),
};
@@ -475,7 +475,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
lifetime = ntohl(rinfo->lifetime);
if (lifetime == 0xffffffff) {
/* infinity */
- } else if (lifetime > 0x7fffffff/HZ) {
+ } else if (lifetime > 0x7fffffff/HZ - 1) {
/* Avoid arithmetic overflow */
lifetime = 0x7fffffff/HZ - 1;
}
@@ -1106,7 +1106,9 @@ int ip6_route_add(struct fib6_config *cfg)
}
rt->u.dst.obsolete = -1;
- rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
+ rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
+ jiffies + clock_t_to_jiffies(cfg->fc_expires) :
+ 0;
if (cfg->fc_protocol == RTPROT_UNSPEC)
cfg->fc_protocol = RTPROT_BOOT;
@@ -2200,7 +2202,9 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
- expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
+ expires = (rt->rt6i_flags & RTF_EXPIRES) ?
+ rt->rt6i_expires - jiffies : 0;
+
if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
expires, rt->u.dst.error) < 0)
goto nla_put_failure;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 5a6fab95569..3de6ffdaedf 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -403,9 +403,8 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
static int ipip6_err(struct sk_buff *skb, u32 info)
{
-#ifndef I_WISH_WORLD_WERE_PERFECT
-/* It is not :-( All the routers (except for Linux) return only
+/* All the routers (except for Linux) return only
8 bytes of packet payload. It means, that precise relaying of
ICMP in the real Internet is absolutely infeasible.
*/
@@ -462,92 +461,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
out:
read_unlock(&ipip6_lock);
return err;
-#else
- struct iphdr *iph = (struct iphdr*)dp;
- int hlen = iph->ihl<<2;
- struct ipv6hdr *iph6;
- const int type = icmp_hdr(skb)->type;
- const int code = icmp_hdr(skb)->code;
- int rel_type = 0;
- int rel_code = 0;
- int rel_info = 0;
- struct sk_buff *skb2;
- struct rt6_info *rt6i;
-
- if (len < hlen + sizeof(struct ipv6hdr))
- return;
- iph6 = (struct ipv6hdr*)(dp + hlen);
-
- switch (type) {
- default:
- return;
- case ICMP_PARAMETERPROB:
- if (icmp_hdr(skb)->un.gateway < hlen)
- return;
-
- /* So... This guy found something strange INSIDE encapsulated
- packet. Well, he is fool, but what can we do ?
- */
- rel_type = ICMPV6_PARAMPROB;
- rel_info = icmp_hdr(skb)->un.gateway - hlen;
- break;
-
- case ICMP_DEST_UNREACH:
- switch (code) {
- case ICMP_SR_FAILED:
- case ICMP_PORT_UNREACH:
- /* Impossible event. */
- return;
- case ICMP_FRAG_NEEDED:
- /* Too complicated case ... */
- return;
- default:
- /* All others are translated to HOST_UNREACH.
- rfc2003 contains "deep thoughts" about NET_UNREACH,
- I believe, it is just ether pollution. --ANK
- */
- rel_type = ICMPV6_DEST_UNREACH;
- rel_code = ICMPV6_ADDR_UNREACH;
- break;
- }
- break;
- case ICMP_TIME_EXCEEDED:
- if (code != ICMP_EXC_TTL)
- return;
- rel_type = ICMPV6_TIME_EXCEED;
- rel_code = ICMPV6_EXC_HOPLIMIT;
- break;
- }
-
- /* Prepare fake skb to feed it to icmpv6_send */
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (skb2 == NULL)
- return 0;
- dst_release(skb2->dst);
- skb2->dst = NULL;
- skb_pull(skb2, skb->data - (u8*)iph6);
- skb_reset_network_header(skb2);
-
- /* Try to guess incoming interface */
- rt6i = rt6_lookup(dev_net(skb->dev), &iph6->saddr, NULL, NULL, 0);
- if (rt6i && rt6i->rt6i_dev) {
- skb2->dev = rt6i->rt6i_dev;
-
- rt6i = rt6_lookup(dev_net(skb->dev),
- &iph6->daddr, &iph6->saddr, NULL, 0);
-
- if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) {
- struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev);
- if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) {
- rel_type = ICMPV6_DEST_UNREACH;
- rel_code = ICMPV6_ADDR_UNREACH;
- }
- icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
- }
- }
- kfree_skb(skb2);
- return 0;
-#endif
}
static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9e7236ff6bc..9bba7ac5fee 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1251,7 +1251,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
x->sel.prefixlen_s = addr->sadb_address_prefixlen;
}
- if (x->props.mode == XFRM_MODE_TRANSPORT)
+ if (!x->sel.family)
x->sel.family = x->props.family;
if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 4adba09e80c..7cfd12e0d1e 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -730,7 +730,17 @@ static void ieee80211_send_assoc(struct net_device *dev,
if (bss->wmm_ie) {
wmm = 1;
}
+
+ /* get all rates supported by the device and the AP as
+ * some APs don't like getting a superset of their rates
+ * in the association request (e.g. D-Link DAP 1353 in
+ * b-only mode) */
+ rates_len = ieee80211_compatible_rates(bss, sband, &rates);
+
ieee80211_rx_bss_put(dev, bss);
+ } else {
+ rates = ~0;
+ rates_len = sband->n_bitrates;
}
mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
@@ -761,10 +771,7 @@ static void ieee80211_send_assoc(struct net_device *dev,
*pos++ = ifsta->ssid_len;
memcpy(pos, ifsta->ssid, ifsta->ssid_len);
- /* all supported rates should be added here but some APs
- * (e.g. D-Link DAP 1353 in b-only mode) don't like that
- * Therefore only add rates the AP supports */
- rates_len = ieee80211_compatible_rates(bss, sband, &rates);
+ /* add all rates which were marked to be used above */
supp_rates_len = rates_len;
if (supp_rates_len > 8)
supp_rates_len = 8;
@@ -3446,21 +3453,17 @@ static int ieee80211_sta_config_auth(struct net_device *dev,
struct ieee80211_sta_bss *bss, *selected = NULL;
int top_rssi = 0, freq;
- if (!(ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL |
- IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL))) {
- ifsta->state = IEEE80211_AUTHENTICATE;
- ieee80211_sta_reset_auth(dev, ifsta);
- return 0;
- }
-
spin_lock_bh(&local->sta_bss_lock);
freq = local->oper_channel->center_freq;
list_for_each_entry(bss, &local->sta_bss_list, list) {
if (!(bss->capability & WLAN_CAPABILITY_ESS))
continue;
- if (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^
- !!sdata->default_key)
+ if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL |
+ IEEE80211_STA_AUTO_BSSID_SEL |
+ IEEE80211_STA_AUTO_CHANNEL_SEL)) &&
+ (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^
+ !!sdata->default_key))
continue;
if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) &&
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 24a465c4df0..131e9e6c8a3 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -389,6 +389,41 @@ void ieee80211_iterate_active_interfaces(
struct ieee80211_local *local = hw_to_local(hw);
struct ieee80211_sub_if_data *sdata;
+ rtnl_lock();
+
+ list_for_each_entry(sdata, &local->interfaces, list) {
+ switch (sdata->vif.type) {
+ case IEEE80211_IF_TYPE_INVALID:
+ case IEEE80211_IF_TYPE_MNTR:
+ case IEEE80211_IF_TYPE_VLAN:
+ continue;
+ case IEEE80211_IF_TYPE_AP:
+ case IEEE80211_IF_TYPE_STA:
+ case IEEE80211_IF_TYPE_IBSS:
+ case IEEE80211_IF_TYPE_WDS:
+ case IEEE80211_IF_TYPE_MESH_POINT:
+ break;
+ }
+ if (sdata->dev == local->mdev)
+ continue;
+ if (netif_running(sdata->dev))
+ iterator(data, sdata->dev->dev_addr,
+ &sdata->vif);
+ }
+
+ rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
+
+void ieee80211_iterate_active_interfaces_atomic(
+ struct ieee80211_hw *hw,
+ void (*iterator)(void *data, u8 *mac,
+ struct ieee80211_vif *vif),
+ void *data)
+{
+ struct ieee80211_local *local = hw_to_local(hw);
+ struct ieee80211_sub_if_data *sdata;
+
rcu_read_lock();
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -413,4 +448,4 @@ void ieee80211_iterate_active_interfaces(
rcu_read_unlock();
}
-EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
+EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic);
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 76e1de1dc73..457ebf9e85a 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -209,7 +209,6 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev,
range->num_frequency = c;
IW_EVENT_CAPA_SET_KERNEL(range->event_capa);
- IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWTHRSPY);
IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP);
IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 1086df7478b..9360fc81e8c 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -220,7 +220,7 @@ replay:
tp = kzalloc(sizeof(*tp), GFP_KERNEL);
if (tp == NULL)
goto errout;
- err = -EINVAL;
+ err = -ENOENT;
tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]);
if (tp_ops == NULL) {
#ifdef CONFIG_KMOD
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index d927d9f5741..744b79fdcb1 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -17,8 +17,8 @@
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-#define RPC_ANONYMOUS_USERID ((uid_t)-2)
-#define RPC_ANONYMOUS_GROUPID ((gid_t)-2)
+#define RPC_MACHINE_CRED_USERID ((uid_t)0)
+#define RPC_MACHINE_CRED_GROUPID ((gid_t)0)
struct generic_cred {
struct rpc_cred gc_base;
@@ -44,8 +44,8 @@ EXPORT_SYMBOL_GPL(rpc_lookup_cred);
struct rpc_cred *rpc_lookup_machine_cred(void)
{
struct auth_cred acred = {
- .uid = RPC_ANONYMOUS_USERID,
- .gid = RPC_ANONYMOUS_GROUPID,
+ .uid = RPC_MACHINE_CRED_USERID,
+ .gid = RPC_MACHINE_CRED_GROUPID,
.machine_cred = 1,
};
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d8e8d79a845..e46c825f495 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -6,30 +6,9 @@
#include <linux/sched.h>
#include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/net.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/udp.h>
-#include <linux/tcp.h>
-#include <linux/unistd.h>
-#include <linux/slab.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/file.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <net/sock.h>
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
-#include <net/tcp_states.h>
-#include <linux/uaccess.h>
-#include <asm/ioctls.h>
-
-#include <linux/sunrpc/types.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svc_xprt.h>
@@ -296,8 +275,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
if (!(xprt->xpt_flags &
((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
return;
- if (test_bit(XPT_DEAD, &xprt->xpt_flags))
- return;
cpu = get_cpu();
pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 3f30ee6006a..f24800f2c09 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -278,7 +278,7 @@ static int ip_map_show(struct seq_file *m,
dom = im->m_client->h.name;
if (ipv6_addr_v4mapped(&addr)) {
- seq_printf(m, "%s" NIPQUAD_FMT "%s\n",
+ seq_printf(m, "%s " NIPQUAD_FMT " %s\n",
im->m_class,
ntohl(addr.s6_addr32[3]) >> 24 & 0xff,
ntohl(addr.s6_addr32[3]) >> 16 & 0xff,
@@ -286,7 +286,7 @@ static int ip_map_show(struct seq_file *m,
ntohl(addr.s6_addr32[3]) >> 0 & 0xff,
dom);
} else {
- seq_printf(m, "%s" NIP6_FMT "%s\n",
+ seq_printf(m, "%s " NIP6_FMT " %s\n",
im->m_class, NIP6(addr), dom);
}
return 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index c22d6b6f2db..06ab4841537 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -260,11 +260,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
* On our side, we need to read into a pagelist. The first page immediately
* follows the RPC header.
*
- * This function returns 1 to indicate success. The data is not yet in
+ * This function returns:
+ * 0 - No error and no read-list found.
+ *
+ * 1 - Successful read-list processing. The data is not yet in
* the pagelist and therefore the RPC request must be deferred. The
* I/O completion will enqueue the transport again and
* svc_rdma_recvfrom will complete the request.
*
+ * <0 - Error processing/posting read-list.
+ *
* NOTE: The ctxt must not be touched after the last WR has been posted
* because the I/O completion processing may occur on another
* processor and free / modify the context. Ne touche pas!
@@ -284,7 +289,6 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
u64 sgl_offset;
struct rpcrdma_read_chunk *ch;
struct svc_rdma_op_ctxt *ctxt = NULL;
- struct svc_rdma_op_ctxt *head;
struct svc_rdma_op_ctxt *tmp_sge_ctxt;
struct svc_rdma_op_ctxt *tmp_ch_ctxt;
struct chunk_sge *ch_sge_ary;
@@ -302,25 +306,19 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;
svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
+ if (ch_count > RPCSVC_MAXPAGES)
+ return -EINVAL;
sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
sge, ch_sge_ary,
ch_count, byte_count);
- head = svc_rdma_get_context(xprt);
sgl_offset = 0;
ch_no = 0;
for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
ch->rc_discrim != 0; ch++, ch_no++) {
next_sge:
- if (!ctxt)
- ctxt = head;
- else {
- ctxt->next = svc_rdma_get_context(xprt);
- ctxt = ctxt->next;
- }
- ctxt->next = NULL;
+ ctxt = svc_rdma_get_context(xprt);
ctxt->direction = DMA_FROM_DEVICE;
- clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
/* Prepare READ WR */
@@ -347,20 +345,15 @@ next_sge:
* the client and the RPC needs to be enqueued.
*/
set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
- ctxt->next = hdr_ctxt;
- hdr_ctxt->next = head;
+ ctxt->read_hdr = hdr_ctxt;
}
/* Post the read */
err = svc_rdma_send(xprt, &read_wr);
if (err) {
- printk(KERN_ERR "svcrdma: Error posting send = %d\n",
+ printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
err);
- /*
- * Break the circular list so free knows when
- * to stop if the error happened to occur on
- * the last read
- */
- ctxt->next = NULL;
+ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ svc_rdma_put_context(ctxt, 0);
goto out;
}
atomic_inc(&rdma_stat_read);
@@ -371,7 +364,7 @@ next_sge:
goto next_sge;
}
sgl_offset = 0;
- err = 0;
+ err = 1;
}
out:
@@ -389,25 +382,12 @@ next_sge:
while (rqstp->rq_resused)
rqstp->rq_respages[--rqstp->rq_resused] = NULL;
- if (err) {
- printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err);
- set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
- /* Free the linked list of read contexts */
- while (head != NULL) {
- ctxt = head->next;
- svc_rdma_put_context(head, 1);
- head = ctxt;
- }
- return 0;
- }
-
- return 1;
+ return err;
}
static int rdma_read_complete(struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *data)
+ struct svc_rdma_op_ctxt *head)
{
- struct svc_rdma_op_ctxt *head = data->next;
int page_no;
int ret;
@@ -433,21 +413,12 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
rqstp->rq_arg.len = head->arg.len;
rqstp->rq_arg.buflen = head->arg.buflen;
+ /* Free the context */
+ svc_rdma_put_context(head, 0);
+
/* XXX: What should this be? */
rqstp->rq_prot = IPPROTO_MAX;
-
- /*
- * Free the contexts we used to build the RDMA_READ. We have
- * to be careful here because the context list uses the same
- * next pointer used to chain the contexts associated with the
- * RDMA_READ
- */
- data->next = NULL; /* terminate circular list */
- do {
- data = head->next;
- svc_rdma_put_context(head, 0);
- head = data;
- } while (head != NULL);
+ svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt);
ret = rqstp->rq_arg.head[0].iov_len
+ rqstp->rq_arg.page_len
@@ -457,8 +428,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
rqstp->rq_arg.head[0].iov_len);
- /* Indicate that we've consumed an RQ credit */
- rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
svc_xprt_received(rqstp->rq_xprt);
return ret;
}
@@ -480,13 +449,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
dprintk("svcrdma: rqstp=%p\n", rqstp);
- /*
- * The rq_xprt_ctxt indicates if we've consumed an RQ credit
- * or not. It is used in the rdma xpo_release_rqst function to
- * determine whether or not to return an RQ WQE to the RQ.
- */
- rqstp->rq_xprt_ctxt = NULL;
-
spin_lock_bh(&rdma_xprt->sc_read_complete_lock);
if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
@@ -537,21 +499,22 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
/* If the request is invalid, reply with an error */
if (len < 0) {
if (len == -ENOSYS)
- (void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
+ svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
goto close_out;
}
- /* Read read-list data. If we would need to wait, defer
- * it. Not that in this case, we don't return the RQ credit
- * until after the read completes.
- */
- if (rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt)) {
+ /* Read read-list data. */
+ ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
+ if (ret > 0) {
+ /* read-list posted, defer until data received from client. */
svc_xprt_received(xprt);
return 0;
}
-
- /* Indicate we've consumed an RQ credit */
- rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
+ if (ret < 0) {
+ /* Post of read-list failed, free context. */
+ svc_rdma_put_context(ctxt, 1);
+ return 0;
+ }
ret = rqstp->rq_arg.head[0].iov_len
+ rqstp->rq_arg.page_len
@@ -569,11 +532,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
return ret;
close_out:
- if (ctxt) {
+ if (ctxt)
svc_rdma_put_context(ctxt, 1);
- /* Indicate we've consumed an RQ credit */
- rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
- }
dprintk("svcrdma: transport %p is closing\n", xprt);
/*
* Set the close bit and enqueue it. svc_recv will see the
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 981f190c1b3..fb82b1b683f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -389,6 +389,17 @@ static int send_reply(struct svcxprt_rdma *rdma,
int page_no;
int ret;
+ /* Post a recv buffer to handle another request. */
+ ret = svc_rdma_post_recv(rdma);
+ if (ret) {
+ printk(KERN_INFO
+ "svcrdma: could not post a receive buffer, err=%d."
+ "Closing transport %p.\n", ret, rdma);
+ set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+ svc_rdma_put_context(ctxt, 0);
+ return -ENOTCONN;
+ }
+
/* Prepare the context */
ctxt->pages[0] = page;
ctxt->count = 1;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index af408fc1263..e132509d1db 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -103,8 +103,8 @@ static int rdma_bump_context_cache(struct svcxprt_rdma *xprt)
spin_lock_bh(&xprt->sc_ctxt_lock);
if (ctxt) {
at_least_one = 1;
- ctxt->next = xprt->sc_ctxt_head;
- xprt->sc_ctxt_head = ctxt;
+ INIT_LIST_HEAD(&ctxt->free_list);
+ list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
} else {
/* kmalloc failed...give up for now */
xprt->sc_ctxt_cnt--;
@@ -123,7 +123,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
while (1) {
spin_lock_bh(&xprt->sc_ctxt_lock);
- if (unlikely(xprt->sc_ctxt_head == NULL)) {
+ if (unlikely(list_empty(&xprt->sc_ctxt_free))) {
/* Try to bump my cache. */
spin_unlock_bh(&xprt->sc_ctxt_lock);
@@ -136,12 +136,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
schedule_timeout_uninterruptible(msecs_to_jiffies(500));
continue;
}
- ctxt = xprt->sc_ctxt_head;
- xprt->sc_ctxt_head = ctxt->next;
+ ctxt = list_entry(xprt->sc_ctxt_free.next,
+ struct svc_rdma_op_ctxt,
+ free_list);
+ list_del_init(&ctxt->free_list);
spin_unlock_bh(&xprt->sc_ctxt_lock);
ctxt->xprt = xprt;
INIT_LIST_HEAD(&ctxt->dto_q);
ctxt->count = 0;
+ atomic_inc(&xprt->sc_ctxt_used);
break;
}
return ctxt;
@@ -159,14 +162,15 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
put_page(ctxt->pages[i]);
for (i = 0; i < ctxt->count; i++)
- dma_unmap_single(xprt->sc_cm_id->device->dma_device,
- ctxt->sge[i].addr,
- ctxt->sge[i].length,
- ctxt->direction);
+ ib_dma_unmap_single(xprt->sc_cm_id->device,
+ ctxt->sge[i].addr,
+ ctxt->sge[i].length,
+ ctxt->direction);
+
spin_lock_bh(&xprt->sc_ctxt_lock);
- ctxt->next = xprt->sc_ctxt_head;
- xprt->sc_ctxt_head = ctxt;
+ list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
spin_unlock_bh(&xprt->sc_ctxt_lock);
+ atomic_dec(&xprt->sc_ctxt_used);
}
/* ib_cq event handler */
@@ -228,23 +232,8 @@ static void dto_tasklet_func(unsigned long data)
list_del_init(&xprt->sc_dto_q);
spin_unlock_irqrestore(&dto_lock, flags);
- if (test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) {
- ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP);
- rq_cq_reap(xprt);
- set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
- /*
- * If data arrived before established event,
- * don't enqueue. This defers RPC I/O until the
- * RDMA connection is complete.
- */
- if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
- svc_xprt_enqueue(&xprt->sc_xprt);
- }
-
- if (test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) {
- ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
- sq_cq_reap(xprt);
- }
+ rq_cq_reap(xprt);
+ sq_cq_reap(xprt);
svc_xprt_put(&xprt->sc_xprt);
spin_lock_irqsave(&dto_lock, flags);
@@ -263,11 +252,15 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
struct svcxprt_rdma *xprt = cq_context;
unsigned long flags;
+ /* Guard against unconditional flush call for destroyed QP */
+ if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0)
+ return;
+
/*
* Set the bit regardless of whether or not it's on the list
* because it may be on the list already due to an SQ
* completion.
- */
+ */
set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags);
/*
@@ -290,6 +283,8 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
*
* Take all completing WC off the CQE and enqueue the associated DTO
* context on the dto_q for the transport.
+ *
+ * Note that caller must hold a transport reference.
*/
static void rq_cq_reap(struct svcxprt_rdma *xprt)
{
@@ -297,29 +292,47 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
struct ib_wc wc;
struct svc_rdma_op_ctxt *ctxt = NULL;
+ if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags))
+ return;
+
+ ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP);
atomic_inc(&rdma_stat_rq_poll);
- spin_lock_bh(&xprt->sc_rq_dto_lock);
while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) {
ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
ctxt->wc_status = wc.status;
ctxt->byte_len = wc.byte_len;
if (wc.status != IB_WC_SUCCESS) {
/* Close the transport */
+ dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
svc_rdma_put_context(ctxt, 1);
+ svc_xprt_put(&xprt->sc_xprt);
continue;
}
+ spin_lock_bh(&xprt->sc_rq_dto_lock);
list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
+ spin_unlock_bh(&xprt->sc_rq_dto_lock);
+ svc_xprt_put(&xprt->sc_xprt);
}
- spin_unlock_bh(&xprt->sc_rq_dto_lock);
if (ctxt)
atomic_inc(&rdma_stat_rq_prod);
+
+ set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+ /*
+ * If data arrived before established event,
+ * don't enqueue. This defers RPC I/O until the
+ * RDMA connection is complete.
+ */
+ if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
+ svc_xprt_enqueue(&xprt->sc_xprt);
}
/*
* Send Queue Completion Handler - potentially called on interrupt context.
+ *
+ * Note that caller must hold a transport reference.
*/
static void sq_cq_reap(struct svcxprt_rdma *xprt)
{
@@ -328,6 +341,11 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
struct ib_cq *cq = xprt->sc_sq_cq;
int ret;
+
+ if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
+ return;
+
+ ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
atomic_inc(&rdma_stat_sq_poll);
while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
@@ -349,14 +367,16 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
case IB_WR_RDMA_READ:
if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
+ struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
+ BUG_ON(!read_hdr);
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
- set_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
spin_lock_bh(&xprt->sc_read_complete_lock);
- list_add_tail(&ctxt->dto_q,
+ list_add_tail(&read_hdr->dto_q,
&xprt->sc_read_complete_q);
spin_unlock_bh(&xprt->sc_read_complete_lock);
svc_xprt_enqueue(&xprt->sc_xprt);
}
+ svc_rdma_put_context(ctxt, 0);
break;
default:
@@ -365,6 +385,7 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
wc.opcode, wc.status);
break;
}
+ svc_xprt_put(&xprt->sc_xprt);
}
if (ctxt)
@@ -376,11 +397,15 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
struct svcxprt_rdma *xprt = cq_context;
unsigned long flags;
+ /* Guard against unconditional flush call for destroyed QP */
+ if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0)
+ return;
+
/*
* Set the bit regardless of whether or not it's on the list
* because it may be on the list already due to an RQ
* completion.
- */
+ */
set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags);
/*
@@ -407,28 +432,29 @@ static void create_context_cache(struct svcxprt_rdma *xprt,
xprt->sc_ctxt_max = ctxt_max;
xprt->sc_ctxt_bump = ctxt_bump;
xprt->sc_ctxt_cnt = 0;
- xprt->sc_ctxt_head = NULL;
+ atomic_set(&xprt->sc_ctxt_used, 0);
+
+ INIT_LIST_HEAD(&xprt->sc_ctxt_free);
for (i = 0; i < ctxt_count; i++) {
ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
if (ctxt) {
- ctxt->next = xprt->sc_ctxt_head;
- xprt->sc_ctxt_head = ctxt;
+ INIT_LIST_HEAD(&ctxt->free_list);
+ list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
xprt->sc_ctxt_cnt++;
}
}
}
-static void destroy_context_cache(struct svc_rdma_op_ctxt *ctxt)
+static void destroy_context_cache(struct svcxprt_rdma *xprt)
{
- struct svc_rdma_op_ctxt *next;
- if (!ctxt)
- return;
-
- do {
- next = ctxt->next;
+ while (!list_empty(&xprt->sc_ctxt_free)) {
+ struct svc_rdma_op_ctxt *ctxt;
+ ctxt = list_entry(xprt->sc_ctxt_free.next,
+ struct svc_rdma_op_ctxt,
+ free_list);
+ list_del_init(&ctxt->free_list);
kfree(ctxt);
- ctxt = next;
- } while (next);
+ }
}
static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
@@ -465,7 +491,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
reqs +
cma_xprt->sc_sq_depth +
RPCRDMA_MAX_THREADS + 1); /* max */
- if (!cma_xprt->sc_ctxt_head) {
+ if (list_empty(&cma_xprt->sc_ctxt_free)) {
kfree(cma_xprt);
return NULL;
}
@@ -520,7 +546,12 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
recv_wr.num_sge = ctxt->count;
recv_wr.wr_id = (u64)(unsigned long)ctxt;
+ svc_xprt_get(&xprt->sc_xprt);
ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
+ if (ret) {
+ svc_xprt_put(&xprt->sc_xprt);
+ svc_rdma_put_context(ctxt, 1);
+ }
return ret;
}
@@ -539,6 +570,7 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
{
struct svcxprt_rdma *listen_xprt = new_cma_id->context;
struct svcxprt_rdma *newxprt;
+ struct sockaddr *sa;
/* Create a new transport */
newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0);
@@ -551,6 +583,12 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
newxprt, newxprt->sc_cm_id, listen_xprt);
+ /* Set the local and remote addresses in the transport */
+ sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
+ svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+ sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
+ svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+
/*
* Enqueue the new transport on the accept queue of the listening
* transport
@@ -627,6 +665,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
if (xprt) {
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
+ svc_xprt_put(xprt);
}
break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
@@ -661,31 +700,27 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
cma_xprt = rdma_create_xprt(serv, 1);
if (!cma_xprt)
- return ERR_PTR(ENOMEM);
+ return ERR_PTR(-ENOMEM);
xprt = &cma_xprt->sc_xprt;
listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP);
if (IS_ERR(listen_id)) {
- svc_xprt_put(&cma_xprt->sc_xprt);
- dprintk("svcrdma: rdma_create_id failed = %ld\n",
- PTR_ERR(listen_id));
- return (void *)listen_id;
+ ret = PTR_ERR(listen_id);
+ dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
+ goto err0;
}
+
ret = rdma_bind_addr(listen_id, sa);
if (ret) {
- rdma_destroy_id(listen_id);
- svc_xprt_put(&cma_xprt->sc_xprt);
dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
- return ERR_PTR(ret);
+ goto err1;
}
cma_xprt->sc_cm_id = listen_id;
ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
if (ret) {
- rdma_destroy_id(listen_id);
- svc_xprt_put(&cma_xprt->sc_xprt);
dprintk("svcrdma: rdma_listen failed = %d\n", ret);
- return ERR_PTR(ret);
+ goto err1;
}
/*
@@ -696,6 +731,12 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen);
return &cma_xprt->sc_xprt;
+
+ err1:
+ rdma_destroy_id(listen_id);
+ err0:
+ kfree(cma_xprt);
+ return ERR_PTR(ret);
}
/*
@@ -716,7 +757,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct rdma_conn_param conn_param;
struct ib_qp_init_attr qp_attr;
struct ib_device_attr devattr;
- struct sockaddr *sa;
int ret;
int i;
@@ -826,7 +866,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_sq_depth = qp_attr.cap.max_send_wr;
newxprt->sc_max_requests = qp_attr.cap.max_recv_wr;
}
- svc_xprt_get(&newxprt->sc_xprt);
newxprt->sc_qp = newxprt->sc_cm_id->qp;
/* Register all of physical memory */
@@ -850,6 +889,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
/* Swap out the handler */
newxprt->sc_cm_id->event_handler = rdma_cma_handler;
+ /*
+ * Arm the CQs for the SQ and RQ before accepting so we can't
+ * miss the first message
+ */
+ ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
+ ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
+
/* Accept Connection */
set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
memset(&conn_param, 0, sizeof conn_param);
@@ -886,58 +932,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_max_requests,
newxprt->sc_ord);
- /* Set the local and remote addresses in the transport */
- sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
- svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
- sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
- svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
-
- ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
- ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
return &newxprt->sc_xprt;
errout:
dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
/* Take a reference in case the DTO handler runs */
svc_xprt_get(&newxprt->sc_xprt);
- if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) {
+ if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
ib_destroy_qp(newxprt->sc_qp);
- svc_xprt_put(&newxprt->sc_xprt);
- }
rdma_destroy_id(newxprt->sc_cm_id);
/* This call to put will destroy the transport */
svc_xprt_put(&newxprt->sc_xprt);
return NULL;
}
-/*
- * Post an RQ WQE to the RQ when the rqst is being released. This
- * effectively returns an RQ credit to the client. The rq_xprt_ctxt
- * will be null if the request is deferred due to an RDMA_READ or the
- * transport had no data ready (EAGAIN). Note that an RPC deferred in
- * svc_process will still return the credit, this is because the data
- * is copied and no longer consume a WQE/WC.
- */
static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
{
- int err;
- struct svcxprt_rdma *rdma =
- container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt);
- if (rqstp->rq_xprt_ctxt) {
- BUG_ON(rqstp->rq_xprt_ctxt != rdma);
- err = svc_rdma_post_recv(rdma);
- if (err)
- dprintk("svcrdma: failed to post an RQ WQE error=%d\n",
- err);
- }
- rqstp->rq_xprt_ctxt = NULL;
}
/*
- * When connected, an svc_xprt has at least three references:
- *
- * - A reference held by the QP. We still hold that here because this
- * code deletes the QP and puts the reference.
+ * When connected, an svc_xprt has at least two references:
*
* - A reference held by the cm_id between the ESTABLISHED and
* DISCONNECTED events. If the remote peer disconnected first, this
@@ -946,7 +960,7 @@ static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
* - A reference held by the svc_recv code that called this function
* as part of close processing.
*
- * At a minimum two references should still be held.
+ * At a minimum one reference should still be held.
*/
static void svc_rdma_detach(struct svc_xprt *xprt)
{
@@ -956,23 +970,53 @@ static void svc_rdma_detach(struct svc_xprt *xprt)
/* Disconnect and flush posted WQE */
rdma_disconnect(rdma->sc_cm_id);
-
- /* Destroy the QP if present (not a listener) */
- if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) {
- ib_destroy_qp(rdma->sc_qp);
- svc_xprt_put(xprt);
- }
-
- /* Destroy the CM ID */
- rdma_destroy_id(rdma->sc_cm_id);
}
-static void svc_rdma_free(struct svc_xprt *xprt)
+static void __svc_rdma_free(struct work_struct *work)
{
- struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt;
+ struct svcxprt_rdma *rdma =
+ container_of(work, struct svcxprt_rdma, sc_work);
dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
+
/* We should only be called from kref_put */
- BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0);
+ BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0);
+
+ /*
+ * Destroy queued, but not processed read completions. Note
+ * that this cleanup has to be done before destroying the
+ * cm_id because the device ptr is needed to unmap the dma in
+ * svc_rdma_put_context.
+ */
+ spin_lock_bh(&rdma->sc_read_complete_lock);
+ while (!list_empty(&rdma->sc_read_complete_q)) {
+ struct svc_rdma_op_ctxt *ctxt;
+ ctxt = list_entry(rdma->sc_read_complete_q.next,
+ struct svc_rdma_op_ctxt,
+ dto_q);
+ list_del_init(&ctxt->dto_q);
+ svc_rdma_put_context(ctxt, 1);
+ }
+ spin_unlock_bh(&rdma->sc_read_complete_lock);
+
+ /* Destroy queued, but not processed recv completions */
+ spin_lock_bh(&rdma->sc_rq_dto_lock);
+ while (!list_empty(&rdma->sc_rq_dto_q)) {
+ struct svc_rdma_op_ctxt *ctxt;
+ ctxt = list_entry(rdma->sc_rq_dto_q.next,
+ struct svc_rdma_op_ctxt,
+ dto_q);
+ list_del_init(&ctxt->dto_q);
+ svc_rdma_put_context(ctxt, 1);
+ }
+ spin_unlock_bh(&rdma->sc_rq_dto_lock);
+
+ /* Warn if we leaked a resource or under-referenced */
+ WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
+
+ /* Destroy the QP if present (not a listener) */
+ if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
+ ib_destroy_qp(rdma->sc_qp);
+
if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
ib_destroy_cq(rdma->sc_sq_cq);
@@ -985,10 +1029,21 @@ static void svc_rdma_free(struct svc_xprt *xprt)
if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
ib_dealloc_pd(rdma->sc_pd);
- destroy_context_cache(rdma->sc_ctxt_head);
+ /* Destroy the CM ID */
+ rdma_destroy_id(rdma->sc_cm_id);
+
+ destroy_context_cache(rdma);
kfree(rdma);
}
+static void svc_rdma_free(struct svc_xprt *xprt)
+{
+ struct svcxprt_rdma *rdma =
+ container_of(xprt, struct svcxprt_rdma, sc_xprt);
+ INIT_WORK(&rdma->sc_work, __svc_rdma_free);
+ schedule_work(&rdma->sc_work);
+}
+
static int svc_rdma_has_wspace(struct svc_xprt *xprt)
{
struct svcxprt_rdma *rdma =
@@ -1018,7 +1073,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
int ret;
if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
- return 0;
+ return -ENOTCONN;
BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=
@@ -1029,7 +1084,8 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
spin_unlock_bh(&xprt->sc_lock);
atomic_inc(&rdma_stat_sq_starve);
- /* See if we can reap some SQ WR */
+
+ /* See if we can opportunistically reap SQ WR to make room */
sq_cq_reap(xprt);
/* Wait until SQ WR available if SQ still full */
@@ -1041,22 +1097,25 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
continue;
}
/* Bumped used SQ WR count and post */
+ svc_xprt_get(&xprt->sc_xprt);
ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
if (!ret)
atomic_inc(&xprt->sc_sq_count);
- else
+ else {
+ svc_xprt_put(&xprt->sc_xprt);
dprintk("svcrdma: failed to post SQ WR rc=%d, "
"sc_sq_count=%d, sc_sq_depth=%d\n",
ret, atomic_read(&xprt->sc_sq_count),
xprt->sc_sq_depth);
+ }
spin_unlock_bh(&xprt->sc_lock);
break;
}
return ret;
}
-int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
- enum rpcrdma_errcode err)
+void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
+ enum rpcrdma_errcode err)
{
struct ib_send_wr err_wr;
struct ib_sge sge;
@@ -1094,9 +1153,8 @@ int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
/* Post It */
ret = svc_rdma_send(xprt, &err_wr);
if (ret) {
- dprintk("svcrdma: Error posting send = %d\n", ret);
+ dprintk("svcrdma: Error %d posting send for protocol error\n",
+ ret);
svc_rdma_put_context(ctxt, 1);
}
-
- return ret;
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index a1b0fbe3ea3..b976d9ed10e 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -50,19 +50,8 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
switch (type) {
case XFRMA_ALG_AUTH:
- if (!algp->alg_key_len &&
- strcmp(algp->alg_name, "digest_null") != 0)
- return -EINVAL;
- break;
-
case XFRMA_ALG_CRYPT:
- if (!algp->alg_key_len &&
- strcmp(algp->alg_name, "cipher_null") != 0)
- return -EINVAL;
- break;
-
case XFRMA_ALG_COMP:
- /* Zero length keys are legal. */
break;
default: