aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_core.c46
-rw-r--r--net/9p/Kconfig6
-rw-r--r--net/9p/Makefile4
-rw-r--r--net/9p/client.c60
-rw-r--r--net/9p/protocol.c33
-rw-r--r--net/9p/trans_fd.c4
-rw-r--r--net/9p/trans_rdma.c713
-rw-r--r--net/bridge/br_device.c2
-rw-r--r--net/bridge/br_if.c14
-rw-r--r--net/core/dev.c138
-rw-r--r--net/core/net_namespace.c32
-rw-r--r--net/core/pktgen.c27
-rw-r--r--net/core/scm.c24
-rw-r--r--net/core/skbuff.c12
-rw-r--r--net/ipv4/cipso_ipv4.c7
-rw-r--r--net/ipv4/tcp.c3
-rw-r--r--net/ipv4/tcp_output.c35
-rw-r--r--net/ipv4/udp.c12
-rw-r--r--net/ipv4/xfrm4_state.c1
-rw-r--r--net/ipv6/addrconf.c4
-rw-r--r--net/ipv6/udp.c36
-rw-r--r--net/ipv6/xfrm6_state.c1
-rw-r--r--net/key/af_key.c1
-rw-r--r--net/mac80211/rc80211_minstrel_debugfs.c6
-rw-r--r--net/netfilter/nf_conntrack_helper.c3
-rw-r--r--net/netfilter/nf_conntrack_proto.c5
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c4
-rw-r--r--net/netlabel/netlabel_addrlist.c2
-rw-r--r--net/netlabel/netlabel_addrlist.h22
-rw-r--r--net/netlabel/netlabel_mgmt.c2
-rw-r--r--net/phonet/af_phonet.c5
-rw-r--r--net/rfkill/rfkill-input.c5
-rw-r--r--net/rfkill/rfkill.c2
-rw-r--r--net/sched/sch_cbq.c7
-rw-r--r--net/sctp/input.c2
-rw-r--r--net/sctp/sm_statefuns.c54
-rw-r--r--net/sctp/sm_statetable.c4
-rw-r--r--net/socket.c1
-rw-r--r--net/sunrpc/auth.c18
-rw-r--r--net/sunrpc/xprtsock.c58
-rw-r--r--net/unix/af_unix.c51
-rw-r--r--net/unix/garbage.c49
-rw-r--r--net/wireless/Kconfig11
-rw-r--r--net/xfrm/xfrm_policy.c6
-rw-r--r--net/xfrm/xfrm_user.c2
45 files changed, 1290 insertions, 244 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 916061f681b..68ced4bf158 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -3,11 +3,20 @@
#include <linux/if_vlan.h>
#include "vlan.h"
+struct vlan_hwaccel_cb {
+ struct net_device *dev;
+};
+
+static inline struct vlan_hwaccel_cb *vlan_hwaccel_cb(struct sk_buff *skb)
+{
+ return (struct vlan_hwaccel_cb *)skb->cb;
+}
+
/* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */
int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
u16 vlan_tci, int polling)
{
- struct net_device_stats *stats;
+ struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb);
if (skb_bond_should_drop(skb)) {
dev_kfree_skb_any(skb);
@@ -15,23 +24,35 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
}
skb->vlan_tci = vlan_tci;
+ cb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK);
+
+ return (polling ? netif_receive_skb(skb) : netif_rx(skb));
+}
+EXPORT_SYMBOL(__vlan_hwaccel_rx);
+
+int vlan_hwaccel_do_receive(struct sk_buff *skb)
+{
+ struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb);
+ struct net_device *dev = cb->dev;
+ struct net_device_stats *stats;
+
netif_nit_deliver(skb);
- skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK);
- if (skb->dev == NULL) {
- dev_kfree_skb_any(skb);
- /* Not NET_RX_DROP, this is not being dropped
- * due to congestion. */
- return NET_RX_SUCCESS;
+ if (dev == NULL) {
+ kfree_skb(skb);
+ return -1;
}
- skb->dev->last_rx = jiffies;
+
+ skb->dev = dev;
+ skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci);
skb->vlan_tci = 0;
- stats = &skb->dev->stats;
+ dev->last_rx = jiffies;
+
+ stats = &dev->stats;
stats->rx_packets++;
stats->rx_bytes += skb->len;
- skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);
switch (skb->pkt_type) {
case PACKET_BROADCAST:
break;
@@ -43,13 +64,12 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
* This allows the VLAN to have a different MAC than the
* underlying device, and still route correctly. */
if (!compare_ether_addr(eth_hdr(skb)->h_dest,
- skb->dev->dev_addr))
+ dev->dev_addr))
skb->pkt_type = PACKET_HOST;
break;
};
- return (polling ? netif_receive_skb(skb) : netif_rx(skb));
+ return 0;
}
-EXPORT_SYMBOL(__vlan_hwaccel_rx);
struct net_device *vlan_dev_real_dev(const struct net_device *dev)
{
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index ff34c5acc13..c42c0c400bf 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -20,6 +20,12 @@ config NET_9P_VIRTIO
This builds support for a transports between
guest partitions and a host partition.
+config NET_9P_RDMA
+ depends on NET_9P && INFINIBAND && EXPERIMENTAL
+ tristate "9P RDMA Transport (Experimental)"
+ help
+ This builds support for an RDMA transport.
+
config NET_9P_DEBUG
bool "Debug information"
depends on NET_9P
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 1041b7bd12e..198a640d53a 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -1,5 +1,6 @@
obj-$(CONFIG_NET_9P) := 9pnet.o
obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
+obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o
9pnet-objs := \
mod.o \
@@ -11,3 +12,6 @@ obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
9pnet_virtio-objs := \
trans_virtio.o \
+
+9pnet_rdma-objs := \
+ trans_rdma.o \
diff --git a/net/9p/client.c b/net/9p/client.c
index bbac2f72b4d..4b529454616 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -159,6 +159,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
if (!c->reqs[row]) {
printk(KERN_ERR "Couldn't grow tag array\n");
+ spin_unlock_irqrestore(&c->lock, flags);
return ERR_PTR(-ENOMEM);
}
for (col = 0; col < P9_ROW_MAXTAG; col++) {
@@ -188,6 +189,9 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
printk(KERN_ERR "Couldn't grow tag array\n");
kfree(req->tc);
kfree(req->rc);
+ kfree(req->wq);
+ req->tc = req->rc = NULL;
+ req->wq = NULL;
return ERR_PTR(-ENOMEM);
}
req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
@@ -310,12 +314,6 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
r->status = REQ_STATUS_IDLE;
if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool))
p9_idpool_put(tag, c->tagpool);
-
- /* if this was a flush request we have to free response fcall */
- if (r->rc->id == P9_RFLUSH) {
- kfree(r->tc);
- kfree(r->rc);
- }
}
/**
@@ -610,19 +608,21 @@ reterr:
static struct p9_fid *p9_fid_create(struct p9_client *clnt)
{
- int err;
+ int ret;
struct p9_fid *fid;
+ unsigned long flags;
P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt);
fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);
if (!fid)
return ERR_PTR(-ENOMEM);
- fid->fid = p9_idpool_get(clnt->fidpool);
+ ret = p9_idpool_get(clnt->fidpool);
if (fid->fid < 0) {
- err = -ENOSPC;
+ ret = -ENOSPC;
goto error;
}
+ fid->fid = ret;
memset(&fid->qid, 0, sizeof(struct p9_qid));
fid->mode = -1;
@@ -631,27 +631,28 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt)
fid->clnt = clnt;
fid->aux = NULL;
- spin_lock(&clnt->lock);
+ spin_lock_irqsave(&clnt->lock, flags);
list_add(&fid->flist, &clnt->fidlist);
- spin_unlock(&clnt->lock);
+ spin_unlock_irqrestore(&clnt->lock, flags);
return fid;
error:
kfree(fid);
- return ERR_PTR(err);
+ return ERR_PTR(ret);
}
static void p9_fid_destroy(struct p9_fid *fid)
{
struct p9_client *clnt;
+ unsigned long flags;
P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid);
clnt = fid->clnt;
p9_idpool_put(fid->fid, clnt->fidpool);
- spin_lock(&clnt->lock);
+ spin_lock_irqsave(&clnt->lock, flags);
list_del(&fid->flist);
- spin_unlock(&clnt->lock);
+ spin_unlock_irqrestore(&clnt->lock, flags);
kfree(fid);
}
@@ -817,7 +818,9 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
}
P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n",
- qid.type, qid.path, qid.version);
+ qid.type,
+ (unsigned long long)qid.path,
+ qid.version);
memmove(&fid->qid, &qid, sizeof(struct p9_qid));
@@ -864,7 +867,9 @@ p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname)
}
P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n",
- qid.type, qid.path, qid.version);
+ qid.type,
+ (unsigned long long)qid.path,
+ qid.version);
memmove(&afid->qid, &qid, sizeof(struct p9_qid));
p9_free_req(clnt, req);
@@ -929,7 +934,8 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
for (count = 0; count < nwqids; count++)
P9_DPRINTK(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n",
- count, wqids[count].type, wqids[count].path,
+ count, wqids[count].type,
+ (unsigned long long)wqids[count].path,
wqids[count].version);
if (nwname)
@@ -979,7 +985,9 @@ int p9_client_open(struct p9_fid *fid, int mode)
}
P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n",
- qid.type, qid.path, qid.version, iounit);
+ qid.type,
+ (unsigned long long)qid.path,
+ qid.version, iounit);
fid->mode = mode;
fid->iounit = iounit;
@@ -1022,7 +1030,9 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
}
P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n",
- qid.type, qid.path, qid.version, iounit);
+ qid.type,
+ (unsigned long long)qid.path,
+ qid.version, iounit);
fid->mode = mode;
fid->iounit = iounit;
@@ -1229,9 +1239,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
"<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
"<<< uid=%d gid=%d n_muid=%d\n",
ret->size, ret->type, ret->dev, ret->qid.type,
- ret->qid.path, ret->qid.version, ret->mode,
- ret->atime, ret->mtime, ret->length, ret->name,
- ret->uid, ret->gid, ret->muid, ret->extension,
+ (unsigned long long)ret->qid.path, ret->qid.version, ret->mode,
+ ret->atime, ret->mtime, (unsigned long long)ret->length,
+ ret->name, ret->uid, ret->gid, ret->muid, ret->extension,
ret->n_uid, ret->n_gid, ret->n_muid);
free_and_error:
@@ -1254,9 +1264,9 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
" name=%s uid=%s gid=%s muid=%s extension=(%s)\n"
" uid=%d gid=%d n_muid=%d\n",
wst->size, wst->type, wst->dev, wst->qid.type,
- wst->qid.path, wst->qid.version, wst->mode,
- wst->atime, wst->mtime, wst->length, wst->name,
- wst->uid, wst->gid, wst->muid, wst->extension,
+ (unsigned long long)wst->qid.path, wst->qid.version, wst->mode,
+ wst->atime, wst->mtime, (unsigned long long)wst->length,
+ wst->name, wst->uid, wst->gid, wst->muid, wst->extension,
wst->n_uid, wst->n_gid, wst->n_muid);
err = 0;
clnt = fid->clnt;
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 29be5243908..dcd7666824b 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -53,6 +53,7 @@
static int
p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...);
+#ifdef CONFIG_NET_9P_DEBUG
void
p9pdu_dump(int way, struct p9_fcall *pdu)
{
@@ -81,6 +82,12 @@ p9pdu_dump(int way, struct p9_fcall *pdu)
else
P9_DPRINTK(P9_DEBUG_PKT, "]]](%d) %s\n", datalen, buf);
}
+#else
+void
+p9pdu_dump(int way, struct p9_fcall *pdu)
+{
+}
+#endif
EXPORT_SYMBOL(p9pdu_dump);
void p9stat_free(struct p9_wstat *stbuf)
@@ -179,7 +186,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
}
break;
case 's':{
- char **ptr = va_arg(ap, char **);
+ char **sptr = va_arg(ap, char **);
int16_t len;
int size;
@@ -189,17 +196,17 @@ p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
size = MAX(len, 0);
- *ptr = kmalloc(size + 1, GFP_KERNEL);
- if (*ptr == NULL) {
+ *sptr = kmalloc(size + 1, GFP_KERNEL);
+ if (*sptr == NULL) {
errcode = -EFAULT;
break;
}
- if (pdu_read(pdu, *ptr, size)) {
+ if (pdu_read(pdu, *sptr, size)) {
errcode = -EFAULT;
- kfree(*ptr);
- *ptr = NULL;
+ kfree(*sptr);
+ *sptr = NULL;
} else
- (*ptr)[size] = 0;
+ (*sptr)[size] = 0;
}
break;
case 'Q':{
@@ -373,13 +380,13 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
}
break;
case 's':{
- const char *ptr = va_arg(ap, const char *);
+ const char *sptr = va_arg(ap, const char *);
int16_t len = 0;
- if (ptr)
- len = MIN(strlen(ptr), USHORT_MAX);
+ if (sptr)
+ len = MIN(strlen(sptr), USHORT_MAX);
errcode = p9pdu_writef(pdu, optional, "w", len);
- if (!errcode && pdu_write(pdu, ptr, len))
+ if (!errcode && pdu_write(pdu, sptr, len))
errcode = -EFAULT;
}
break;
@@ -419,7 +426,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap)
case 'U':{
int32_t count = va_arg(ap, int32_t);
const char __user *udata =
- va_arg(ap, const void *);
+ va_arg(ap, const void __user *);
errcode =
p9pdu_writef(pdu, optional, "d", count);
if (!errcode && pdu_write_u(pdu, udata, count))
@@ -542,8 +549,10 @@ int p9pdu_finalize(struct p9_fcall *pdu)
err = p9pdu_writef(pdu, 0, "d", size);
pdu->size = size;
+#ifdef CONFIG_NET_9P_DEBUG
if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT)
p9pdu_dump(0, pdu);
+#endif
P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size,
pdu->id, pdu->tag);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index be65d8242fd..1df0356f242 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -678,11 +678,9 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
{
- struct p9_trans_fd *ts = client->trans;
- struct p9_conn *m = ts->conn;
int ret = 1;
- P9_DPRINTK(P9_DEBUG_TRANS, "mux %p req %p\n", m, req);
+ P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
spin_lock(&client->lock);
list_del(&req->req_list);
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
new file mode 100644
index 00000000000..2f1fe5fc122
--- /dev/null
+++ b/net/9p/trans_rdma.c
@@ -0,0 +1,713 @@
+/*
+ * linux/net/9p/trans_rdma.c
+ *
+ * RDMA transport layer based on the trans_fd.c implementation.
+ *
+ * Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com>
+ * Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
+ * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
+ * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
+ * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to:
+ * Free Software Foundation
+ * 51 Franklin Street, Fifth Floor
+ * Boston, MA 02111-1301 USA
+ *
+ */
+
+#include <linux/in.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/ipv6.h>
+#include <linux/kthread.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/un.h>
+#include <linux/uaccess.h>
+#include <linux/inet.h>
+#include <linux/idr.h>
+#include <linux/file.h>
+#include <linux/parser.h>
+#include <linux/semaphore.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+#include <net/9p/transport.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_cm.h>
+
+#define P9_PORT 5640
+#define P9_RDMA_SQ_DEPTH 32
+#define P9_RDMA_RQ_DEPTH 32
+#define P9_RDMA_SEND_SGE 4
+#define P9_RDMA_RECV_SGE 4
+#define P9_RDMA_IRD 0
+#define P9_RDMA_ORD 0
+#define P9_RDMA_TIMEOUT 30000 /* 30 seconds */
+#define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can
+ * safely advertise a maxsize
+ * of 16k */
+
+#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT)
+/**
+ * struct p9_trans_rdma - RDMA transport instance
+ *
+ * @state: tracks the transport state machine for connection setup and tear down
+ * @cm_id: The RDMA CM ID
+ * @pd: Protection Domain pointer
+ * @qp: Queue Pair pointer
+ * @cq: Completion Queue pointer
+ * @lkey: The local access only memory region key
+ * @timeout: Number of msecs to wait for connection management events
+ * @sq_depth: The depth of the Send Queue
+ * @sq_sem: Semaphore for the SQ
+ * @rq_depth: The depth of the Receive Queue.
+ * @addr: The remote peer's address
+ * @req_lock: Protects the active request list
+ * @send_wait: Wait list when the SQ fills up
+ * @cm_done: Completion event for connection management tracking
+ */
+struct p9_trans_rdma {
+ enum {
+ P9_RDMA_INIT,
+ P9_RDMA_ADDR_RESOLVED,
+ P9_RDMA_ROUTE_RESOLVED,
+ P9_RDMA_CONNECTED,
+ P9_RDMA_FLUSHING,
+ P9_RDMA_CLOSING,
+ P9_RDMA_CLOSED,
+ } state;
+ struct rdma_cm_id *cm_id;
+ struct ib_pd *pd;
+ struct ib_qp *qp;
+ struct ib_cq *cq;
+ struct ib_mr *dma_mr;
+ u32 lkey;
+ long timeout;
+ int sq_depth;
+ struct semaphore sq_sem;
+ int rq_depth;
+ atomic_t rq_count;
+ struct sockaddr_in addr;
+ spinlock_t req_lock;
+
+ struct completion cm_done;
+};
+
+/**
+ * p9_rdma_context - Keeps track of in-process WR
+ *
+ * @wc_op: The original WR op for when the CQE completes in error.
+ * @busa: Bus address to unmap when the WR completes
+ * @req: Keeps track of requests (send)
+ * @rc: Keeps track of replies (receive)
+ */
+struct p9_rdma_req;
+struct p9_rdma_context {
+ enum ib_wc_opcode wc_op;
+ dma_addr_t busa;
+ union {
+ struct p9_req_t *req;
+ struct p9_fcall *rc;
+ };
+};
+
+/**
+ * p9_rdma_opts - Collection of mount options
+ * @port: port of connection
+ * @sq_depth: The requested depth of the SQ. This really doesn't need
+ * to be any deeper than the number of threads used in the client
+ * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth
+ * @timeout: Time to wait in msecs for CM events
+ */
+struct p9_rdma_opts {
+ short port;
+ int sq_depth;
+ int rq_depth;
+ long timeout;
+};
+
+/*
+ * Option Parsing (code inspired by NFS code)
+ */
+enum {
+ /* Options that take integer arguments */
+ Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout, Opt_err,
+};
+
+static match_table_t tokens = {
+ {Opt_port, "port=%u"},
+ {Opt_sq_depth, "sq=%u"},
+ {Opt_rq_depth, "rq=%u"},
+ {Opt_timeout, "timeout=%u"},
+ {Opt_err, NULL},
+};
+
+/**
+ * parse_opts - parse mount options into the RDMA options structure
+ * @params: options string passed from mount
+ * @opts: transport-specific structure to parse options into
+ *
+ * Returns 0 upon success, -ERRNO upon failure
+ */
+static int parse_opts(char *params, struct p9_rdma_opts *opts)
+{
+ char *p;
+ substring_t args[MAX_OPT_ARGS];
+ int option;
+ char *options;
+ int ret;
+
+ opts->port = P9_PORT;
+ opts->sq_depth = P9_RDMA_SQ_DEPTH;
+ opts->rq_depth = P9_RDMA_RQ_DEPTH;
+ opts->timeout = P9_RDMA_TIMEOUT;
+
+ if (!params)
+ return 0;
+
+ options = kstrdup(params, GFP_KERNEL);
+ if (!options) {
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "failed to allocate copy of option string\n");
+ return -ENOMEM;
+ }
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+ int r;
+ if (!*p)
+ continue;
+ token = match_token(p, tokens, args);
+ r = match_int(&args[0], &option);
+ if (r < 0) {
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "integer field, but no integer?\n");
+ ret = r;
+ continue;
+ }
+ switch (token) {
+ case Opt_port:
+ opts->port = option;
+ break;
+ case Opt_sq_depth:
+ opts->sq_depth = option;
+ break;
+ case Opt_rq_depth:
+ opts->rq_depth = option;
+ break;
+ case Opt_timeout:
+ opts->timeout = option;
+ break;
+ default:
+ continue;
+ }
+ }
+ /* RQ must be at least as large as the SQ */
+ opts->rq_depth = max(opts->rq_depth, opts->sq_depth);
+ kfree(options);
+ return 0;
+}
+
+static int
+p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
+{
+ struct p9_client *c = id->context;
+ struct p9_trans_rdma *rdma = c->trans;
+ switch (event->event) {
+ case RDMA_CM_EVENT_ADDR_RESOLVED:
+ BUG_ON(rdma->state != P9_RDMA_INIT);
+ rdma->state = P9_RDMA_ADDR_RESOLVED;
+ break;
+
+ case RDMA_CM_EVENT_ROUTE_RESOLVED:
+ BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED);
+ rdma->state = P9_RDMA_ROUTE_RESOLVED;
+ break;
+
+ case RDMA_CM_EVENT_ESTABLISHED:
+ BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED);
+ rdma->state = P9_RDMA_CONNECTED;
+ break;
+
+ case RDMA_CM_EVENT_DISCONNECTED:
+ if (rdma)
+ rdma->state = P9_RDMA_CLOSED;
+ if (c)
+ c->status = Disconnected;
+ break;
+
+ case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+ break;
+
+ case RDMA_CM_EVENT_ADDR_CHANGE:
+ case RDMA_CM_EVENT_ROUTE_ERROR:
+ case RDMA_CM_EVENT_DEVICE_REMOVAL:
+ case RDMA_CM_EVENT_MULTICAST_JOIN:
+ case RDMA_CM_EVENT_MULTICAST_ERROR:
+ case RDMA_CM_EVENT_REJECTED:
+ case RDMA_CM_EVENT_CONNECT_REQUEST:
+ case RDMA_CM_EVENT_CONNECT_RESPONSE:
+ case RDMA_CM_EVENT_CONNECT_ERROR:
+ case RDMA_CM_EVENT_ADDR_ERROR:
+ case RDMA_CM_EVENT_UNREACHABLE:
+ c->status = Disconnected;
+ rdma_disconnect(rdma->cm_id);
+ break;
+ default:
+ BUG();
+ }
+ complete(&rdma->cm_done);
+ return 0;
+}
+
+static void
+handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
+ struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
+{
+ struct p9_req_t *req;
+ int err = 0;
+ int16_t tag;
+
+ req = NULL;
+ ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
+ DMA_FROM_DEVICE);
+
+ if (status != IB_WC_SUCCESS)
+ goto err_out;
+
+ err = p9_parse_header(c->rc, NULL, NULL, &tag, 1);
+ if (err)
+ goto err_out;
+
+ req = p9_tag_lookup(client, tag);
+ if (!req)
+ goto err_out;
+
+ req->rc = c->rc;
+ p9_client_cb(client, req);
+
+ return;
+
+ err_out:
+ P9_DPRINTK(P9_DEBUG_ERROR, "req %p err %d status %d\n",
+ req, err, status);
+ rdma->state = P9_RDMA_FLUSHING;
+ client->status = Disconnected;
+ return;
+}
+
+static void
+handle_send(struct p9_client *client, struct p9_trans_rdma *rdma,
+ struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len)
+{
+ ib_dma_unmap_single(rdma->cm_id->device,
+ c->busa, c->req->tc->size,
+ DMA_TO_DEVICE);
+}
+
+static void qp_event_handler(struct ib_event *event, void *context)
+{
+ P9_DPRINTK(P9_DEBUG_ERROR, "QP event %d context %p\n", event->event,
+ context);
+}
+
+static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
+{
+ struct p9_client *client = cq_context;
+ struct p9_trans_rdma *rdma = client->trans;
+ int ret;
+ struct ib_wc wc;
+
+ ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
+ while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
+ struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id;
+
+ switch (c->wc_op) {
+ case IB_WC_RECV:
+ atomic_dec(&rdma->rq_count);
+ handle_recv(client, rdma, c, wc.status, wc.byte_len);
+ break;
+
+ case IB_WC_SEND:
+ handle_send(client, rdma, c, wc.status, wc.byte_len);
+ up(&rdma->sq_sem);
+ break;
+
+ default:
+ printk(KERN_ERR "9prdma: unexpected completion type, "
+ "c->wc_op=%d, wc.opcode=%d, status=%d\n",
+ c->wc_op, wc.opcode, wc.status);
+ break;
+ }
+ kfree(c);
+ }
+}
+
+static void cq_event_handler(struct ib_event *e, void *v)
+{
+ P9_DPRINTK(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v);
+}
+
+static void rdma_destroy_trans(struct p9_trans_rdma *rdma)
+{
+ if (!rdma)
+ return;
+
+ if (rdma->dma_mr && !IS_ERR(rdma->dma_mr))
+ ib_dereg_mr(rdma->dma_mr);
+
+ if (rdma->qp && !IS_ERR(rdma->qp))
+ ib_destroy_qp(rdma->qp);
+
+ if (rdma->pd && !IS_ERR(rdma->pd))
+ ib_dealloc_pd(rdma->pd);
+
+ if (rdma->cq && !IS_ERR(rdma->cq))
+ ib_destroy_cq(rdma->cq);
+
+ if (rdma->cm_id && !IS_ERR(rdma->cm_id))
+ rdma_destroy_id(rdma->cm_id);
+
+ kfree(rdma);
+}
+
+static int
+post_recv(struct p9_client *client, struct p9_rdma_context *c)
+{
+ struct p9_trans_rdma *rdma = client->trans;
+ struct ib_recv_wr wr, *bad_wr;
+ struct ib_sge sge;
+
+ c->busa = ib_dma_map_single(rdma->cm_id->device,
+ c->rc->sdata, client->msize,
+ DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
+ goto error;
+
+ sge.addr = c->busa;
+ sge.length = client->msize;
+ sge.lkey = rdma->lkey;
+
+ wr.next = NULL;
+ c->wc_op = IB_WC_RECV;
+ wr.wr_id = (unsigned long) c;
+ wr.sg_list = &sge;
+ wr.num_sge = 1;
+ return ib_post_recv(rdma->qp, &wr, &bad_wr);
+
+ error:
+ P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n");
+ return -EIO;
+}
+
+static int rdma_request(struct p9_client *client, struct p9_req_t *req)
+{
+ struct p9_trans_rdma *rdma = client->trans;
+ struct ib_send_wr wr, *bad_wr;
+ struct ib_sge sge;
+ int err = 0;
+ unsigned long flags;
+ struct p9_rdma_context *c = NULL;
+ struct p9_rdma_context *rpl_context = NULL;
+
+ /* Allocate an fcall for the reply */
+ rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL);
+ if (!rpl_context)
+ goto err_close;
+
+ /*
+ * If the request has a buffer, steal it, otherwise
+ * allocate a new one. Typically, requests should already
+ * have receive buffers allocated and just swap them around
+ */
+ if (!req->rc) {
+ req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
+ GFP_KERNEL);
+ if (req->rc) {
+ req->rc->sdata = (char *) req->rc +
+ sizeof(struct p9_fcall);
+ req->rc->capacity = client->msize;
+ }
+ }
+ rpl_context->rc = req->rc;
+ if (!rpl_context->rc) {
+ kfree(rpl_context);
+ goto err_close;
+ }
+
+ /*
+ * Post a receive buffer for this request. We need to ensure
+ * there is a reply buffer available for every outstanding
+ * request. A flushed request can result in no reply for an
+ * outstanding request, so we must keep a count to avoid
+ * overflowing the RQ.
+ */
+ if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) {
+ err = post_recv(client, rpl_context);
+ if (err) {
+ kfree(rpl_context->rc);
+ kfree(rpl_context);
+ goto err_close;
+ }
+ } else
+ atomic_dec(&rdma->rq_count);
+
+ /* remove posted receive buffer from request structure */
+ req->rc = NULL;
+
+ /* Post the request */
+ c = kmalloc(sizeof *c, GFP_KERNEL);
+ if (!c)
+ goto err_close;
+ c->req = req;
+
+ c->busa = ib_dma_map_single(rdma->cm_id->device,
+ c->req->tc->sdata, c->req->tc->size,
+ DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
+ goto error;
+
+ sge.addr = c->busa;
+ sge.length = c->req->tc->size;
+ sge.lkey = rdma->lkey;
+
+ wr.next = NULL;
+ c->wc_op = IB_WC_SEND;
+ wr.wr_id = (unsigned long) c;
+ wr.opcode = IB_WR_SEND;
+ wr.send_flags = IB_SEND_SIGNALED;
+ wr.sg_list = &sge;
+ wr.num_sge = 1;
+
+ if (down_interruptible(&rdma->sq_sem))
+ goto error;
+
+ return ib_post_send(rdma->qp, &wr, &bad_wr);
+
+ error:
+ P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n");
+ return -EIO;
+
+ err_close:
+ spin_lock_irqsave(&rdma->req_lock, flags);
+ if (rdma->state < P9_RDMA_CLOSING) {
+ rdma->state = P9_RDMA_CLOSING;
+ spin_unlock_irqrestore(&rdma->req_lock, flags);
+ rdma_disconnect(rdma->cm_id);
+ } else
+ spin_unlock_irqrestore(&rdma->req_lock, flags);
+ return err;
+}
+
+static void rdma_close(struct p9_client *client)
+{
+ struct p9_trans_rdma *rdma;
+
+ if (!client)
+ return;
+
+ rdma = client->trans;
+ if (!rdma)
+ return;
+
+ client->status = Disconnected;
+ rdma_disconnect(rdma->cm_id);
+ rdma_destroy_trans(rdma);
+}
+
+/**
+ * alloc_rdma - Allocate and initialize the rdma transport structure
+ * @msize: MTU
+ * @dotu: Extension attribute
+ * @opts: Mount options structure
+ */
+static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
+{
+ struct p9_trans_rdma *rdma;
+
+ rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL);
+ if (!rdma)
+ return NULL;
+
+ rdma->sq_depth = opts->sq_depth;
+ rdma->rq_depth = opts->rq_depth;
+ rdma->timeout = opts->timeout;
+ spin_lock_init(&rdma->req_lock);
+ init_completion(&rdma->cm_done);
+ sema_init(&rdma->sq_sem, rdma->sq_depth);
+ atomic_set(&rdma->rq_count, 0);
+
+ return rdma;
+}
+
+/* it's not clear to me we can do anything after send has been posted */
+static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
+{
+ return 1;
+}
+
+/**
+ * rdma_create_trans - Transport method for creating a transport instance
+ * @client: client instance
+ * @addr: IP address string
+ * @args: Mount options string
+ */
+static int
+rdma_create_trans(struct p9_client *client, const char *addr, char *args)
+{
+ int err;
+ struct p9_rdma_opts opts;
+ struct p9_trans_rdma *rdma;
+ struct rdma_conn_param conn_param;
+ struct ib_qp_init_attr qp_attr;
+ struct ib_device_attr devattr;
+
+ /* Parse the transport specific mount options */
+ err = parse_opts(args, &opts);
+ if (err < 0)
+ return err;
+
+ /* Create and initialize the RDMA transport structure */
+ rdma = alloc_rdma(&opts);
+ if (!rdma)
+ return -ENOMEM;
+
+ /* Create the RDMA CM ID */
+ rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP);
+ if (IS_ERR(rdma->cm_id))
+ goto error;
+
+ /* Associate the client with the transport */
+ client->trans = rdma;
+
+ /* Resolve the server's address */
+ rdma->addr.sin_family = AF_INET;
+ rdma->addr.sin_addr.s_addr = in_aton(addr);
+ rdma->addr.sin_port = htons(opts.port);
+ err = rdma_resolve_addr(rdma->cm_id, NULL,
+ (struct sockaddr *)&rdma->addr,
+ rdma->timeout);
+ if (err)
+ goto error;
+ err = wait_for_completion_interruptible(&rdma->cm_done);
+ if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
+ goto error;
+
+ /* Resolve the route to the server */
+ err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
+ if (err)
+ goto error;
+ err = wait_for_completion_interruptible(&rdma->cm_done);
+ if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
+ goto error;
+
+ /* Query the device attributes */
+ err = ib_query_device(rdma->cm_id->device, &devattr);
+ if (err)
+ goto error;
+
+ /* Create the Completion Queue */
+ rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
+ cq_event_handler, client,
+ opts.sq_depth + opts.rq_depth + 1, 0);
+ if (IS_ERR(rdma->cq))
+ goto error;
+ ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);
+
+ /* Create the Protection Domain */
+ rdma->pd = ib_alloc_pd(rdma->cm_id->device);
+ if (IS_ERR(rdma->pd))
+ goto error;
+
+ /* Cache the DMA lkey in the transport */
+ rdma->dma_mr = NULL;
+ if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
+ rdma->lkey = rdma->cm_id->device->local_dma_lkey;
+ else {
+ rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(rdma->dma_mr))
+ goto error;
+ rdma->lkey = rdma->dma_mr->lkey;
+ }
+
+ /* Create the Queue Pair */
+ memset(&qp_attr, 0, sizeof qp_attr);
+ qp_attr.event_handler = qp_event_handler;
+ qp_attr.qp_context = client;
+ qp_attr.cap.max_send_wr = opts.sq_depth;
+ qp_attr.cap.max_recv_wr = opts.rq_depth;
+ qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
+ qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
+ qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
+ qp_attr.qp_type = IB_QPT_RC;
+ qp_attr.send_cq = rdma->cq;
+ qp_attr.recv_cq = rdma->cq;
+ err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
+ if (err)
+ goto error;
+ rdma->qp = rdma->cm_id->qp;
+
+ /* Request a connection */
+ memset(&conn_param, 0, sizeof(conn_param));
+ conn_param.private_data = NULL;
+ conn_param.private_data_len = 0;
+ conn_param.responder_resources = P9_RDMA_IRD;
+ conn_param.initiator_depth = P9_RDMA_ORD;
+ err = rdma_connect(rdma->cm_id, &conn_param);
+ if (err)
+ goto error;
+ err = wait_for_completion_interruptible(&rdma->cm_done);
+ if (err || (rdma->state != P9_RDMA_CONNECTED))
+ goto error;
+
+ client->status = Connected;
+
+ return 0;
+
+error:
+ rdma_destroy_trans(rdma);
+ return -ENOTCONN;
+}
+
+static struct p9_trans_module p9_rdma_trans = {
+ .name = "rdma",
+ .maxsize = P9_RDMA_MAXSIZE,
+ .def = 0,
+ .owner = THIS_MODULE,
+ .create = rdma_create_trans,
+ .close = rdma_close,
+ .request = rdma_request,
+ .cancel = rdma_cancel,
+};
+
+/**
+ * p9_trans_rdma_init - Register the 9P RDMA transport driver
+ */
+static int __init p9_trans_rdma_init(void)
+{
+ v9fs_register_trans(&p9_rdma_trans);
+ return 0;
+}
+
+static void __exit p9_trans_rdma_exit(void)
+{
+ v9fs_unregister_trans(&p9_rdma_trans);
+}
+
+module_init(p9_trans_rdma_init);
+module_exit(p9_trans_rdma_exit);
+
+MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
+MODULE_DESCRIPTION("RDMA Transport for 9P");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 22ba8632196..6c023f0f825 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -179,5 +179,5 @@ void br_dev_setup(struct net_device *dev)
dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
- NETIF_F_NETNS_LOCAL;
+ NETIF_F_NETNS_LOCAL | NETIF_F_GSO;
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 573e20f7dba..0a09ccf68c1 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -347,15 +347,21 @@ int br_min_mtu(const struct net_bridge *br)
void br_features_recompute(struct net_bridge *br)
{
struct net_bridge_port *p;
- unsigned long features;
+ unsigned long features, mask;
- features = br->feature_mask;
+ features = mask = br->feature_mask;
+ if (list_empty(&br->port_list))
+ goto done;
+
+ features &= ~NETIF_F_ONE_FOR_ALL;
list_for_each_entry(p, &br->port_list, list) {
- features = netdev_compute_features(features, p->dev->features);
+ features = netdev_increment_features(features,
+ p->dev->features, mask);
}
- br->dev->features = features;
+done:
+ br->dev->features = netdev_fix_features(features, NULL);
}
/* called with RTNL */
diff --git a/net/core/dev.c b/net/core/dev.c
index b8a4fd0806a..9174c77d311 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2218,6 +2218,9 @@ int netif_receive_skb(struct sk_buff *skb)
int ret = NET_RX_DROP;
__be16 type;
+ if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
+ return NET_RX_SUCCESS;
+
/* if we've gotten here through NAPI, check netpoll */
if (netpoll_receive_skb(skb))
return NET_RX_DROP;
@@ -3947,6 +3950,46 @@ static void netdev_init_queue_locks(struct net_device *dev)
__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
}
+unsigned long netdev_fix_features(unsigned long features, const char *name)
+{
+ /* Fix illegal SG+CSUM combinations. */
+ if ((features & NETIF_F_SG) &&
+ !(features & NETIF_F_ALL_CSUM)) {
+ if (name)
+ printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
+ "checksum feature.\n", name);
+ features &= ~NETIF_F_SG;
+ }
+
+ /* TSO requires that SG is present as well. */
+ if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
+ if (name)
+ printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
+ "SG feature.\n", name);
+ features &= ~NETIF_F_TSO;
+ }
+
+ if (features & NETIF_F_UFO) {
+ if (!(features & NETIF_F_GEN_CSUM)) {
+ if (name)
+ printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
+ "since no NETIF_F_HW_CSUM feature.\n",
+ name);
+ features &= ~NETIF_F_UFO;
+ }
+
+ if (!(features & NETIF_F_SG)) {
+ if (name)
+ printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
+ "since no NETIF_F_SG feature.\n", name);
+ features &= ~NETIF_F_UFO;
+ }
+ }
+
+ return features;
+}
+EXPORT_SYMBOL(netdev_fix_features);
+
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -4032,36 +4075,7 @@ int register_netdevice(struct net_device *dev)
dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
}
-
- /* Fix illegal SG+CSUM combinations. */
- if ((dev->features & NETIF_F_SG) &&
- !(dev->features & NETIF_F_ALL_CSUM)) {
- printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
- dev->name);
- dev->features &= ~NETIF_F_SG;
- }
-
- /* TSO requires that SG is present as well. */
- if ((dev->features & NETIF_F_TSO) &&
- !(dev->features & NETIF_F_SG)) {
- printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
- dev->name);
- dev->features &= ~NETIF_F_TSO;
- }
- if (dev->features & NETIF_F_UFO) {
- if (!(dev->features & NETIF_F_HW_CSUM)) {
- printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
- "NETIF_F_HW_CSUM feature.\n",
- dev->name);
- dev->features &= ~NETIF_F_UFO;
- }
- if (!(dev->features & NETIF_F_SG)) {
- printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
- "NETIF_F_SG feature.\n",
- dev->name);
- dev->features &= ~NETIF_F_UFO;
- }
- }
+ dev->features = netdev_fix_features(dev->features, dev->name);
/* Enable software GSO if SG is supported. */
if (dev->features & NETIF_F_SG)
@@ -4700,49 +4714,45 @@ static int __init netdev_dma_register(void) { return -ENODEV; }
#endif /* CONFIG_NET_DMA */
/**
- * netdev_compute_feature - compute conjunction of two feature sets
- * @all: first feature set
- * @one: second feature set
+ * netdev_increment_features - increment feature set by one
+ * @all: current feature set
+ * @one: new feature set
+ * @mask: mask feature set
*
* Computes a new feature set after adding a device with feature set
- * @one to the master device with current feature set @all. Returns
- * the new feature set.
+ * @one to the master device with current feature set @all. Will not
+ * enable anything that is off in @mask. Returns the new feature set.
*/
-int netdev_compute_features(unsigned long all, unsigned long one)
-{
- /* if device needs checksumming, downgrade to hw checksumming */
- if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
- all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
-
- /* if device can't do all checksum, downgrade to ipv4/ipv6 */
- if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
- all ^= NETIF_F_HW_CSUM
- | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
-
- if (one & NETIF_F_GSO)
- one |= NETIF_F_GSO_SOFTWARE;
- one |= NETIF_F_GSO;
-
- /*
- * If even one device supports a GSO protocol with software fallback,
- * enable it for all.
- */
- all |= one & NETIF_F_GSO_SOFTWARE;
+unsigned long netdev_increment_features(unsigned long all, unsigned long one,
+ unsigned long mask)
+{
+ /* If device needs checksumming, downgrade to it. */
+ if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
+ all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
+ else if (mask & NETIF_F_ALL_CSUM) {
+ /* If one device supports v4/v6 checksumming, set for all. */
+ if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
+ !(all & NETIF_F_GEN_CSUM)) {
+ all &= ~NETIF_F_ALL_CSUM;
+ all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+ }
- /* If even one device supports robust GSO, enable it for all. */
- if (one & NETIF_F_GSO_ROBUST)
- all |= NETIF_F_GSO_ROBUST;
+ /* If one device supports hw checksumming, set for all. */
+ if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
+ all &= ~NETIF_F_ALL_CSUM;
+ all |= NETIF_F_HW_CSUM;
+ }
+ }
- all &= one | NETIF_F_LLTX;
+ one |= NETIF_F_ALL_CSUM;
- if (!(all & NETIF_F_ALL_CSUM))
- all &= ~NETIF_F_SG;
- if (!(all & NETIF_F_SG))
- all &= ~NETIF_F_GSO_MASK;
+ one |= all & NETIF_F_ONE_FOR_ALL;
+ all &= one | NETIF_F_LLTX | NETIF_F_GSO;
+ all |= one & mask & NETIF_F_ONE_FOR_ALL;
return all;
}
-EXPORT_SYMBOL(netdev_compute_features);
+EXPORT_SYMBOL(netdev_increment_features);
static struct hlist_head *netdev_create_hash(void)
{
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index f1d07b5c1e1..1895a4ca9c4 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -325,6 +325,38 @@ void unregister_pernet_subsys(struct pernet_operations *module)
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
+int register_pernet_gen_subsys(int *id, struct pernet_operations *ops)
+{
+ int rv;
+
+ mutex_lock(&net_mutex);
+again:
+ rv = ida_get_new_above(&net_generic_ids, 1, id);
+ if (rv < 0) {
+ if (rv == -EAGAIN) {
+ ida_pre_get(&net_generic_ids, GFP_KERNEL);
+ goto again;
+ }
+ goto out;
+ }
+ rv = register_pernet_operations(first_device, ops);
+ if (rv < 0)
+ ida_remove(&net_generic_ids, *id);
+out: /* every path arrives here with net_mutex held; drop it before returning */
+ mutex_unlock(&net_mutex);
+ return rv;
+}
+EXPORT_SYMBOL_GPL(register_pernet_gen_subsys);
+
+void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops)
+{
+ mutex_lock(&net_mutex);
+ unregister_pernet_operations(ops);
+ ida_remove(&net_generic_ids, id);
+ mutex_unlock(&net_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys);
+
/**
* register_pernet_device - register a network namespace device
* @ops: pernet operations structure for the subsystem
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 99f656d35b4..a47f5bad110 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1973,28 +1973,27 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
/* make sure that we don't pick a non-existing transmit queue */
ntxq = pkt_dev->odev->real_num_tx_queues;
- if (ntxq <= num_online_cpus() && (pkt_dev->flags & F_QUEUE_MAP_CPU)) {
+ if (ntxq > num_online_cpus() && (pkt_dev->flags & F_QUEUE_MAP_CPU)) {
printk(KERN_WARNING "pktgen: WARNING: QUEUE_MAP_CPU "
- "disabled because CPU count (%d) exceeds number ",
- num_online_cpus());
- printk(KERN_WARNING "pktgen: WARNING: of tx queues "
- "(%d) on %s \n", ntxq, pkt_dev->odev->name);
+ "disabled because CPU count (%d) exceeds number "
+ "of tx queues (%d) on %s\n", num_online_cpus(), ntxq,
+ pkt_dev->odev->name);
pkt_dev->flags &= ~F_QUEUE_MAP_CPU;
}
if (ntxq <= pkt_dev->queue_map_min) {
printk(KERN_WARNING "pktgen: WARNING: Requested "
- "queue_map_min (%d) exceeds number of tx\n",
- pkt_dev->queue_map_min);
- printk(KERN_WARNING "pktgen: WARNING: queues (%d) on "
- "%s, resetting\n", ntxq, pkt_dev->odev->name);
+ "queue_map_min (zero-based) (%d) exceeds valid range "
+ "[0 - %d] for (%d) queues on %s, resetting\n",
+ pkt_dev->queue_map_min, (ntxq ?: 1)- 1, ntxq,
+ pkt_dev->odev->name);
pkt_dev->queue_map_min = ntxq - 1;
}
- if (ntxq <= pkt_dev->queue_map_max) {
+ if (pkt_dev->queue_map_max >= ntxq) {
printk(KERN_WARNING "pktgen: WARNING: Requested "
- "queue_map_max (%d) exceeds number of tx\n",
- pkt_dev->queue_map_max);
- printk(KERN_WARNING "pktgen: WARNING: queues (%d) on "
- "%s, resetting\n", ntxq, pkt_dev->odev->name);
+ "queue_map_max (zero-based) (%d) exceeds valid range "
+ "[0 - %d] for (%d) queues on %s, resetting\n",
+ pkt_dev->queue_map_max, (ntxq ?: 1)- 1, ntxq,
+ pkt_dev->odev->name);
pkt_dev->queue_map_max = ntxq - 1;
}
diff --git a/net/core/scm.c b/net/core/scm.c
index 10f5c65f6a4..ab242cc1acc 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -75,6 +75,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
if (!fpl)
return -ENOMEM;
*fplp = fpl;
+ INIT_LIST_HEAD(&fpl->list);
fpl->count = 0;
}
fpp = &fpl->fp[fpl->count];
@@ -106,9 +107,25 @@ void __scm_destroy(struct scm_cookie *scm)
if (fpl) {
scm->fp = NULL;
- for (i=fpl->count-1; i>=0; i--)
- fput(fpl->fp[i]);
- kfree(fpl);
+ if (current->scm_work_list) {
+ list_add_tail(&fpl->list, current->scm_work_list);
+ } else {
+ LIST_HEAD(work_list);
+
+ current->scm_work_list = &work_list;
+
+ list_add(&fpl->list, &work_list);
+ while (!list_empty(&work_list)) {
+ fpl = list_first_entry(&work_list, struct scm_fp_list, list);
+
+ list_del(&fpl->list);
+ for (i=fpl->count-1; i>=0; i--)
+ fput(fpl->fp[i]);
+ kfree(fpl);
+ }
+
+ current->scm_work_list = NULL;
+ }
}
}
@@ -284,6 +301,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
if (new_fpl) {
+ INIT_LIST_HEAD(&new_fpl->list);
for (i=fpl->count-1; i>=0; i--)
get_file(fpl->fp[i]);
memcpy(new_fpl, fpl, sizeof(*fpl));
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4e22e3a3535..ebb6b94f8af 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -449,6 +449,18 @@ void kfree_skb(struct sk_buff *skb)
__kfree_skb(skb);
}
+/**
+ * skb_recycle_check - check if skb can be reused for receive
+ * @skb: buffer
+ * @skb_size: minimum receive buffer size
+ *
+ * Checks that the skb passed in is not shared or cloned, and
+ * that it is linear and its head portion at least as large as
+ * skb_size so that it can be recycled as a receive buffer.
+ * If these conditions are met, this function does any necessary
+ * reference count dropping and cleans up the skbuff as if it
+ * just came from __alloc_skb().
+ */
int skb_recycle_check(struct sk_buff *skb, int skb_size)
{
struct skb_shared_info *shinfo;
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 490e035c6d9..2e78f6bd977 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -2063,9 +2063,10 @@ int cipso_v4_skbuff_setattr(struct sk_buff *skb,
u32 opt_len;
int len_delta;
- buf_len = cipso_v4_genopt(buf, buf_len, doi_def, secattr);
- if (buf_len < 0)
- return buf_len;
+ ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr);
+ if (ret_val < 0)
+ return ret_val;
+ buf_len = ret_val;
opt_len = (buf_len + 3) & ~3;
/* we overwrite any existing options to ensure that we have enough
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index eccb7165a80..c5aca0bb116 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1374,8 +1374,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
sk->sk_state == TCP_CLOSE ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
!timeo ||
- signal_pending(current) ||
- (flags & MSG_PEEK))
+ signal_pending(current))
break;
} else {
if (sock_flag(sk, SOCK_DONE))
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 990a5849323..ba85d883189 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -362,6 +362,17 @@ struct tcp_out_options {
__u32 tsval, tsecr; /* need to include OPTION_TS */
};
+/* Beware: Something in the Internet is very sensitive to the ordering of
+ * TCP options; we learned this the hard way, so be careful here.
+ * Luckily we can at least blame others for their non-compliance, but from an
+ * interoperability perspective it seems that we're somewhat stuck with
+ * the ordering which we have been using if we want to keep working with
+ * those broken things (not that it currently hurts anybody as there isn't
+ * particular reason why the ordering would need to be changed).
+ *
+ * At least SACK_PERM as the first option is known to lead to a disaster
+ * (but it may well be that other scenarios fail similarly).
+ */
static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
const struct tcp_out_options *opts,
__u8 **md5_hash) {
@@ -376,6 +387,12 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
*md5_hash = NULL;
}
+ if (unlikely(opts->mss)) {
+ *ptr++ = htonl((TCPOPT_MSS << 24) |
+ (TCPOLEN_MSS << 16) |
+ opts->mss);
+ }
+
if (likely(OPTION_TS & opts->options)) {
if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) {
*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
@@ -392,12 +409,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
*ptr++ = htonl(opts->tsecr);
}
- if (unlikely(opts->mss)) {
- *ptr++ = htonl((TCPOPT_MSS << 24) |
- (TCPOLEN_MSS << 16) |
- opts->mss);
- }
-
if (unlikely(OPTION_SACK_ADVERTISE & opts->options &&
!(OPTION_TS & opts->options))) {
*ptr++ = htonl((TCPOPT_NOP << 24) |
@@ -432,7 +443,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
if (tp->rx_opt.dsack) {
tp->rx_opt.dsack = 0;
- tp->rx_opt.eff_sacks--;
+ tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
}
}
}
@@ -2268,6 +2279,11 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
}
memset(&opts, 0, sizeof(opts));
+#ifdef CONFIG_SYN_COOKIES
+ if (unlikely(req->cookie_ts))
+ TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
+ else
+#endif
TCP_SKB_CB(skb)->when = tcp_time_stamp;
tcp_header_size = tcp_synack_options(sk, req, mss,
skb, &opts, &md5) +
@@ -2293,11 +2309,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rcv_wnd, 65535U));
-#ifdef CONFIG_SYN_COOKIES
- if (unlikely(req->cookie_ts))
- TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
- else
-#endif
tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
th->doff = (tcp_header_size >> 2);
TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2095abc3cab..cf02701ced4 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -284,7 +284,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
}
EXPORT_SYMBOL_GPL(udp4_lib_lookup);
-static inline struct sock *udp_v4_mcast_next(struct sock *sk,
+static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
__be16 loc_port, __be32 loc_addr,
__be16 rmt_port, __be32 rmt_addr,
int dif)
@@ -296,7 +296,8 @@ static inline struct sock *udp_v4_mcast_next(struct sock *sk,
sk_for_each_from(s, node) {
struct inet_sock *inet = inet_sk(s);
- if (s->sk_hash != hnum ||
+ if (!net_eq(sock_net(s), net) ||
+ s->sk_hash != hnum ||
(inet->daddr && inet->daddr != rmt_addr) ||
(inet->dport != rmt_port && inet->dport) ||
(inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
@@ -1079,15 +1080,16 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
read_lock(&udp_hash_lock);
sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
dif = skb->dev->ifindex;
- sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
+ sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
if (sk) {
struct sock *sknext = NULL;
do {
struct sk_buff *skb1 = skb;
- sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
- uh->source, saddr, dif);
+ sknext = udp_v4_mcast_next(net, sk_next(sk), uh->dest,
+ daddr, uh->source, saddr,
+ dif);
if (sknext)
skb1 = skb_clone(skb, GFP_ATOMIC);
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 07735ed280d..55dc6beab9a 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -33,6 +33,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
x->sel.dport_mask = htons(0xffff);
x->sel.sport = xfrm_flowi_sport(fl);
x->sel.sport_mask = htons(0xffff);
+ x->sel.family = AF_INET;
x->sel.prefixlen_d = 32;
x->sel.prefixlen_s = 32;
x->sel.proto = fl->proto;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index eea9542728c..d9da5eb9dcb 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2483,8 +2483,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (!idev && dev->mtu >= IPV6_MIN_MTU)
idev = ipv6_add_dev(dev);
- if (idev)
+ if (idev) {
idev->if_flags |= IF_READY;
+ run_pending = 1;
+ }
} else {
if (!addrconf_qdisc_ok(dev)) {
/* device is still not ready. */
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e51da8c092f..8b48512ebf6 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -138,6 +138,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
int peeked;
int err;
int is_udplite = IS_UDPLITE(sk);
+ int is_udp4;
if (addr_len)
*addr_len=sizeof(struct sockaddr_in6);
@@ -158,6 +159,8 @@ try_again:
else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC;
+ is_udp4 = (skb->protocol == htons(ETH_P_IP));
+
/*
* If checksum is needed at all, try to do it while copying the
* data. If the data is truncated, or if we only want a partial
@@ -180,9 +183,14 @@ try_again:
if (err)
goto out_free;
- if (!peeked)
- UDP6_INC_STATS_USER(sock_net(sk),
- UDP_MIB_INDATAGRAMS, is_udplite);
+ if (!peeked) {
+ if (is_udp4)
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INDATAGRAMS, is_udplite);
+ else
+ UDP6_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INDATAGRAMS, is_udplite);
+ }
sock_recv_timestamp(msg, sk, skb);
@@ -196,7 +204,7 @@ try_again:
sin6->sin6_flowinfo = 0;
sin6->sin6_scope_id = 0;
- if (skb->protocol == htons(ETH_P_IP))
+ if (is_udp4)
ipv6_addr_set(&sin6->sin6_addr, 0, 0,
htonl(0xffff), ip_hdr(skb)->saddr);
else {
@@ -207,7 +215,7 @@ try_again:
}
}
- if (skb->protocol == htons(ETH_P_IP)) {
+ if (is_udp4) {
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
} else {
@@ -228,8 +236,14 @@ out:
csum_copy_err:
lock_sock(sk);
- if (!skb_kill_datagram(sk, skb, flags))
- UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+ if (!skb_kill_datagram(sk, skb, flags)) {
+ if (is_udp4)
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INERRORS, is_udplite);
+ else
+ UDP6_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INERRORS, is_udplite);
+ }
release_sock(sk);
if (flags & MSG_DONTWAIT)
@@ -328,7 +342,7 @@ drop:
return -1;
}
-static struct sock *udp_v6_mcast_next(struct sock *sk,
+static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
__be16 loc_port, struct in6_addr *loc_addr,
__be16 rmt_port, struct in6_addr *rmt_addr,
int dif)
@@ -340,7 +354,7 @@ static struct sock *udp_v6_mcast_next(struct sock *sk,
sk_for_each_from(s, node) {
struct inet_sock *inet = inet_sk(s);
- if (sock_net(s) != sock_net(sk))
+ if (!net_eq(sock_net(s), net))
continue;
if (s->sk_hash == num && s->sk_family == PF_INET6) {
@@ -383,14 +397,14 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
read_lock(&udp_hash_lock);
sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
dif = inet6_iif(skb);
- sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
+ sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
if (!sk) {
kfree_skb(skb);
goto out;
}
sk2 = sk;
- while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr,
+ while ((sk2 = udp_v6_mcast_next(net, sk_next(sk2), uh->dest, daddr,
uh->source, saddr, dif))) {
struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC);
if (buff) {
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 89884a4f23a..60c78cfc273 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -34,6 +34,7 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
x->sel.dport_mask = htons(0xffff);
x->sel.sport = xfrm_flowi_sport(fl);
x->sel.sport_mask = htons(0xffff);
+ x->sel.family = AF_INET6;
x->sel.prefixlen_d = 128;
x->sel.prefixlen_s = 128;
x->sel.proto = fl->proto;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index e55e0441e4d..3440a4637f0 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2075,7 +2075,6 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in
req_size += socklen * 2;
} else {
size -= 2*socklen;
- socklen = 0;
}
rq = (void*)skb_put(skb, req_size);
pol->sadb_x_policy_len += req_size/8;
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index 0b024cd6b80..98f48070805 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -94,8 +94,8 @@ minstrel_stats_open(struct inode *inode, struct file *file)
prob / 10, prob % 10,
mr->last_success,
mr->last_attempts,
- mr->succ_hist,
- mr->att_hist);
+ (unsigned long long)mr->succ_hist,
+ (unsigned long long)mr->att_hist);
}
p += sprintf(p, "\nTotal packet count:: ideal %d "
"lookaround %d\n\n",
@@ -106,7 +106,7 @@ minstrel_stats_open(struct inode *inode, struct file *file)
return 0;
}
-static int
+static ssize_t
minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *o)
{
struct minstrel_stats_info *ms;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 9c06b9f86ad..c39b6a99413 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -21,6 +21,7 @@
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
@@ -167,10 +168,12 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
*/
synchronize_rcu();
+ rtnl_lock();
spin_lock_bh(&nf_conntrack_lock);
for_each_net(net)
__nf_conntrack_helper_unregister(me, net);
spin_unlock_bh(&nf_conntrack_lock);
+ rtnl_unlock();
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index a59a307e685..592d73344d4 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -22,6 +22,7 @@
#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
@@ -221,8 +222,10 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
synchronize_rcu();
/* Remove all contrack entries for this protocol */
+ rtnl_lock();
for_each_net(net)
nf_ct_iterate_cleanup(net, kill_l3proto, proto);
+ rtnl_unlock();
}
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
@@ -333,8 +336,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
synchronize_rcu();
/* Remove all contrack entries for this protocol */
+ rtnl_lock();
for_each_net(net)
nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
+ rtnl_unlock();
}
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index a2cdbcbf64c..4ab62ad85dd 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -335,7 +335,7 @@ static int __init nf_ct_proto_gre_init(void)
rv = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4);
if (rv < 0)
return rv;
- rv = register_pernet_gen_device(&proto_gre_net_id, &proto_gre_net_ops);
+ rv = register_pernet_gen_subsys(&proto_gre_net_id, &proto_gre_net_ops);
if (rv < 0)
nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4);
return rv;
@@ -344,7 +344,7 @@ static int __init nf_ct_proto_gre_init(void)
static void nf_ct_proto_gre_fini(void)
{
nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4);
- unregister_pernet_gen_device(proto_gre_net_id, &proto_gre_net_ops);
+ unregister_pernet_gen_subsys(proto_gre_net_id, &proto_gre_net_ops);
}
module_init(nf_ct_proto_gre_init);
diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c
index b0925a30335..249f6b92f15 100644
--- a/net/netlabel/netlabel_addrlist.c
+++ b/net/netlabel/netlabel_addrlist.c
@@ -315,6 +315,7 @@ struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr,
* Audit Helper Functions
*/
+#ifdef CONFIG_AUDIT
/**
* netlbl_af4list_audit_addr - Audit an IPv4 address
* @audit_buf: audit buffer
@@ -386,3 +387,4 @@ void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf,
}
}
#endif /* IPv6 */
+#endif /* CONFIG_AUDIT */
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index 0242bead405..07ae7fd82be 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -120,9 +120,19 @@ struct netlbl_af4list *netlbl_af4list_search(__be32 addr,
struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr,
__be32 mask,
struct list_head *head);
+
+#ifdef CONFIG_AUDIT
void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
int src, const char *dev,
__be32 addr, __be32 mask);
+#else
+static inline void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
+ int src, const char *dev,
+ __be32 addr, __be32 mask)
+{
+ return;
+}
+#endif
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -179,11 +189,23 @@ struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr,
struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr,
const struct in6_addr *mask,
struct list_head *head);
+
+#ifdef CONFIG_AUDIT
void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf,
int src,
const char *dev,
const struct in6_addr *addr,
const struct in6_addr *mask);
+#else
+static inline void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf,
+ int src,
+ const char *dev,
+ const struct in6_addr *addr,
+ const struct in6_addr *mask)
+{
+ return;
+}
+#endif
#endif /* IPV6 */
#endif
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index ee769ecaa13..0a0ef17b2a4 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -265,7 +265,7 @@ add_failure:
static int netlbl_mgmt_listentry(struct sk_buff *skb,
struct netlbl_dom_map *entry)
{
- int ret_val;
+ int ret_val = 0;
struct nlattr *nla_a;
struct nlattr *nla_b;
struct netlbl_af4list *iter4;
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index b9d97effebe..defeb7a0d50 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -261,6 +261,8 @@ static inline int can_respond(struct sk_buff *skb)
return 0; /* we are not the destination */
if (ph->pn_res == PN_PREFIX && !pskb_may_pull(skb, 5))
return 0;
+ if (ph->pn_res == PN_COMMGR) /* indications */
+ return 0;
ph = pn_hdr(skb); /* re-acquires the pointer */
pm = pn_msg(skb);
@@ -309,7 +311,8 @@ static int send_reset_indications(struct sk_buff *rskb)
return pn_raw_send(data, sizeof(data), rskb->dev,
pn_object(oph->pn_sdev, 0x00),
- pn_object(oph->pn_rdev, oph->pn_robj), 0x10);
+ pn_object(oph->pn_rdev, oph->pn_robj),
+ PN_COMMGR);
}
diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c
index 21124ec0a73..bfdade72e06 100644
--- a/net/rfkill/rfkill-input.c
+++ b/net/rfkill/rfkill-input.c
@@ -256,6 +256,11 @@ static struct input_handler rfkill_handler = {
static int __init rfkill_handler_init(void)
{
+ unsigned long last_run = jiffies - msecs_to_jiffies(500);
+ rfkill_wlan.last = last_run;
+ rfkill_bt.last = last_run;
+ rfkill_uwb.last = last_run;
+ rfkill_wimax.last = last_run;
return input_register_handler(&rfkill_handler);
}
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c
index f949a482b00..25ba3bd57e6 100644
--- a/net/rfkill/rfkill.c
+++ b/net/rfkill/rfkill.c
@@ -603,7 +603,7 @@ static int rfkill_check_duplicity(const struct rfkill *rfkill)
}
/* 0: first switch of its kind */
- return test_bit(rfkill->type, seen);
+ return (test_bit(rfkill->type, seen)) ? 1 : 0;
}
static int rfkill_add_switch(struct rfkill *rfkill)
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 8b06fa90048..03e389e8d94 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -545,9 +545,10 @@ static void cbq_ovl_delay(struct cbq_class *cl)
expires = ktime_set(0, 0);
expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
if (hrtimer_try_to_cancel(&q->delay_timer) &&
- ktime_to_ns(ktime_sub(q->delay_timer.expires,
- expires)) > 0)
- q->delay_timer.expires = expires;
+ ktime_to_ns(ktime_sub(
+ hrtimer_get_expires(&q->delay_timer),
+ expires)) > 0)
+ hrtimer_set_expires(&q->delay_timer, expires);
hrtimer_restart(&q->delay_timer);
cl->delayed = 1;
cl->xstats.overactions++;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index a49fa80b57b..bf612d954d4 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -369,7 +369,7 @@ static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
struct sctp_transport *t, __u32 pmtu)
{
- if (!t || (t->pathmtu == pmtu))
+ if (!t || (t->pathmtu <= pmtu))
return;
if (sock_owned_by_user(sk)) {
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d4c3fbc4671..a6a0ea71ae9 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2544,6 +2544,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
sctp_shutdownhdr_t *sdh;
sctp_disposition_t disposition;
struct sctp_ulpevent *ev;
+ __u32 ctsn;
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
@@ -2558,6 +2559,14 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
sdh = (sctp_shutdownhdr_t *)chunk->skb->data;
skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t));
chunk->subh.shutdown_hdr = sdh;
+ ctsn = ntohl(sdh->cum_tsn_ack);
+
+ /* If the Cumulative TSN Ack is beyond the highest TSN sent
+ * so far, terminate the association and respond to the
+ * sender with an ABORT.
+ */
+ if (!TSN_lt(ctsn, asoc->next_tsn))
+ return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands);
/* API 5.3.1.5 SCTP_SHUTDOWN_EVENT
* When a peer sends a SHUTDOWN, SCTP delivers this notification to
@@ -2599,6 +2608,51 @@ out:
return disposition;
}
+/*
+ * sctp_sf_do_9_2_shut_ctsn
+ *
+ * Once an endpoint has reached the SHUTDOWN-RECEIVED state,
+ * it MUST NOT send a SHUTDOWN in response to a ULP request.
+ * The Cumulative TSN Ack of the received SHUTDOWN chunk
+ * MUST be processed.
+ */
+sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const sctp_subtype_t type,
+ void *arg,
+ sctp_cmd_seq_t *commands)
+{
+ struct sctp_chunk *chunk = arg;
+ sctp_shutdownhdr_t *sdh;
+
+ if (!sctp_vtag_verify(chunk, asoc))
+ return sctp_sf_pdiscard(ep, asoc, type, arg, commands);
+
+ /* Make sure that the SHUTDOWN chunk has a valid length. */
+ if (!sctp_chunk_length_valid(chunk,
+ sizeof(struct sctp_shutdown_chunk_t)))
+ return sctp_sf_violation_chunklen(ep, asoc, type, arg,
+ commands);
+
+ sdh = (sctp_shutdownhdr_t *)chunk->skb->data;
+
+ /* If the Cumulative TSN Ack is beyond the highest TSN sent
+ * so far, terminate the association and respond to the
+ * sender with an ABORT.
+ */
+ if (!TSN_lt(ntohl(sdh->cum_tsn_ack), asoc->next_tsn))
+ return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands);
+
+ /* verify, by checking the Cumulative TSN Ack field of the
+ * chunk, that all its outstanding DATA chunks have been
+ * received by the SHUTDOWN sender.
+ */
+ sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_CTSN,
+ SCTP_BE32(sdh->cum_tsn_ack));
+
+ return SCTP_DISPOSITION_CONSUME;
+}
+
/* RFC 2960 9.2
* If an endpoint is in SHUTDOWN-ACK-SENT state and receives an INIT chunk
* (e.g., if the SHUTDOWN COMPLETE was lost) with source and destination
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index dd4ddc40c0a..5c8186d88c6 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -266,11 +266,11 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
/* SCTP_STATE_ESTABLISHED */ \
TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown), \
/* SCTP_STATE_SHUTDOWN_PENDING */ \
- TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
+ TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown), \
/* SCTP_STATE_SHUTDOWN_SENT */ \
TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown_ack), \
/* SCTP_STATE_SHUTDOWN_RECEIVED */ \
- TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
+ TYPE_SCTP_FUNC(sctp_sf_do_9_2_shut_ctsn), \
/* SCTP_STATE_SHUTDOWN_ACK_SENT */ \
TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \
} /* TYPE_SCTP_SHUTDOWN */
diff --git a/net/socket.c b/net/socket.c
index 2b7a4b5c9b7..57550c3bcab 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -990,7 +990,6 @@ static int sock_close(struct inode *inode, struct file *filp)
printk(KERN_DEBUG "sock_close: NULL inode\n");
return 0;
}
- sock_fasync(-1, filp, 0);
sock_release(SOCKET_I(inode));
return 0;
}
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 436bf1b4b76..cb216b2df66 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -228,19 +228,21 @@ static int
rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
{
spinlock_t *cache_lock;
- struct rpc_cred *cred;
+ struct rpc_cred *cred, *next;
unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM;
- while (!list_empty(&cred_unused)) {
- cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru);
+ list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
+
+ /* Enforce a 60 second garbage collection moratorium */
+ if (time_in_range(cred->cr_expire, expired, jiffies) &&
+ test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
+ continue;
+
list_del_init(&cred->cr_lru);
number_cred_unused--;
if (atomic_read(&cred->cr_count) != 0)
continue;
- /* Enforce a 5 second garbage collection moratorium */
- if (time_in_range(cred->cr_expire, expired, jiffies) &&
- test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0)
- continue;
+
cache_lock = &cred->cr_auth->au_credcache->lock;
spin_lock(cache_lock);
if (atomic_read(&cred->cr_count) == 0) {
@@ -453,7 +455,7 @@ need_lock:
}
if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
rpcauth_unhash_cred(cred);
- else if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
+ if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
cred->cr_expire = jiffies;
list_add_tail(&cred->cr_lru, &cred_unused);
number_cred_unused++;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9a288d5eea6..0a50361e3d8 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -249,6 +249,7 @@ struct sock_xprt {
void (*old_data_ready)(struct sock *, int);
void (*old_state_change)(struct sock *);
void (*old_write_space)(struct sock *);
+ void (*old_error_report)(struct sock *);
};
/*
@@ -698,8 +699,9 @@ static int xs_tcp_send_request(struct rpc_task *task)
case -EAGAIN:
xs_nospace(task);
break;
- case -ECONNREFUSED:
case -ECONNRESET:
+ xs_tcp_shutdown(xprt);
+ case -ECONNREFUSED:
case -ENOTCONN:
case -EPIPE:
status = -ENOTCONN;
@@ -742,6 +744,22 @@ out_release:
xprt_release_xprt(xprt, task);
}
+/*
+ * Record the socket's current data_ready, state_change, write_space
+ * and error_report callbacks in @transport so that they can be put
+ * back later. The finish_connecting callers in this patch invoke it
+ * with sk->sk_callback_lock write-held, before installing the RPC
+ * callbacks.
+ */
+static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk)
+{
+ transport->old_data_ready = sk->sk_data_ready;
+ transport->old_state_change = sk->sk_state_change;
+ transport->old_write_space = sk->sk_write_space;
+ transport->old_error_report = sk->sk_error_report;
+}
+
+/*
+ * Put back the data_ready, state_change, write_space and error_report
+ * callbacks previously stored in @transport. xs_close() calls this
+ * before releasing its write lock on sk->sk_callback_lock.
+ */
+static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk)
+{
+ sk->sk_data_ready = transport->old_data_ready;
+ sk->sk_state_change = transport->old_state_change;
+ sk->sk_write_space = transport->old_write_space;
+ sk->sk_error_report = transport->old_error_report;
+}
+
/**
* xs_close - close a socket
* @xprt: transport
@@ -765,9 +783,8 @@ static void xs_close(struct rpc_xprt *xprt)
transport->sock = NULL;
sk->sk_user_data = NULL;
- sk->sk_data_ready = transport->old_data_ready;
- sk->sk_state_change = transport->old_state_change;
- sk->sk_write_space = transport->old_write_space;
+
+ xs_restore_old_callbacks(transport, sk);
write_unlock_bh(&sk->sk_callback_lock);
sk->sk_no_check = 0;
@@ -1180,6 +1197,28 @@ static void xs_tcp_state_change(struct sock *sk)
}
/**
+ * xs_tcp_error_report - callback mainly for catching RST events
+ * @sk: socket
+ *
+ * Installed as sk->sk_error_report by xs_tcp_finish_connecting().
+ * Acts only when the socket error is ECONNRESET while the socket is
+ * still in TCP_ESTABLISHED and a transport can be found for @sk; in
+ * that case the transport is forcibly disconnected. Every other error
+ * is ignored here. sk->sk_callback_lock is read-held for the duration
+ * of the callback.
+ */
+static void xs_tcp_error_report(struct sock *sk)
+{
+ struct rpc_xprt *xprt;
+
+ read_lock(&sk->sk_callback_lock);
+ /* Only a reset seen on an established connection is of interest. */
+ if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED)
+ goto out;
+ /* No transport is associated with this socket: nothing to do. */
+ if (!(xprt = xprt_from_sock(sk)))
+ goto out;
+ dprintk("RPC: %s client %p...\n"
+ "RPC: error %d\n",
+ __func__, xprt, sk->sk_err);
+
+ xprt_force_disconnect(xprt);
+out:
+ read_unlock(&sk->sk_callback_lock);
+}
+
+/**
* xs_udp_write_space - callback invoked when socket buffer space
* becomes available
* @sk: socket whose state has changed
@@ -1454,10 +1493,9 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
write_lock_bh(&sk->sk_callback_lock);
+ xs_save_old_callbacks(transport, sk);
+
sk->sk_user_data = xprt;
- transport->old_data_ready = sk->sk_data_ready;
- transport->old_state_change = sk->sk_state_change;
- transport->old_write_space = sk->sk_write_space;
sk->sk_data_ready = xs_udp_data_ready;
sk->sk_write_space = xs_udp_write_space;
sk->sk_no_check = UDP_CSUM_NORCV;
@@ -1589,13 +1627,13 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
write_lock_bh(&sk->sk_callback_lock);
+ xs_save_old_callbacks(transport, sk);
+
sk->sk_user_data = xprt;
- transport->old_data_ready = sk->sk_data_ready;
- transport->old_state_change = sk->sk_state_change;
- transport->old_write_space = sk->sk_write_space;
sk->sk_data_ready = xs_tcp_data_ready;
sk->sk_state_change = xs_tcp_state_change;
sk->sk_write_space = xs_tcp_write_space;
+ sk->sk_error_report = xs_tcp_error_report;
sk->sk_allocation = GFP_ATOMIC;
/* socket options */
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c647aab8d41..eb90f77bb0e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -711,28 +711,30 @@ static struct sock *unix_find_other(struct net *net,
int type, unsigned hash, int *error)
{
struct sock *u;
- struct nameidata nd;
+ struct path path;
int err = 0;
if (sunname->sun_path[0]) {
- err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
+ struct inode *inode;
+ err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
if (err)
goto fail;
- err = vfs_permission(&nd, MAY_WRITE);
+ inode = path.dentry->d_inode;
+ err = inode_permission(inode, MAY_WRITE);
if (err)
goto put_fail;
err = -ECONNREFUSED;
- if (!S_ISSOCK(nd.path.dentry->d_inode->i_mode))
+ if (!S_ISSOCK(inode->i_mode))
goto put_fail;
- u = unix_find_socket_byinode(net, nd.path.dentry->d_inode);
+ u = unix_find_socket_byinode(net, inode);
if (!u)
goto put_fail;
if (u->sk_type == type)
- touch_atime(nd.path.mnt, nd.path.dentry);
+ touch_atime(path.mnt, path.dentry);
- path_put(&nd.path);
+ path_put(&path);
err=-EPROTOTYPE;
if (u->sk_type != type) {
@@ -753,7 +755,7 @@ static struct sock *unix_find_other(struct net *net,
return u;
put_fail:
- path_put(&nd.path);
+ path_put(&path);
fail:
*error=err;
return NULL;
@@ -1300,14 +1302,23 @@ static void unix_destruct_fds(struct sk_buff *skb)
sock_wfree(skb);
}
-static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+/*
+ * Attach the file descriptors carried in @scm to @skb.
+ *
+ * The skb receives its own duplicate of scm->fp; scm->fp itself is
+ * left in place instead of being handed over and cleared. Each file
+ * in scm->fp is marked in flight and unix_destruct_fds is installed
+ * as the skb destructor.
+ *
+ * Returns 0 on success or -ENOMEM if scm_fp_dup() fails, in which
+ * case nothing has been marked in flight. Callers must check the
+ * result.
+ */
+static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
int i;
+
+ /*
+ * Need to duplicate file references for the sake of garbage
+ * collection. Otherwise a socket in the fps might become a
+ * candidate for GC while the skb is not yet queued.
+ */
+ UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+ if (!UNIXCB(skb).fp)
+ return -ENOMEM;
+
for (i=scm->fp->count-1; i>=0; i--)
unix_inflight(scm->fp->fp[i]);
- UNIXCB(skb).fp = scm->fp;
skb->destructor = unix_destruct_fds;
- scm->fp = NULL;
+ return 0;
}
/*
@@ -1366,8 +1377,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
goto out;
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
- if (siocb->scm->fp)
- unix_attach_fds(siocb->scm, skb);
+ if (siocb->scm->fp) {
+ err = unix_attach_fds(siocb->scm, skb);
+ if (err)
+ goto out_free;
+ }
unix_get_secdata(siocb->scm, skb);
skb_reset_transport_header(skb);
@@ -1536,8 +1550,13 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
size = min_t(int, size, skb_tailroom(skb));
memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
- if (siocb->scm->fp)
- unix_attach_fds(siocb->scm, skb);
+ if (siocb->scm->fp) {
+ err = unix_attach_fds(siocb->scm, skb);
+ if (err) {
+ kfree_skb(skb);
+ goto out_err;
+ }
+ }
if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
kfree_skb(skb);
@@ -2211,7 +2230,7 @@ static int unix_net_init(struct net *net)
#endif
error = 0;
out:
- return 0;
+ return error;
}
static void unix_net_exit(struct net *net)
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2a27b84f740..6d4a9a8de5e 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -186,8 +186,17 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
*/
struct sock *sk = unix_get_socket(*fp++);
if (sk) {
- hit = true;
- func(unix_sk(sk));
+ struct unix_sock *u = unix_sk(sk);
+
+ /*
+ * Ignore non-candidates, they could
+ * have been added to the queues after
+ * starting the garbage collection
+ */
+ if (u->gc_candidate) {
+ hit = true;
+ func(u);
+ }
}
}
if (hit && hitlist != NULL) {
@@ -249,11 +258,11 @@ static void inc_inflight_move_tail(struct unix_sock *u)
{
atomic_long_inc(&u->inflight);
/*
- * If this is still a candidate, move it to the end of the
- * list, so that it's checked even if it was already passed
- * over
+ * If this still might be part of a cycle, move it to the end
+ * of the list, so that it's checked even if it was already
+ * passed over
*/
- if (u->gc_candidate)
+ if (u->gc_maybe_cycle)
list_move_tail(&u->link, &gc_candidates);
}
@@ -267,6 +276,7 @@ void unix_gc(void)
struct unix_sock *next;
struct sk_buff_head hitlist;
struct list_head cursor;
+ LIST_HEAD(not_cycle_list);
spin_lock(&unix_gc_lock);
@@ -282,10 +292,14 @@ void unix_gc(void)
*
* Holding unix_gc_lock will protect these candidates from
* being detached, and hence from gaining an external
- * reference. This also means, that since there are no
- * possible receivers, the receive queues of these sockets are
- * static during the GC, even though the dequeue is done
- * before the detach without atomicity guarantees.
+ * reference. Since there are no possible receivers, all
+ * buffers currently on the candidates' queues stay there
+ * during the garbage collection.
+ *
+ * We also know that no new candidate can be added onto the
+ * receive queues. Other, non candidate sockets _can_ be
+ * added to queue, so we must make sure only to touch
+ * candidates.
*/
list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
long total_refs;
@@ -299,6 +313,7 @@ void unix_gc(void)
if (total_refs == inflight_refs) {
list_move_tail(&u->link, &gc_candidates);
u->gc_candidate = 1;
+ u->gc_maybe_cycle = 1;
}
}
@@ -325,14 +340,24 @@ void unix_gc(void)
list_move(&cursor, &u->link);
if (atomic_long_read(&u->inflight) > 0) {
- list_move_tail(&u->link, &gc_inflight_list);
- u->gc_candidate = 0;
+ list_move_tail(&u->link, &not_cycle_list);
+ u->gc_maybe_cycle = 0;
scan_children(&u->sk, inc_inflight_move_tail, NULL);
}
}
list_del(&cursor);
/*
+ * not_cycle_list contains those sockets which do not make up a
+ * cycle. Restore these to the inflight list.
+ */
+ while (!list_empty(&not_cycle_list)) {
+ u = list_entry(not_cycle_list.next, struct unix_sock, link);
+ u->gc_candidate = 0;
+ list_move_tail(&u->link, &gc_inflight_list);
+ }
+
+ /*
* Now gc_candidates contains only garbage. Restore original
* inflight counters for these as well, and remove the skbuffs
* which are creating the cycle(s).
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 7d82be07fa1..646c7121dbc 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -16,7 +16,7 @@ config NL80211
config WIRELESS_OLD_REGULATORY
bool "Old wireless static regulatory definitions"
- default n
+ default y
---help---
This option enables the old static regulatory information
and uses it within the new framework. This is available
@@ -40,11 +40,10 @@ config WIRELESS_OLD_REGULATORY
ieee80211_regdom module parameter. This is being phased out and you
should stop using them ASAP.
- Say N unless you cannot install a new userspace application
- or have one currently depending on the ieee80211_regdom module
- parameter and cannot port it to use the new userspace interfaces.
-
- This is scheduled for removal for 2.6.29.
+ Say Y unless you have installed a new userspace application.
+ Also say Y if you have one currently depending on the ieee80211_regdom
+ module parameter and cannot port it to use the new userspace
+ interfaces.
config WIRELESS_EXT
bool "Wireless extensions"
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 832b47c1de8..058f04f54b9 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -315,9 +315,9 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
return;
}
- spin_lock(&xfrm_policy_gc_lock);
+ spin_lock_bh(&xfrm_policy_gc_lock);
hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
- spin_unlock(&xfrm_policy_gc_lock);
+ spin_unlock_bh(&xfrm_policy_gc_lock);
schedule_work(&xfrm_policy_gc_work);
}
@@ -1251,6 +1251,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
-EINVAL : -EAGAIN);
xfrm_state_put(x);
}
+ else if (error == -ESRCH)
+ error = -EAGAIN;
if (!tmpl->optional)
goto fail;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 4a8a1abb59e..a278a6f3b99 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1816,7 +1816,7 @@ static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb)
uk.family = k->family;
uk.reserved = k->reserved;
memcpy(&uk.local, &k->local, sizeof(uk.local));
- memcpy(&uk.remote, &k->local, sizeof(uk.remote));
+ memcpy(&uk.remote, &k->remote, sizeof(uk.remote));
return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk);
}