From f75e6745aa3084124ae1434fd7629853bdaf6798 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 21 Apr 2009 17:18:20 -0400
Subject: SUNRPC: Fix the problem of EADDRNOTAVAIL syslog floods on reconnect

See http://bugzilla.kernel.org/show_bug.cgi?id=13034

If the port gets into a TIME_WAIT state, then we cannot reconnect without
binding to a new port.

Tested-by: Petr Vandrovec <petr@vandrovec.name>
Tested-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/sunrpc/xprt.c     |  6 ++----
 net/sunrpc/xprtsock.c | 26 +++++++++++++++++++++-----
 2 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index a0bfe53f162..06ca058572f 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -672,10 +672,8 @@ xprt_init_autodisconnect(unsigned long data)
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
 		goto out_abort;
 	spin_unlock(&xprt->transport_lock);
-	if (xprt_connecting(xprt))
-		xprt_release_write(xprt, NULL);
-	else
-		queue_work(rpciod_workqueue, &xprt->task_cleanup);
+	set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
+	queue_work(rpciod_workqueue, &xprt->task_cleanup);
 	return;
 out_abort:
 	spin_unlock(&xprt->transport_lock);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d40ff50887a..e1859614601 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -807,6 +807,9 @@ static void xs_reset_transport(struct sock_xprt *transport)
  *
  * This is used when all requests are complete; ie, no DRC state remains
  * on the server we want to save.
+ *
+ * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with
+ * xs_reset_transport() zeroing the socket from underneath a writer.
  */
 static void xs_close(struct rpc_xprt *xprt)
 {
@@ -824,6 +827,14 @@ static void xs_close(struct rpc_xprt *xprt)
 	xprt_disconnect_done(xprt);
 }
 
+static void xs_tcp_close(struct rpc_xprt *xprt)
+{
+	if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state))
+		xs_close(xprt);
+	else
+		xs_tcp_shutdown(xprt);
+}
+
 /**
  * xs_destroy - prepare to shutdown a transport
  * @xprt: doomed transport
@@ -1772,6 +1783,15 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
 			xprt, -status, xprt_connected(xprt),
 			sock->sk->sk_state);
 	switch (status) {
+	default:
+		printk("%s: connect returned unhandled error %d\n",
+			__func__, status);
+	case -EADDRNOTAVAIL:
+		/* We're probably in TIME_WAIT. Get rid of existing socket,
+		 * and retry
+		 */
+		set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
+		xprt_force_disconnect(xprt);
 	case -ECONNREFUSED:
 	case -ECONNRESET:
 	case -ENETUNREACH:
@@ -1782,10 +1802,6 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
 		xprt_clear_connecting(xprt);
 		return;
 	}
-	/* get rid of existing socket, and retry */
-	xs_tcp_shutdown(xprt);
-	printk("%s: connect returned unhandled error %d\n",
-			__func__, status);
 out_eagain:
 	status = -EAGAIN;
 out:
@@ -1994,7 +2010,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
 	.buf_free		= rpc_free,
 	.send_request		= xs_tcp_send_request,
 	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
-	.close			= xs_tcp_shutdown,
+	.close			= xs_tcp_close,
 	.destroy		= xs_destroy,
 	.print_stats		= xs_tcp_print_stats,
 };
-- 
cgit v1.2.3


From 453ed90d1395a5281a8f1a0de5d8aabc66202e34 Mon Sep 17 00:00:00 2001
From: Latchesar Ionkov <lucho@ionkov.net>
Date: Sun, 5 Apr 2009 16:22:16 -0500
Subject: net/9p: set correct stat size when sending Twstat messages

The 9P2000 Twstat message requires the size of the stat structure to be
specified. Currently the 9p code writes zero instead of the actual size.
This behavior confuses some of the file servers that check if the size is
correct.

This patch adds a new function that calculcates the stat size and puts the
value in the appropriate place in the 9P message.

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
Reviewed-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index 1eb580c38fb..93f442aaa11 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1251,12 +1251,42 @@ error:
 }
 EXPORT_SYMBOL(p9_client_stat);
 
+static int p9_client_statsize(struct p9_wstat *wst, int optional)
+{
+	int ret;
+
+	/* size[2] type[2] dev[4] qid[13] */
+	/* mode[4] atime[4] mtime[4] length[8]*/
+	/* name[s] uid[s] gid[s] muid[s] */
+	ret = 2+2+4+13+4+4+4+8+2+2+2+2;
+
+	if (wst->name)
+		ret += strlen(wst->name);
+	if (wst->uid)
+		ret += strlen(wst->uid);
+	if (wst->gid)
+		ret += strlen(wst->gid);
+	if (wst->muid)
+		ret += strlen(wst->muid);
+
+	if (optional) {
+		ret += 2+4+4+4;	/* extension[s] n_uid[4] n_gid[4] n_muid[4] */
+		if (wst->extension)
+			ret += strlen(wst->extension);
+	}
+
+	return ret;
+}
+
 int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
 {
 	int err;
 	struct p9_req_t *req;
 	struct p9_client *clnt;
 
+	err = 0;
+	clnt = fid->clnt;
+	wst->size = p9_client_statsize(wst, clnt->dotu);
 	P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid);
 	P9_DPRINTK(P9_DEBUG_9P,
 		"     sz=%x type=%x dev=%x qid=%x.%llx.%x\n"
@@ -1268,10 +1298,8 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
 		wst->atime, wst->mtime, (unsigned long long)wst->length,
 		wst->name, wst->uid, wst->gid, wst->muid, wst->extension,
 		wst->n_uid, wst->n_gid, wst->n_muid);
-	err = 0;
-	clnt = fid->clnt;
 
-	req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, 0, wst);
+	req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size, wst);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto error;
-- 
cgit v1.2.3


From 742b11a7ec60faa25d76c95c268041ab215c25ad Mon Sep 17 00:00:00 2001
From: Latchesar Ionkov <lucho@ionkov.net>
Date: Sun, 5 Apr 2009 16:26:41 -0500
Subject: net/9p: return error when p9_client_stat fails

p9_client_stat function doesn't return correct value if it fails.
p9_client_stat should return ERR_PTR of the error value when it fails.
Instead, it always returns a value to the allocated p9_wstat struct even
when it is not populated correctly.

This patch makes p9_client_stat to handle failure correctly.

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
Reviewed-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index 93f442aaa11..781d89a952e 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1244,10 +1244,14 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
 		ret->name, ret->uid, ret->gid, ret->muid, ret->extension,
 		ret->n_uid, ret->n_gid, ret->n_muid);
 
+	p9_free_req(clnt, req);
+	return ret;
+
 free_and_error:
 	p9_free_req(clnt, req);
 error:
-	return ret;
+	kfree(ret);
+	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(p9_client_stat);
 
-- 
cgit v1.2.3


From 1bab88b2310998de18b32529a27ea835d164254a Mon Sep 17 00:00:00 2001
From: Latchesar Ionkov <lucho@ionkov.net>
Date: Sun, 5 Apr 2009 16:28:59 -0500
Subject: net/9p: handle correctly interrupted 9P requests

Currently the 9p code crashes when a operation is interrupted, i.e. for
example when the user presses ^C while reading from a file.

This patch fixes the code that is responsible for interruption and flushing
of 9P operations.

Signed-off-by: Latchesar Ionkov <lucho@ionkov.net>
---
 net/9p/client.c       | 74 +++++++++++++--------------------------------------
 net/9p/trans_fd.c     | 14 ++++++----
 net/9p/trans_rdma.c   |  1 +
 net/9p/trans_virtio.c |  1 +
 4 files changed, 30 insertions(+), 60 deletions(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index 781d89a952e..dd43a8289b0 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -203,7 +203,6 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)
 	p9pdu_reset(req->tc);
 	p9pdu_reset(req->rc);
 
-	req->flush_tag = 0;
 	req->tc->tag = tag-1;
 	req->status = REQ_STATUS_ALLOC;
 
@@ -324,35 +323,9 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
  */
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req)
 {
-	struct p9_req_t *other_req;
-	unsigned long flags;
-
 	P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
-
-	if (req->status == REQ_STATUS_ERROR)
-		wake_up(req->wq);
-
-	if (req->flush_tag) { 			/* flush receive path */
-		P9_DPRINTK(P9_DEBUG_9P, "<<< RFLUSH %d\n", req->tc->tag);
-		spin_lock_irqsave(&c->lock, flags);
-		other_req = p9_tag_lookup(c, req->flush_tag);
-		if (other_req->status != REQ_STATUS_FLSH) /* stale flush */
-			spin_unlock_irqrestore(&c->lock, flags);
-		else {
-			other_req->status = REQ_STATUS_FLSHD;
-			spin_unlock_irqrestore(&c->lock, flags);
-			wake_up(other_req->wq);
-		}
-		p9_free_req(c, req);
-	} else { 				/* normal receive path */
-		P9_DPRINTK(P9_DEBUG_MUX, "normal: tag %d\n", req->tc->tag);
-		spin_lock_irqsave(&c->lock, flags);
-		if (req->status != REQ_STATUS_FLSHD)
-			req->status = REQ_STATUS_RCVD;
-		spin_unlock_irqrestore(&c->lock, flags);
-		wake_up(req->wq);
-		P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
-	}
+	wake_up(req->wq);
+	P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
 }
 EXPORT_SYMBOL(p9_client_cb);
 
@@ -486,9 +459,15 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	req->flush_tag = oldtag;
 
-	/* we don't free anything here because RPC isn't complete */
+	/* if we haven't received a response for oldreq,
+	   remove it from the list. */
+	spin_lock(&c->lock);
+	if (oldreq->status == REQ_STATUS_FLSH)
+		list_del(&oldreq->req_list);
+	spin_unlock(&c->lock);
+
+	p9_free_req(c, req);
 	return 0;
 }
 
@@ -509,7 +488,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 	struct p9_req_t *req;
 	unsigned long flags;
 	int sigpending;
-	int flushed = 0;
 
 	P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type);
 
@@ -546,42 +524,28 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 		goto reterr;
 	}
 
-	/* if it was a flush we just transmitted, return our tag */
-	if (type == P9_TFLUSH)
-		return req;
-again:
 	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag);
 	err = wait_event_interruptible(*req->wq,
 						req->status >= REQ_STATUS_RCVD);
-	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d (flushed=%d)\n",
-						req->wq, tag, err, flushed);
+	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n",
+						req->wq, tag, err);
 
 	if (req->status == REQ_STATUS_ERROR) {
 		P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
 		err = req->t_err;
-	} else if (err == -ERESTARTSYS && flushed) {
-		P9_DPRINTK(P9_DEBUG_MUX, "flushed - going again\n");
-		goto again;
-	} else if (req->status == REQ_STATUS_FLSHD) {
-		P9_DPRINTK(P9_DEBUG_MUX, "flushed - erestartsys\n");
-		err = -ERESTARTSYS;
 	}
 
-	if ((err == -ERESTARTSYS) && (c->status == Connected) && (!flushed)) {
+	if ((err == -ERESTARTSYS) && (c->status == Connected)) {
 		P9_DPRINTK(P9_DEBUG_MUX, "flushing\n");
-		spin_lock_irqsave(&c->lock, flags);
-		if (req->status == REQ_STATUS_SENT)
-			req->status = REQ_STATUS_FLSH;
-		spin_unlock_irqrestore(&c->lock, flags);
 		sigpending = 1;
-		flushed = 1;
 		clear_thread_flag(TIF_SIGPENDING);
 
-		if (c->trans_mod->cancel(c, req)) {
-			err = p9_client_flush(c, req);
-			if (err == 0)
-				goto again;
-		}
+		if (c->trans_mod->cancel(c, req))
+			p9_client_flush(c, req);
+
+		/* if we received the response anyway, don't signal error */
+		if (req->status == REQ_STATUS_RCVD)
+			err = 0;
 	}
 
 	if (sigpending) {
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index c613ed08a5e..a2a1814c7a8 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -213,8 +213,8 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
 	spin_unlock_irqrestore(&m->client->lock, flags);
 
 	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
-		list_del(&req->req_list);
 		P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req);
+		list_del(&req->req_list);
 		p9_client_cb(m->client, req);
 	}
 }
@@ -336,7 +336,8 @@ static void p9_read_work(struct work_struct *work)
 			"mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
 
 		m->req = p9_tag_lookup(m->client, tag);
-		if (!m->req) {
+		if (!m->req || (m->req->status != REQ_STATUS_SENT &&
+					m->req->status != REQ_STATUS_FLSH)) {
 			P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
 								 tag);
 			err = -EIO;
@@ -361,10 +362,11 @@ static void p9_read_work(struct work_struct *work)
 	if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
 		P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n");
 		spin_lock(&m->client->lock);
+		if (m->req->status != REQ_STATUS_ERROR)
+			m->req->status = REQ_STATUS_RCVD;
 		list_del(&m->req->req_list);
 		spin_unlock(&m->client->lock);
 		p9_client_cb(m->client, m->req);
-
 		m->rbuf = NULL;
 		m->rpos = 0;
 		m->rsize = 0;
@@ -454,6 +456,7 @@ static void p9_write_work(struct work_struct *work)
 		req = list_entry(m->unsent_req_list.next, struct p9_req_t,
 			       req_list);
 		req->status = REQ_STATUS_SENT;
+		P9_DPRINTK(P9_DEBUG_TRANS, "move req %p\n", req);
 		list_move_tail(&req->req_list, &m->req_list);
 
 		m->wbuf = req->tc->sdata;
@@ -683,12 +686,13 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
 	P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
 
 	spin_lock(&client->lock);
-	list_del(&req->req_list);
 
 	if (req->status == REQ_STATUS_UNSENT) {
+		list_del(&req->req_list);
 		req->status = REQ_STATUS_FLSHD;
 		ret = 0;
-	}
+	} else if (req->status == REQ_STATUS_SENT)
+		req->status = REQ_STATUS_FLSH;
 
 	spin_unlock(&client->lock);
 
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 7fa0eb20b2f..ac4990041eb 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -295,6 +295,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
 		goto err_out;
 
 	req->rc = c->rc;
+	req->status = REQ_STATUS_RCVD;
 	p9_client_cb(client, req);
 
 	return;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 2d7781ec663..bb8579a141a 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -134,6 +134,7 @@ static void req_done(struct virtqueue *vq)
 		P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
 		P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
 		req = p9_tag_lookup(chan->client, rc->tag);
+		req->status = REQ_STATUS_RCVD;
 		p9_client_cb(chan->client, req);
 	}
 }
-- 
cgit v1.2.3


From c197facc8ea08062f8f949aade6a33649ee06771 Mon Sep 17 00:00:00 2001
From: "hummerbliss@gmail.com" <hummerbliss@gmail.com>
Date: Mon, 20 Apr 2009 17:12:35 +0200
Subject: netfilter: bridge: allow fragmentation of VLAN packets traversing a
 bridge

br_nf_dev_queue_xmit only checks for ETH_P_IP packets for fragmenting but not
VLAN packets. This results in dropping of large VLAN packets. This can be
observed when connection tracking is enabled. Connection tracking re-assembles
fragmented packets, and these have to re-fragmented when transmitting out. Also,
make sure only refragmented packets are defragmented as per suggestion from
Patrick McHardy.

Signed-off-by: Saikiran Madugula <hummerbliss@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/bridge/br_netfilter.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 3953ac4214c..e4a418fcb35 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -788,15 +788,23 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
+#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE)
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
-	if (skb->protocol == htons(ETH_P_IP) &&
+	if (skb->nfct != NULL &&
+	    (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) &&
 	    skb->len > skb->dev->mtu &&
 	    !skb_is_gso(skb))
 		return ip_fragment(skb, br_dev_queue_push_xmit);
 	else
 		return br_dev_queue_push_xmit(skb);
 }
+#else
+static int br_nf_dev_queue_xmit(struct sk_buff *skb)
+{
+        return br_dev_queue_push_xmit(skb);
+}
+#endif
 
 /* PF_BRIDGE/POST_ROUTING ********************************************/
 static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
-- 
cgit v1.2.3


From 5ff482940f5aa2cdc3424c4a8ea94b9833b2af5f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 24 Apr 2009 15:37:44 +0200
Subject: netfilter: nf_ct_dccp/udplite: fix protocol registration error

Commit d0dba725 (netfilter: ctnetlink: add callbacks to the per-proto
nlattrs) changed the protocol registration function to abort if the
to-be registered protocol doesn't provide a new callback function.

The DCCP and UDP-Lite IPv6 protocols were missed in this conversion,
add the required callback pointer.

Reported-and-tested-by: Steven Jan Springl <steven@springl.ukfsn.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_proto_dccp.c    | 1 +
 net/netfilter/nf_conntrack_proto_udplite.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 50dac8dbe7d..5411d63f31a 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -777,6 +777,7 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
 	.print_conntrack	= dccp_print_conntrack,
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 	.to_nlattr		= dccp_to_nlattr,
+	.nlattr_size		= dccp_nlattr_size,
 	.from_nlattr		= nlattr_to_dccp,
 	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
 	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size,
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 4614696c1b8..0badedc542d 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -204,6 +204,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
 	.error			= udplite_error,
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
+	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size,
 	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
 	.nla_policy		= nf_ct_port_nla_policy,
 #endif
-- 
cgit v1.2.3


From 4b0706624930dc75c3b0d0df463d89759ef7de29 Mon Sep 17 00:00:00 2001
From: Laszlo Attila Toth <panther@balabit.hu>
Date: Fri, 24 Apr 2009 16:55:25 +0200
Subject: netfilter: Kconfig: TProxy doesn't depend on NF_CONNTRACK

Signed-off-by: Laszlo Attila Toth <panther@balabit.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 2329c5f5055..881203c4a14 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -275,6 +275,8 @@ config NF_CT_NETLINK
 	help
 	  This option enables support for a netlink-based userspace interface
 
+endif # NF_CONNTRACK
+
 # transparent proxy support
 config NETFILTER_TPROXY
 	tristate "Transparent proxying support (EXPERIMENTAL)"
@@ -290,8 +292,6 @@ config NETFILTER_TPROXY
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-endif # NF_CONNTRACK
-
 config NETFILTER_XTABLES
 	tristate "Netfilter Xtables support (required for ip_tables)"
 	default m if NETFILTER_ADVANCED=n
-- 
cgit v1.2.3


From 71951b64a5a87c09eb6fde59ce51aaab2fdaeab2 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 24 Apr 2009 16:58:41 +0200
Subject: netfilter: nf_ct_dccp: add missing role attributes for DCCP

This patch adds missing role attribute to the DCCP type, otherwise
the creation of entries is not of any use.

The attribute added is CTA_PROTOINFO_DCCP_ROLE which contains the
role of the conntrack original tuple.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_proto_dccp.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 5411d63f31a..8e757dd5339 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -633,6 +633,8 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
 	if (!nest_parms)
 		goto nla_put_failure;
 	NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state);
+	NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE,
+		   ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]);
 	nla_nest_end(skb, nest_parms);
 	read_unlock_bh(&dccp_lock);
 	return 0;
@@ -644,6 +646,7 @@ nla_put_failure:
 
 static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = {
 	[CTA_PROTOINFO_DCCP_STATE]	= { .type = NLA_U8 },
+	[CTA_PROTOINFO_DCCP_ROLE]	= { .type = NLA_U8 },
 };
 
 static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
@@ -661,11 +664,21 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
 		return err;
 
 	if (!tb[CTA_PROTOINFO_DCCP_STATE] ||
-	    nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE)
+	    !tb[CTA_PROTOINFO_DCCP_ROLE] ||
+	    nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX ||
+	    nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) {
 		return -EINVAL;
+	}
 
 	write_lock_bh(&dccp_lock);
 	ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]);
+	if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) {
+		ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
+		ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
+	} else {
+		ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER;
+		ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT;
+	}
 	write_unlock_bh(&dccp_lock);
 	return 0;
 }
-- 
cgit v1.2.3


From 37e55cf0ceb8803256bf69a3e45bd668bf90b76f Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 24 Apr 2009 17:05:21 +0200
Subject: netfilter: xt_recent: fix stack overread in compat code

Related-to: commit 325fb5b4d26038cba665dd0d8ee09555321061f0

The compat path suffers from a similar problem. It only uses a __be32
when all of the recent code uses, and expects, an nf_inet_addr
everywhere. As a result, addresses stored by xt_recents were
filled with whatever other stuff was on the stack following the be32.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>

With a minor compile fix from Roman.

Reported-and-tested-by: Roman Hoog Antink <rha@open.ch>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_recent.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 791e030ea90..eb0ceb84652 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -474,7 +474,7 @@ static ssize_t recent_old_proc_write(struct file *file,
 	struct recent_table *t = pde->data;
 	struct recent_entry *e;
 	char buf[sizeof("+255.255.255.255")], *c = buf;
-	__be32 addr;
+	union nf_inet_addr addr = {};
 	int add;
 
 	if (size > sizeof(buf))
@@ -506,14 +506,13 @@ static ssize_t recent_old_proc_write(struct file *file,
 		add = 1;
 		break;
 	}
-	addr = in_aton(c);
+	addr.ip = in_aton(c);
 
 	spin_lock_bh(&recent_lock);
-	e = recent_entry_lookup(t, (const void *)&addr, NFPROTO_IPV4, 0);
+	e = recent_entry_lookup(t, &addr, NFPROTO_IPV4, 0);
 	if (e == NULL) {
 		if (add)
-			recent_entry_init(t, (const void *)&addr,
-					  NFPROTO_IPV4, 0);
+			recent_entry_init(t, &addr, NFPROTO_IPV4, 0);
 	} else {
 		if (add)
 			recent_entry_update(t, e);
-- 
cgit v1.2.3


From adc667e84f086aa110d810f3476c494e48eaabaa Mon Sep 17 00:00:00 2001
From: Jay Vosburgh <fubar@us.ibm.com>
Date: Sat, 25 Apr 2009 18:03:35 -0700
Subject: vlan: update vlan carrier state for admin up/down

	Currently, the VLAN event handler does not adjust the VLAN
device's carrier state when the real device or the VLAN device is set
administratively up or down.

	The following patch adds a transfer of operating state from the
real device to the VLAN device when the real device is administratively
set up or down, and sets the carrier state up or down during init, open
and close of the VLAN device.

	This permits observers above the VLAN device that care about the
carrier state (bonding's link monitor, for example) to receive updates
for administrative changes by more closely mimicing the behavior of real
devices.

Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/8021q/vlan.c     | 2 ++
 net/8021q/vlan_dev.c | 5 +++++
 2 files changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 2b7390e377b..d1e10546eb8 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -492,6 +492,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 				continue;
 
 			dev_change_flags(vlandev, flgs & ~IFF_UP);
+			vlan_transfer_operstate(dev, vlandev);
 		}
 		break;
 
@@ -507,6 +508,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 				continue;
 
 			dev_change_flags(vlandev, flgs | IFF_UP);
+			vlan_transfer_operstate(dev, vlandev);
 		}
 		break;
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 6b092136401..b4b9068e55a 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -462,6 +462,7 @@ static int vlan_dev_open(struct net_device *dev)
 	if (vlan->flags & VLAN_FLAG_GVRP)
 		vlan_gvrp_request_join(dev);
 
+	netif_carrier_on(dev);
 	return 0;
 
 clear_allmulti:
@@ -471,6 +472,7 @@ del_unicast:
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
 		dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN);
 out:
+	netif_carrier_off(dev);
 	return err;
 }
 
@@ -492,6 +494,7 @@ static int vlan_dev_stop(struct net_device *dev)
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
 		dev_unicast_delete(real_dev, dev->dev_addr, dev->addr_len);
 
+	netif_carrier_off(dev);
 	return 0;
 }
 
@@ -612,6 +615,8 @@ static int vlan_dev_init(struct net_device *dev)
 	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
 	int subclass = 0;
 
+	netif_carrier_off(dev);
+
 	/* IFF_BROADCAST|IFF_MULTICAST; ??? */
 	dev->flags  = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI);
 	dev->iflink = real_dev->ifindex;
-- 
cgit v1.2.3


From 6a783c9067e3f71aac61a9262fe42c1f68efd4fc Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 27 Apr 2009 02:58:59 -0700
Subject: xfrm: wrong hash value for temporary SA

When kernel inserts a temporary SA for IKE, it uses the wrong hash
value for dst list. Two hash values were calcultated before: one with
source address and one with a wildcard source address.

Bug hinted by Junwei Zhang <junwei.zhang@6wind.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 82271720d97..5f1f86565f1 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -794,7 +794,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 {
 	static xfrm_address_t saddr_wildcard = { };
 	struct net *net = xp_net(pol);
-	unsigned int h;
+	unsigned int h, h_wildcard;
 	struct hlist_node *entry;
 	struct xfrm_state *x, *x0, *to_put;
 	int acquire_in_progress = 0;
@@ -819,8 +819,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 	if (best)
 		goto found;
 
-	h = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family);
-	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) {
+	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family);
+	hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) {
 		if (x->props.family == family &&
 		    x->props.reqid == tmpl->reqid &&
 		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
-- 
cgit v1.2.3


From c9503e0fe052020e0294cd07d0ecd982eb7c9177 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 27 Apr 2009 05:42:24 -0700
Subject: ipv4: Limit size of route cache hash table

Right now we have no upper limit on the size of the route cache hash table.
On a 128GB POWER6 box it ends up as 32MB:

    IP route cache hash table entries: 4194304 (order: 9, 33554432 bytes)

It would be nice to cap this for memory consumption reasons, but a massive
hashtable also causes a significant spike when measuring OS jitter.

With a 32MB hashtable and 4 million entries, rt_worker_func is taking
5 ms to complete. On another system with more memory it's taking 14 ms.
Even though rt_worker_func does call cond_sched() to limit its impact,
in an HPC environment we want to keep all sources of OS jitter to a minimum.

With the patch applied we limit the number of entries to 512k which
can still be overriden by using the rt_entries boot option:

    IP route cache hash table entries: 524288 (order: 6, 4194304 bytes)

With this patch rt_worker_func now takes 0.460 ms on the same system.

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c40debe51b3..c4c60e9f068 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3397,7 +3397,7 @@ int __init ip_rt_init(void)
 					0,
 					&rt_hash_log,
 					&rt_hash_mask,
-					0);
+					rhash_entries ? 0 : 512 * 1024);
 	memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
 	rt_hash_lock_init();
 
-- 
cgit v1.2.3


From bf368e4e70cd4e0f880923c44e95a4273d725ab4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 28 Apr 2009 02:24:21 -0700
Subject: net: Avoid extra wakeups of threads blocked in wait_for_packet()

In 2.6.25 we added UDP mem accounting.

This unfortunatly added a penalty when a frame is transmitted, since
we have at TX completion time to call sock_wfree() to perform necessary
memory accounting. This calls sock_def_write_space() and utimately
scheduler if any thread is waiting on the socket.
Thread(s) waiting for an incoming frame was scheduled, then had to sleep
again as event was meaningless.

(All threads waiting on a socket are using same sk_sleep anchor)

This adds lot of extra wakeups and increases latencies, as noted
by Christoph Lameter, and slows down softirq handler.

Reference : http://marc.info/?l=linux-netdev&m=124060437012283&w=2

Fortunatly, Davide Libenzi recently added concept of keyed wakeups
into kernel, and particularly for sockets (see commit
37e5540b3c9d838eb20f2ca8ea2eb8072271e403
epoll keyed wakeups: make sockets use keyed wakeups)

Davide goal was to optimize epoll, but this new wakeup infrastructure
can help non epoll users as well, if they care to setup an appropriate
handler.

This patch introduces new DEFINE_WAIT_FUNC() helper and uses it
in wait_for_packet(), so that only relevant event can wakeup a thread
blocked in this function.

Trace of function calls from bnx2 TX completion bnx2_poll_work() is :
__kfree_skb()
 skb_release_head_state()
  sock_wfree()
   sock_def_write_space()
    __wake_up_sync_key()
     __wake_up_common()
      receiver_wake_function() : Stops here since thread is waiting for an INPUT


Reported-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/datagram.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/datagram.c b/net/core/datagram.c
index d0de644b378..b01a76abe1d 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -64,13 +64,25 @@ static inline int connection_based(struct sock *sk)
 	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
 }
 
+static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync,
+				  void *key)
+{
+	unsigned long bits = (unsigned long)key;
+
+	/*
+	 * Avoid a wakeup if event not interesting for us
+	 */
+	if (bits && !(bits & (POLLIN | POLLERR)))
+		return 0;
+	return autoremove_wake_function(wait, mode, sync, key);
+}
 /*
  * Wait for a packet..
  */
 static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
 {
 	int error;
-	DEFINE_WAIT(wait);
+	DEFINE_WAIT_FUNC(wait, receiver_wake_function);
 
 	prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 
-- 
cgit v1.2.3


From f3784d834c71689336fa272df420b45345cb6b84 Mon Sep 17 00:00:00 2001
From: Roger Quadros <ext-roger.quadros@nokia.com>
Date: Thu, 23 Apr 2009 14:50:54 +0300
Subject: Bluetooth: Ensure that HCI sysfs add/del is preempt safe

Use a different work_struct variables for add_conn() and del_conn() and
use single work queue instead of two for adding and deleting connections.

It eliminates the following error on a preemptible kernel:

[  204.358032] Unable to handle kernel NULL pointer dereference at virtual address 0000000c
[  204.370697] pgd = c0004000
[  204.373443] [0000000c] *pgd=00000000
[  204.378601] Internal error: Oops: 17 [#1] PREEMPT
[  204.383361] Modules linked in: vfat fat rfcomm sco l2cap sd_mod scsi_mod iphb pvr2d drm omaplfb ps
[  204.438537] CPU: 0    Not tainted  (2.6.28-maemo2 #1)
[  204.443664] PC is at klist_put+0x2c/0xb4
[  204.447601] LR is at klist_put+0x18/0xb4
[  204.451568] pc : [<c0270f08>]    lr : [<c0270ef4>]    psr: a0000113
[  204.451568] sp : cf1b3f10  ip : cf1b3f10  fp : cf1b3f2c
[  204.463104] r10: 00000000  r9 : 00000000  r8 : bf08029c
[  204.468353] r7 : c7869200  r6 : cfbe2690  r5 : c78692c8  r4 : 00000001
[  204.474945] r3 : 00000001  r2 : cf1b2000  r1 : 00000001  r0 : 00000000
[  204.481506] Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM Segment kernel
[  204.488861] Control: 10c5387d  Table: 887fc018  DAC: 00000017
[  204.494628] Process btdelconn (pid: 515, stack limit = 0xcf1b22e0)

Signed-off-by: Roger Quadros <ext-roger.quadros@nokia.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_sysfs.c | 37 ++++++++++++++++---------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index ed82796d4a0..b7c51082dde 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -9,8 +9,7 @@
 struct class *bt_class = NULL;
 EXPORT_SYMBOL_GPL(bt_class);
 
-static struct workqueue_struct *btaddconn;
-static struct workqueue_struct *btdelconn;
+static struct workqueue_struct *bluetooth;
 
 static inline char *link_typetostr(int type)
 {
@@ -88,9 +87,10 @@ static struct device_type bt_link = {
 
 static void add_conn(struct work_struct *work)
 {
-	struct hci_conn *conn = container_of(work, struct hci_conn, work);
+	struct hci_conn *conn = container_of(work, struct hci_conn, work_add);
 
-	flush_workqueue(btdelconn);
+	/* ensure previous add/del is complete */
+	flush_workqueue(bluetooth);
 
 	if (device_add(&conn->dev) < 0) {
 		BT_ERR("Failed to register connection device");
@@ -114,9 +114,9 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
 
 	device_initialize(&conn->dev);
 
-	INIT_WORK(&conn->work, add_conn);
+	INIT_WORK(&conn->work_add, add_conn);
 
-	queue_work(btaddconn, &conn->work);
+	queue_work(bluetooth, &conn->work_add);
 }
 
 /*
@@ -131,9 +131,12 @@ static int __match_tty(struct device *dev, void *data)
 
 static void del_conn(struct work_struct *work)
 {
-	struct hci_conn *conn = container_of(work, struct hci_conn, work);
+	struct hci_conn *conn = container_of(work, struct hci_conn, work_del);
 	struct hci_dev *hdev = conn->hdev;
 
+	/* ensure previous add/del is complete */
+	flush_workqueue(bluetooth);
+
 	while (1) {
 		struct device *dev;
 
@@ -156,9 +159,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
 	if (!device_is_registered(&conn->dev))
 		return;
 
-	INIT_WORK(&conn->work, del_conn);
+	INIT_WORK(&conn->work_del, del_conn);
 
-	queue_work(btdelconn, &conn->work);
+	queue_work(bluetooth, &conn->work_del);
 }
 
 static inline char *host_typetostr(int type)
@@ -435,20 +438,13 @@ void hci_unregister_sysfs(struct hci_dev *hdev)
 
 int __init bt_sysfs_init(void)
 {
-	btaddconn = create_singlethread_workqueue("btaddconn");
-	if (!btaddconn)
-		return -ENOMEM;
-
-	btdelconn = create_singlethread_workqueue("btdelconn");
-	if (!btdelconn) {
-		destroy_workqueue(btaddconn);
+	bluetooth = create_singlethread_workqueue("bluetooth");
+	if (!bluetooth)
 		return -ENOMEM;
-	}
 
 	bt_class = class_create(THIS_MODULE, "bluetooth");
 	if (IS_ERR(bt_class)) {
-		destroy_workqueue(btdelconn);
-		destroy_workqueue(btaddconn);
+		destroy_workqueue(bluetooth);
 		return PTR_ERR(bt_class);
 	}
 
@@ -457,8 +453,7 @@ int __init bt_sysfs_init(void)
 
 void bt_sysfs_cleanup(void)
 {
-	destroy_workqueue(btaddconn);
-	destroy_workqueue(btdelconn);
+	destroy_workqueue(bluetooth);
 
 	class_destroy(bt_class);
 }
-- 
cgit v1.2.3


From 052b30b0a8eec8db5b18ad49effdf2a9ba4c1e1a Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sun, 26 Apr 2009 20:01:22 +0200
Subject: Bluetooth: Add different pairing timeout for Legacy Pairing

The Bluetooth stack uses a reference counting for all established ACL
links and if no user (L2CAP connection) is present, the link will be
terminated to save power. The problem part is the dedicated pairing
when using Legacy Pairing (Bluetooth 2.0 and before). At that point
no user is present and pairing attempts will be disconnected within
10 seconds or less. In previous kernel version this was not a problem
since the disconnect timeout wasn't triggered on incoming connections
for the first time. However this caused issues with broken host stacks
that kept the connections around after dedicated pairing. When the
support for Simple Pairing got added, the link establishment procedure
needed to be changed and now causes issues when using Legacy Pairing

When using Simple Pairing it is possible to do a proper reference
counting of ACL link users. With Legacy Pairing this is not possible
since the specification is unclear in some areas and too many broken
Bluetooth devices have already been deployed. So instead of trying to
deal with all the broken devices, a special pairing timeout will be
introduced that increases the timeout to 60 seconds when pairing is
triggered.

If a broken devices now puts the stack into an unforeseen state, the
worst that happens is the disconnect timeout triggers after 120 seconds
instead of 4 seconds. This allows successful pairings with legacy and
broken devices now.

Based on a report by Johan Hedberg <johan.hedberg@nokia.com>

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_conn.c  |  1 +
 net/bluetooth/hci_event.c | 36 +++++++++++++++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 1181db08d9d..75ebbe2221a 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -215,6 +215,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
 	conn->state = BT_OPEN;
 
 	conn->power_save = 1;
+	conn->disc_timeout = HCI_DISCONN_TIMEOUT;
 
 	switch (type) {
 	case ACL_LINK:
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 15f40ea8d54..4e7cb88e5da 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -883,6 +883,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 		if (conn->type == ACL_LINK) {
 			conn->state = BT_CONFIG;
 			hci_conn_hold(conn);
+			conn->disc_timeout = HCI_DISCONN_TIMEOUT;
 		} else
 			conn->state = BT_CONNECTED;
 
@@ -1063,9 +1064,14 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 				hci_proto_connect_cfm(conn, ev->status);
 				hci_conn_put(conn);
 			}
-		} else
+		} else {
 			hci_auth_cfm(conn, ev->status);
 
+			hci_conn_hold(conn);
+			conn->disc_timeout = HCI_DISCONN_TIMEOUT;
+			hci_conn_put(conn);
+		}
+
 		if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) {
 			if (!ev->status) {
 				struct hci_cp_set_conn_encrypt cp;
@@ -1479,7 +1485,21 @@ static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb
 
 static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
+	struct hci_ev_pin_code_req *ev = (void *) skb->data;
+	struct hci_conn *conn;
+
 	BT_DBG("%s", hdev->name);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+	if (conn) {
+		hci_conn_hold(conn);
+		conn->disc_timeout = HCI_PAIRING_TIMEOUT;
+		hci_conn_put(conn);
+	}
+
+	hci_dev_unlock(hdev);
 }
 
 static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1489,7 +1509,21 @@ static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff
 
 static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
 {
+	struct hci_ev_link_key_notify *ev = (void *) skb->data;
+	struct hci_conn *conn;
+
 	BT_DBG("%s", hdev->name);
+
+	hci_dev_lock(hdev);
+
+	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+	if (conn) {
+		hci_conn_hold(conn);
+		conn->disc_timeout = HCI_DISCONN_TIMEOUT;
+		hci_conn_put(conn);
+	}
+
+	hci_dev_unlock(hdev);
 }
 
 static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *skb)
-- 
cgit v1.2.3


From 3fdca1e1370ffe89980927cdef0583bebcd8caaf Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Tue, 28 Apr 2009 09:04:55 -0700
Subject: Bluetooth: Fix connection establishment with low security requirement

The Bluetooth 2.1 specification introduced four different security modes
that can be mapped using Legacy Pairing and Simple Pairing. With the
usage of Simple Pairing it is required that all connections (except
the ones for SDP) are encrypted. So even the low security requirement
mandates an encrypted connection when using Simple Pairing. When using
Legacy Pairing (for Bluetooth 2.0 devices and older) this is not required
since it causes interoperability issues.

To support this properly the low security requirement translates into
different host controller transactions depending if Simple Pairing is
supported or not. However in case of Simple Pairing the command to
switch on encryption after a successful authentication is not triggered
for the low security mode. This patch fixes this and actually makes
the logic to differentiate between Simple Pairing and Legacy Pairing
a lot simpler.

Based on a report by Ville Tervo <ville.tervo@nokia.com>

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_conn.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 75ebbe2221a..375f4b4f7f7 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -425,12 +425,9 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
 	if (sec_level == BT_SECURITY_SDP)
 		return 1;
 
-	if (sec_level == BT_SECURITY_LOW) {
-		if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0)
-			return hci_conn_auth(conn, sec_level, auth_type);
-		else
-			return 1;
-	}
+	if (sec_level == BT_SECURITY_LOW &&
+				(!conn->ssp_mode || !conn->hdev->ssp_mode))
+		return 1;
 
 	if (conn->link_mode & HCI_LM_ENCRYPT)
 		return hci_conn_auth(conn, sec_level, auth_type);
-- 
cgit v1.2.3


From 942e4a2bd680c606af0211e64eb216be2e19bf61 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 28 Apr 2009 22:36:33 -0700
Subject: netfilter: revised locking for x_tables

The x_tables are organized with a table structure and a per-cpu copies
of the counters and rules. On older kernels there was a reader/writer
lock per table which was a performance bottleneck. In 2.6.30-rc, this
was converted to use RCU and the counters/rules which solved the performance
problems for do_table but made replacing rules much slower because of
the necessary RCU grace period.

This version uses a per-cpu set of spinlocks and counters to allow to
table processing to proceed without the cache thrashing of a global
reader lock and keeps the same performance for table updates.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/arp_tables.c | 125 ++++++++++++---------------------------
 net/ipv4/netfilter/ip_tables.c  | 126 +++++++++++-----------------------------
 net/ipv6/netfilter/ip6_tables.c | 123 ++++++++++++---------------------------
 net/netfilter/x_tables.c        |  53 +++++++++--------
 4 files changed, 136 insertions(+), 291 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 5ba533d234d..831fe1879dc 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	indev = in ? in->name : nulldevname;
 	outdev = out ? out->name : nulldevname;
 
-	rcu_read_lock_bh();
-	private = rcu_dereference(table->private);
-	table_base = rcu_dereference(private->entries[smp_processor_id()]);
+	xt_info_rdlock_bh();
+	private = table->private;
+	table_base = private->entries[smp_processor_id()];
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
@@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 
 			hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
 				(2 * skb->dev->addr_len);
+
 			ADD_COUNTER(e->counters, hdr_len, 1);
 
 			t = arpt_get_target(e);
@@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 			e = (void *)e + e->next_offset;
 		}
 	} while (!hotdrop);
-
-	rcu_read_unlock_bh();
+	xt_info_rdunlock_bh();
 
 	if (hotdrop)
 		return NF_DROP;
@@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t,
 	/* Instead of clearing (by a previous call to memset())
 	 * the counters and using adds, we set the counters
 	 * with data used by 'current' CPU
-	 * We dont care about preemption here.
+	 *
+	 * Bottom half has to be disabled to prevent deadlock
+	 * if new softirq were to run and call ipt_do_table
 	 */
-	curcpu = raw_smp_processor_id();
+	local_bh_disable();
+	curcpu = smp_processor_id();
 
 	i = 0;
 	ARPT_ENTRY_ITERATE(t->entries[curcpu],
@@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t,
 		if (cpu == curcpu)
 			continue;
 		i = 0;
+		xt_info_wrlock(cpu);
 		ARPT_ENTRY_ITERATE(t->entries[cpu],
 				   t->size,
 				   add_entry_to_counter,
 				   counters,
 				   &i);
+		xt_info_wrunlock(cpu);
 	}
-}
-
-
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct arpt_entry *e,
-		     const struct xt_counters addme[],
-		     unsigned int *i)
-{
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
-
-/* Take values from counters and add them back onto the current cpu */
-static void put_counters(struct xt_table_info *t,
-			 const struct xt_counters counters[])
-{
-	unsigned int i, cpu;
-
-	local_bh_disable();
-	cpu = smp_processor_id();
-	i = 0;
-	ARPT_ENTRY_ITERATE(t->entries[cpu],
-			  t->size,
-			  add_counter_to_entry,
-			  counters,
-			  &i);
 	local_bh_enable();
 }
 
-static inline int
-zero_entry_counter(struct arpt_entry *e, void *arg)
-{
-	e->counters.bcnt = 0;
-	e->counters.pcnt = 0;
-	return 0;
-}
-
-static void
-clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
-{
-	unsigned int cpu;
-	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
-
-	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
-	for_each_possible_cpu(cpu) {
-		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
-		ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
-				  zero_entry_counter, NULL);
-	}
-}
-
 static struct xt_counters *alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
 	struct xt_table_info *private = table->private;
-	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	 * (other than comefrom, which userspace doesn't care
@@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		goto nomem;
-
-	info = xt_alloc_table_info(private->size);
-	if (!info)
-		goto free_counters;
-
-	clone_counters(info, private);
-
-	mutex_lock(&table->lock);
-	xt_table_entry_swap_rcu(private, info);
-	synchronize_net();	/* Wait until smoke has cleared */
+		return ERR_PTR(-ENOMEM);
 
-	get_counters(info, counters);
-	put_counters(private, counters);
-	mutex_unlock(&table->lock);
-
-	xt_free_table_info(info);
+	get_counters(private, counters);
 
 	return counters;
-
- free_counters:
-	vfree(counters);
- nomem:
-	return ERR_PTR(-ENOMEM);
 }
 
 static int copy_entries_to_user(unsigned int total_size,
@@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name,
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 
-	/* Get the old counters. */
+	/* Get the old counters, and synchronize with replace */
 	get_counters(oldinfo, counters);
+
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
 	ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct arpt_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
 static int do_add_counters(struct net *net, void __user *user, unsigned int len,
 			   int compat)
 {
-	unsigned int i;
+	unsigned int i, curcpu;
 	struct xt_counters_info tmp;
 	struct xt_counters *paddc;
 	unsigned int num_counters;
@@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
 		goto free;
 	}
 
-	mutex_lock(&t->lock);
+	local_bh_disable();
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
-	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
-	loc_cpu_entry = private->entries[smp_processor_id()];
+	curcpu = smp_processor_id();
+	loc_cpu_entry = private->entries[curcpu];
+	xt_info_wrlock(curcpu);
 	ARPT_ENTRY_ITERATE(loc_cpu_entry,
 			   private->size,
 			   add_counter_to_entry,
 			   paddc,
 			   &i);
-	preempt_enable();
+	xt_info_wrunlock(curcpu);
  unlock_up_free:
-	mutex_unlock(&t->lock);
-
+	local_bh_enable();
 	xt_table_unlock(t);
 	module_put(t->me);
  free:
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 810c0b62c7d..2ec8d7290c4 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb,
 	tgpar.hooknum = hook;
 
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-
-	rcu_read_lock_bh();
-	private = rcu_dereference(table->private);
-	table_base = rcu_dereference(private->entries[smp_processor_id()]);
+	xt_info_rdlock_bh();
+	private = table->private;
+	table_base = private->entries[smp_processor_id()];
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
@@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb,
 			e = (void *)e + e->next_offset;
 		}
 	} while (!hotdrop);
-
-	rcu_read_unlock_bh();
+	xt_info_rdunlock_bh();
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t,
 
 	/* Instead of clearing (by a previous call to memset())
 	 * the counters and using adds, we set the counters
-	 * with data used by 'current' CPU
-	 * We dont care about preemption here.
+	 * with data used by 'current' CPU.
+	 *
+	 * Bottom half has to be disabled to prevent deadlock
+	 * if new softirq were to run and call ipt_do_table
 	 */
-	curcpu = raw_smp_processor_id();
+	local_bh_disable();
+	curcpu = smp_processor_id();
 
 	i = 0;
 	IPT_ENTRY_ITERATE(t->entries[curcpu],
@@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t,
 		if (cpu == curcpu)
 			continue;
 		i = 0;
+		xt_info_wrlock(cpu);
 		IPT_ENTRY_ITERATE(t->entries[cpu],
 				  t->size,
 				  add_entry_to_counter,
 				  counters,
 				  &i);
+		xt_info_wrunlock(cpu);
 	}
-
-}
-
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct ipt_entry *e,
-		     const struct xt_counters addme[],
-		     unsigned int *i)
-{
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
-
-/* Take values from counters and add them back onto the current cpu */
-static void put_counters(struct xt_table_info *t,
-			 const struct xt_counters counters[])
-{
-	unsigned int i, cpu;
-
-	local_bh_disable();
-	cpu = smp_processor_id();
-	i = 0;
-	IPT_ENTRY_ITERATE(t->entries[cpu],
-			  t->size,
-			  add_counter_to_entry,
-			  counters,
-			  &i);
 	local_bh_enable();
 }
 
-
-static inline int
-zero_entry_counter(struct ipt_entry *e, void *arg)
-{
-	e->counters.bcnt = 0;
-	e->counters.pcnt = 0;
-	return 0;
-}
-
-static void
-clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
-{
-	unsigned int cpu;
-	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
-
-	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
-	for_each_possible_cpu(cpu) {
-		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
-		IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
-				  zero_entry_counter, NULL);
-	}
-}
-
 static struct xt_counters * alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
 	struct xt_table_info *private = table->private;
-	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
@@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		goto nomem;
+		return ERR_PTR(-ENOMEM);
 
-	info = xt_alloc_table_info(private->size);
-	if (!info)
-		goto free_counters;
-
-	clone_counters(info, private);
-
-	mutex_lock(&table->lock);
-	xt_table_entry_swap_rcu(private, info);
-	synchronize_net();	/* Wait until smoke has cleared */
-
-	get_counters(info, counters);
-	put_counters(private, counters);
-	mutex_unlock(&table->lock);
-
-	xt_free_table_info(info);
+	get_counters(private, counters);
 
 	return counters;
-
- free_counters:
-	vfree(counters);
- nomem:
-	return ERR_PTR(-ENOMEM);
 }
 
 static int
@@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 
-	/* Get the old counters. */
+	/* Get the old counters, and synchronize with replace */
 	get_counters(oldinfo, counters);
+
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
 	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ipt_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
 
 static int
 do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
 {
-	unsigned int i;
+	unsigned int i, curcpu;
 	struct xt_counters_info tmp;
 	struct xt_counters *paddc;
 	unsigned int num_counters;
@@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
 		goto free;
 	}
 
-	mutex_lock(&t->lock);
+	local_bh_disable();
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
-	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
-	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	curcpu = smp_processor_id();
+	loc_cpu_entry = private->entries[curcpu];
+	xt_info_wrlock(curcpu);
 	IPT_ENTRY_ITERATE(loc_cpu_entry,
 			  private->size,
 			  add_counter_to_entry,
 			  paddc,
 			  &i);
-	preempt_enable();
+	xt_info_wrunlock(curcpu);
  unlock_up_free:
-	mutex_unlock(&t->lock);
+	local_bh_enable();
 	xt_table_unlock(t);
 	module_put(t->me);
  free:
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 800ae854247..219e165aea1 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -365,9 +365,9 @@ ip6t_do_table(struct sk_buff *skb,
 
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 
-	rcu_read_lock_bh();
-	private = rcu_dereference(table->private);
-	table_base = rcu_dereference(private->entries[smp_processor_id()]);
+	xt_info_rdlock_bh();
+	private = table->private;
+	table_base = private->entries[smp_processor_id()];
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
@@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb,
 #ifdef CONFIG_NETFILTER_DEBUG
 	((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
 #endif
-	rcu_read_unlock_bh();
+	xt_info_rdunlock_bh();
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -926,9 +926,12 @@ get_counters(const struct xt_table_info *t,
 	/* Instead of clearing (by a previous call to memset())
 	 * the counters and using adds, we set the counters
 	 * with data used by 'current' CPU
-	 * We dont care about preemption here.
+	 *
+	 * Bottom half has to be disabled to prevent deadlock
+	 * if new softirq were to run and call ipt_do_table
 	 */
-	curcpu = raw_smp_processor_id();
+	local_bh_disable();
+	curcpu = smp_processor_id();
 
 	i = 0;
 	IP6T_ENTRY_ITERATE(t->entries[curcpu],
@@ -941,72 +944,22 @@ get_counters(const struct xt_table_info *t,
 		if (cpu == curcpu)
 			continue;
 		i = 0;
+		xt_info_wrlock(cpu);
 		IP6T_ENTRY_ITERATE(t->entries[cpu],
 				  t->size,
 				  add_entry_to_counter,
 				  counters,
 				  &i);
+		xt_info_wrunlock(cpu);
 	}
-}
-
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct ip6t_entry *e,
-		     const struct xt_counters addme[],
-		     unsigned int *i)
-{
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
-
-/* Take values from counters and add them back onto the current cpu */
-static void put_counters(struct xt_table_info *t,
-			 const struct xt_counters counters[])
-{
-	unsigned int i, cpu;
-
-	local_bh_disable();
-	cpu = smp_processor_id();
-	i = 0;
-	IP6T_ENTRY_ITERATE(t->entries[cpu],
-			   t->size,
-			   add_counter_to_entry,
-			   counters,
-			   &i);
 	local_bh_enable();
 }
 
-static inline int
-zero_entry_counter(struct ip6t_entry *e, void *arg)
-{
-	e->counters.bcnt = 0;
-	e->counters.pcnt = 0;
-	return 0;
-}
-
-static void
-clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
-{
-	unsigned int cpu;
-	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
-
-	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
-	for_each_possible_cpu(cpu) {
-		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
-		IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
-				   zero_entry_counter, NULL);
-	}
-}
-
 static struct xt_counters *alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
 	struct xt_table_info *private = table->private;
-	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
@@ -1015,30 +968,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		goto nomem;
+		return ERR_PTR(-ENOMEM);
 
-	info = xt_alloc_table_info(private->size);
-	if (!info)
-		goto free_counters;
-
-	clone_counters(info, private);
-
-	mutex_lock(&table->lock);
-	xt_table_entry_swap_rcu(private, info);
-	synchronize_net();	/* Wait until smoke has cleared */
-
-	get_counters(info, counters);
-	put_counters(private, counters);
-	mutex_unlock(&table->lock);
-
-	xt_free_table_info(info);
+	get_counters(private, counters);
 
 	return counters;
-
- free_counters:
-	vfree(counters);
- nomem:
-	return ERR_PTR(-ENOMEM);
 }
 
 static int
@@ -1334,8 +1268,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 
-	/* Get the old counters. */
+	/* Get the old counters, and synchronize with replace */
 	get_counters(oldinfo, counters);
+
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
 	IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1405,11 +1340,24 @@ do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ip6t_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
 static int
 do_add_counters(struct net *net, void __user *user, unsigned int len,
 		int compat)
 {
-	unsigned int i;
+	unsigned int i, curcpu;
 	struct xt_counters_info tmp;
 	struct xt_counters *paddc;
 	unsigned int num_counters;
@@ -1465,25 +1413,28 @@ do_add_counters(struct net *net, void __user *user, unsigned int len,
 		goto free;
 	}
 
-	mutex_lock(&t->lock);
+
+	local_bh_disable();
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
-	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
-	loc_cpu_entry = private->entries[raw_smp_processor_id()];
+	curcpu = smp_processor_id();
+	xt_info_wrlock(curcpu);
+	loc_cpu_entry = private->entries[curcpu];
 	IP6T_ENTRY_ITERATE(loc_cpu_entry,
 			  private->size,
 			  add_counter_to_entry,
 			  paddc,
 			  &i);
-	preempt_enable();
+	xt_info_wrunlock(curcpu);
+
  unlock_up_free:
-	mutex_unlock(&t->lock);
+	local_bh_enable();
 	xt_table_unlock(t);
 	module_put(t->me);
  free:
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 509a95621f9..150e5cf62f8 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -625,20 +625,6 @@ void xt_free_table_info(struct xt_table_info *info)
 }
 EXPORT_SYMBOL(xt_free_table_info);
 
-void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo,
-			     struct xt_table_info *newinfo)
-{
-	unsigned int cpu;
-
-	for_each_possible_cpu(cpu) {
-		void *p = oldinfo->entries[cpu];
-		rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]);
-		newinfo->entries[cpu] = p;
-	}
-
-}
-EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu);
-
 /* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
 struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
 				    const char *name)
@@ -676,32 +662,43 @@ void xt_compat_unlock(u_int8_t af)
 EXPORT_SYMBOL_GPL(xt_compat_unlock);
 #endif
 
+DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks);
+EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks);
+
+
 struct xt_table_info *
 xt_replace_table(struct xt_table *table,
 	      unsigned int num_counters,
 	      struct xt_table_info *newinfo,
 	      int *error)
 {
-	struct xt_table_info *oldinfo, *private;
+	struct xt_table_info *private;
 
 	/* Do the substitution. */
-	mutex_lock(&table->lock);
+	local_bh_disable();
 	private = table->private;
+
 	/* Check inside lock: is the old number correct? */
 	if (num_counters != private->number) {
 		duprintf("num_counters != table->private->number (%u/%u)\n",
 			 num_counters, private->number);
-		mutex_unlock(&table->lock);
+		local_bh_enable();
 		*error = -EAGAIN;
 		return NULL;
 	}
-	oldinfo = private;
-	rcu_assign_pointer(table->private, newinfo);
-	newinfo->initial_entries = oldinfo->initial_entries;
-	mutex_unlock(&table->lock);
 
-	synchronize_net();
-	return oldinfo;
+	table->private = newinfo;
+	newinfo->initial_entries = private->initial_entries;
+
+	/*
+	 * Even though table entries have now been swapped, other CPU's
+	 * may still be using the old entries. This is okay, because
+	 * resynchronization happens because of the locking done
+	 * during the get_counters() routine.
+	 */
+	local_bh_enable();
+
+	return private;
 }
 EXPORT_SYMBOL_GPL(xt_replace_table);
 
@@ -734,7 +731,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table,
 
 	/* Simplifies replace_table code. */
 	table->private = bootstrap;
-	mutex_init(&table->lock);
 
 	if (!xt_replace_table(table, 0, newinfo, &ret))
 		goto unlock;
@@ -1147,7 +1143,14 @@ static struct pernet_operations xt_net_ops = {
 
 static int __init xt_init(void)
 {
-	int i, rv;
+	unsigned int i;
+	int rv;
+
+	for_each_possible_cpu(i) {
+		struct xt_info_lock *lock = &per_cpu(xt_info_locks, i);
+		spin_lock_init(&lock->lock);
+		lock->readers = 0;
+	}
 
 	xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL);
 	if (!xt)
-- 
cgit v1.2.3


From d4c4a9a1bce1912ed5681251f0037fd4f2364a3e Mon Sep 17 00:00:00 2001
From: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Date: Wed, 29 Apr 2009 11:41:24 +0100
Subject: mac80211: fix modprobe deadlock by not calling wep_init under
 rtnl_lock

- ieee80211_wep_init(), which is called with rtnl_lock held, blocks in
   request_module() [waiting for modprobe to load a crypto module].

 - modprobe blocks in a call to flush_workqueue(), when it closes a TTY
   [presumably when it exits].

 - The workqueue item linkwatch_event() blocks on rtnl_lock.

There's no reason for wep_init() to be called with rtnl_lock held, so
just move it outside the critical section.

Signed-off-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index fbcbed6cad0..00968c2d22b 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -909,6 +909,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	if (result < 0)
 		goto fail_sta_info;
 
+	result = ieee80211_wep_init(local);
+	if (result < 0) {
+		printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n",
+		       wiphy_name(local->hw.wiphy), result);
+		goto fail_wep;
+	}
+
 	rtnl_lock();
 	result = dev_alloc_name(local->mdev, local->mdev->name);
 	if (result < 0)
@@ -930,14 +937,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		goto fail_rate;
 	}
 
-	result = ieee80211_wep_init(local);
-
-	if (result < 0) {
-		printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n",
-		       wiphy_name(local->hw.wiphy), result);
-		goto fail_wep;
-	}
-
 	/* add one default STA interface if supported */
 	if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION)) {
 		result = ieee80211_if_add(local, "wlan%d", NULL,
@@ -967,13 +966,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 
 	return 0;
 
-fail_wep:
-	rate_control_deinitialize(local);
 fail_rate:
 	unregister_netdevice(local->mdev);
 	local->mdev = NULL;
 fail_dev:
 	rtnl_unlock();
+	ieee80211_wep_free(local);
+fail_wep:
 	sta_info_stop(local);
 fail_sta_info:
 	debugfs_hw_del(local);
-- 
cgit v1.2.3


From c428c89201a57a0ce24c37ed79e540d1f4101cf3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 29 Apr 2009 00:28:18 +0200
Subject: mac80211: default to automatic power control

In "mac80211: correct wext transmit power handler"
I fixed the wext handler, but forgot to make the default of the
user_power_level -1 (aka "auto"), so that now the transmit power
is always set to 0, causing associations to time out and similar
problems since we're transmitting with very little power. Correct
this by correcting the default user_power_level to -1.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Bisected-by: Niel Lambrechts <niel.lambrechts@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/main.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 00968c2d22b..14134193cd1 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -757,6 +757,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	local->hw.conf.long_frame_max_tx_count = 4;
 	local->hw.conf.short_frame_max_tx_count = 7;
 	local->hw.conf.radio_enabled = true;
+	local->user_power_level = -1;
 
 	INIT_LIST_HEAD(&local->interfaces);
 	mutex_init(&local->iflist_mtx);
-- 
cgit v1.2.3


From 7a67e56fd362d3edfde1f19170893508c3940d3a Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Thu, 30 Apr 2009 05:41:19 -0700
Subject: net: Fix oops when splicing skbs from a frag_list.

Lennert Buytenhek wrote:
> Since 4fb669948116d928ae44262ab7743732c574630d ("net: Optimize memory
> usage when splicing from sockets.") I'm seeing this oops (e.g. in
> 2.6.30-rc3) when splicing from a TCP socket to /dev/null on a driver
> (mv643xx_eth) that uses LRO in the skb mode (lro_receive_skb) rather
> than the frag mode:

My patch incorrectly assumed skb->sk was always valid, but for
"frag_listed" skbs we can only use skb->sk of their parent.

Reported-by: Lennert Buytenhek <buytenh@wantstofly.org>
Debugged-by: Lennert Buytenhek <buytenh@wantstofly.org>
Tested-by: Lennert Buytenhek <buytenh@wantstofly.org>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ce6356cd9f7..f091a5a845c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1365,9 +1365,8 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
 
 static inline struct page *linear_to_page(struct page *page, unsigned int *len,
 					  unsigned int *offset,
-					  struct sk_buff *skb)
+					  struct sk_buff *skb, struct sock *sk)
 {
-	struct sock *sk = skb->sk;
 	struct page *p = sk->sk_sndmsg_page;
 	unsigned int off;
 
@@ -1405,13 +1404,14 @@ new_page:
  */
 static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
 				unsigned int *len, unsigned int offset,
-				struct sk_buff *skb, int linear)
+				struct sk_buff *skb, int linear,
+				struct sock *sk)
 {
 	if (unlikely(spd->nr_pages == PIPE_BUFFERS))
 		return 1;
 
 	if (linear) {
-		page = linear_to_page(page, len, &offset, skb);
+		page = linear_to_page(page, len, &offset, skb, sk);
 		if (!page)
 			return 1;
 	} else
@@ -1442,7 +1442,8 @@ static inline void __segment_seek(struct page **page, unsigned int *poff,
 static inline int __splice_segment(struct page *page, unsigned int poff,
 				   unsigned int plen, unsigned int *off,
 				   unsigned int *len, struct sk_buff *skb,
-				   struct splice_pipe_desc *spd, int linear)
+				   struct splice_pipe_desc *spd, int linear,
+				   struct sock *sk)
 {
 	if (!*len)
 		return 1;
@@ -1465,7 +1466,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
 		/* the linear region may spread across several pages  */
 		flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
 
-		if (spd_fill_page(spd, page, &flen, poff, skb, linear))
+		if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk))
 			return 1;
 
 		__segment_seek(&page, &poff, &plen, flen);
@@ -1481,8 +1482,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
  * pipe is full or if we already spliced the requested length.
  */
 static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
-		      unsigned int *len,
-		      struct splice_pipe_desc *spd)
+			     unsigned int *len, struct splice_pipe_desc *spd,
+			     struct sock *sk)
 {
 	int seg;
 
@@ -1492,7 +1493,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 	if (__splice_segment(virt_to_page(skb->data),
 			     (unsigned long) skb->data & (PAGE_SIZE - 1),
 			     skb_headlen(skb),
-			     offset, len, skb, spd, 1))
+			     offset, len, skb, spd, 1, sk))
 		return 1;
 
 	/*
@@ -1502,7 +1503,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
 
 		if (__splice_segment(f->page, f->page_offset, f->size,
-				     offset, len, skb, spd, 0))
+				     offset, len, skb, spd, 0, sk))
 			return 1;
 	}
 
@@ -1528,12 +1529,13 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 		.ops = &sock_pipe_buf_ops,
 		.spd_release = sock_spd_release,
 	};
+	struct sock *sk = skb->sk;
 
 	/*
 	 * __skb_splice_bits() only fails if the output has no room left,
 	 * so no point in going over the frag_list for the error case.
 	 */
-	if (__skb_splice_bits(skb, &offset, &tlen, &spd))
+	if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk))
 		goto done;
 	else if (!tlen)
 		goto done;
@@ -1545,14 +1547,13 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 
 		for (; list && tlen; list = list->next) {
-			if (__skb_splice_bits(list, &offset, &tlen, &spd))
+			if (__skb_splice_bits(list, &offset, &tlen, &spd, sk))
 				break;
 		}
 	}
 
 done:
 	if (spd.nr_pages) {
-		struct sock *sk = skb->sk;
 		int ret;
 
 		/*
-- 
cgit v1.2.3


From ec581f6a42bbbea5271c66da9769a41b46c74e10 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Fri, 1 May 2009 09:05:06 -0700
Subject: net: Fix skb_tx_hash() for forwarding workloads.

When skb_rx_queue_recorded() is true, we dont want to use jash distribution
as the device driver exactly told us which queue was selected at RX time.
jhash makes a statistical shuffle, but this wont work with 8 static inputs.

Later improvements would be to compute reciprocal value of real_num_tx_queues
to avoid a divide here. But this computation should be done once,
when real_num_tx_queues is set. This needs a separate patch, and a new
field in struct net_device.

Reported-by: Andrew Dickinson <andrew@whydna.net>
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 308a7d0c277..e2e9e4af3ac 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1735,11 +1735,12 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 {
 	u32 hash;
 
-	if (skb_rx_queue_recorded(skb)) {
-		hash = skb_get_rx_queue(skb);
-	} else if (skb->sk && skb->sk->sk_hash) {
+	if (skb_rx_queue_recorded(skb))
+		return skb_get_rx_queue(skb) % dev->real_num_tx_queues;
+
+	if (skb->sk && skb->sk->sk_hash)
 		hash = skb->sk->sk_hash;
-	} else
+	else
 		hash = skb->protocol;
 
 	hash = jhash_1word(hash, skb_tx_hashrnd);
-- 
cgit v1.2.3


From acda074390270ca9e28f2a9729f7b835e2ad6da4 Mon Sep 17 00:00:00 2001
From: Laszlo Attila Toth <panther@balabit.hu>
Date: Fri, 1 May 2009 15:23:10 -0700
Subject: xt_socket: checks for the state of nf_conntrack

xt_socket can use connection tracking, and checks whether it is a module.

Signed-off-by: Laszlo Attila Toth <panther@balabit.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 881203c4a14..cb3ad741ebf 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -837,6 +837,7 @@ config NETFILTER_XT_MATCH_SOCKET
 	depends on NETFILTER_TPROXY
 	depends on NETFILTER_XTABLES
 	depends on NETFILTER_ADVANCED
+	depends on !NF_CONNTRACK || NF_CONNTRACK
 	select NF_DEFRAG_IPV4
 	help
 	  This option adds a `socket' match, which can be used to match
-- 
cgit v1.2.3


From d0ab8ff81bf1b01bae7d6b92ca067badbbb02cc9 Mon Sep 17 00:00:00 2001
From: Robert Love <robert.w.love@intel.com>
Date: Sat, 2 May 2009 13:48:32 -0700
Subject: net: Only store high 16 bits of kernel generated filter priorities

The kernel should only be using the high 16 bits of a kernel
generated priority. Filter priorities in all other cases only
use the upper 16 bits of the u32 'prio' field of 'struct tcf_proto',
but when the kernel generates the priority of a filter is saves all
32 bits which can result in incorrect lookup failures when a filter
needs to be deleted or modified.

Signed-off-by: Robert Love <robert.w.love@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 173fcc4b050..0759f32e9dc 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -254,7 +254,7 @@ replay:
 		}
 		tp->ops = tp_ops;
 		tp->protocol = protocol;
-		tp->prio = nprio ? : tcf_auto_prio(*back);
+		tp->prio = nprio ? : TC_H_MAJ(tcf_auto_prio(*back));
 		tp->q = q;
 		tp->classify = tp_ops->classify;
 		tp->classid = parent;
-- 
cgit v1.2.3


From 902e5ea15f8471a3213a37b11b98196f3406aeaf Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin@rab.in>
Date: Sat, 2 May 2009 13:49:36 -0700
Subject: Subject: [PATCH] br2684: restore net_dev initialization

Commit 0ba25ff4c669e5395110ba6ab4958a97a9f96922 ("br2684: convert to
net_device_ops") inadvertently deleted the initialization of the net_dev
pointer in the br2684_dev structure, leading to crashes.  This patch
adds it back.

Reported-by: Mikko Vinni <mmvinni@yahoo.com>
Tested-by: Mikko Vinni <mmvinni@yahoo.com>
Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/br2684.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 334fcd4a4ea..3100a8940af 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -549,6 +549,7 @@ static void br2684_setup(struct net_device *netdev)
 	struct br2684_dev *brdev = BRPRIV(netdev);
 
 	ether_setup(netdev);
+	brdev->net_dev = netdev;
 
 	netdev->netdev_ops = &br2684_netdev_ops;
 
-- 
cgit v1.2.3


From 0c266898b42fe4e4e2f9edfc9d3474c10f93aa6a Mon Sep 17 00:00:00 2001
From: Satoru SATOH <satoru.satoh@gmail.com>
Date: Mon, 4 May 2009 11:11:01 -0700
Subject: tcp: Fix tcp_prequeue() to get correct rto_min value

tcp_prequeue() refers to the constant value (TCP_RTO_MIN) regardless of
the actual value might be tuned. The following patches fix this and make
tcp_prequeue get the actual value returns from tcp_rto_min().

Signed-off-by: Satoru SATOH <satoru.satoh@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c96a6bb2543..eec3e6f9956 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -597,16 +597,6 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
 		tcp_grow_window(sk, skb);
 }
 
-static u32 tcp_rto_min(struct sock *sk)
-{
-	struct dst_entry *dst = __sk_dst_get(sk);
-	u32 rto_min = TCP_RTO_MIN;
-
-	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
-		rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
-	return rto_min;
-}
-
 /* Called to compute a smoothed rtt estimate. The data fed to this
  * routine either comes from timestamps, or from segments that were
  * known _not_ to have been retransmitted [see Karn/Partridge
-- 
cgit v1.2.3


From a67e899cf38ae542d1a028ccd021f9189f76fb74 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 2 May 2009 18:24:06 -0700
Subject: Bluetooth: Fix issue with sysfs handling for connections

Due to a semantic changes in flush_workqueue() the current approach of
synchronizing the sysfs handling for connections doesn't work anymore. The
whole approach is actually fully broken and based on assumptions that are
no longer valid.

With the introduction of Simple Pairing support, the creation of low-level
ACL links got changed. This change invalidates the reason why in the past
two independent work queues have been used for adding/removing sysfs
devices. The adding of the actual sysfs device is now postponed until the
host controller successfully assigns an unique handle to that link. So
the real synchronization happens inside the controller and not the host.

The only left-over problem is that some internals of the sysfs device
handling are not initialized ahead of time. This leaves potential access
to invalid data and can cause various NULL pointer dereferences. To fix
this a new function makes sure that all sysfs details are initialized
when an connection attempt is made. The actual sysfs device is only
registered when the connection has been successfully established. To
avoid a race condition with the registration, the check if a device is
registered has been moved into the removal work.

As an extra protection two flush_work() calls are left in place to
make sure a previous add/del work has been completed first.

Based on a report by Marc Pignat <marc.pignat@hevs.ch>

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Tested-by: Justin P. Mattock <justinmattock@gmail.com>
Tested-by: Roger Quadros <ext-roger.quadros@nokia.com>
Tested-by: Marc Pignat <marc.pignat@hevs.ch>
---
 net/bluetooth/hci_conn.c  |  2 ++
 net/bluetooth/hci_sysfs.c | 74 +++++++++++++++++++++++++----------------------
 2 files changed, 42 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 375f4b4f7f7..61309b26f27 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -248,6 +248,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
 	if (hdev->notify)
 		hdev->notify(hdev, HCI_NOTIFY_CONN_ADD);
 
+	hci_conn_init_sysfs(conn);
+
 	tasklet_enable(&hdev->tx_task);
 
 	return conn;
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index b7c51082dde..582d8877078 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -9,7 +9,7 @@
 struct class *bt_class = NULL;
 EXPORT_SYMBOL_GPL(bt_class);
 
-static struct workqueue_struct *bluetooth;
+static struct workqueue_struct *bt_workq;
 
 static inline char *link_typetostr(int type)
 {
@@ -89,8 +89,8 @@ static void add_conn(struct work_struct *work)
 {
 	struct hci_conn *conn = container_of(work, struct hci_conn, work_add);
 
-	/* ensure previous add/del is complete */
-	flush_workqueue(bluetooth);
+	/* ensure previous del is complete */
+	flush_work(&conn->work_del);
 
 	if (device_add(&conn->dev) < 0) {
 		BT_ERR("Failed to register connection device");
@@ -98,27 +98,6 @@ static void add_conn(struct work_struct *work)
 	}
 }
 
-void hci_conn_add_sysfs(struct hci_conn *conn)
-{
-	struct hci_dev *hdev = conn->hdev;
-
-	BT_DBG("conn %p", conn);
-
-	conn->dev.type = &bt_link;
-	conn->dev.class = bt_class;
-	conn->dev.parent = &hdev->dev;
-
-	dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
-
-	dev_set_drvdata(&conn->dev, conn);
-
-	device_initialize(&conn->dev);
-
-	INIT_WORK(&conn->work_add, add_conn);
-
-	queue_work(bluetooth, &conn->work_add);
-}
-
 /*
  * The rfcomm tty device will possibly retain even when conn
  * is down, and sysfs doesn't support move zombie device,
@@ -134,8 +113,11 @@ static void del_conn(struct work_struct *work)
 	struct hci_conn *conn = container_of(work, struct hci_conn, work_del);
 	struct hci_dev *hdev = conn->hdev;
 
-	/* ensure previous add/del is complete */
-	flush_workqueue(bluetooth);
+	/* ensure previous add is complete */
+	flush_work(&conn->work_add);
+
+	if (!device_is_registered(&conn->dev))
+		return;
 
 	while (1) {
 		struct device *dev;
@@ -152,16 +134,40 @@ static void del_conn(struct work_struct *work)
 	hci_dev_put(hdev);
 }
 
-void hci_conn_del_sysfs(struct hci_conn *conn)
+void hci_conn_init_sysfs(struct hci_conn *conn)
 {
+	struct hci_dev *hdev = conn->hdev;
+
 	BT_DBG("conn %p", conn);
 
-	if (!device_is_registered(&conn->dev))
-		return;
+	conn->dev.type = &bt_link;
+	conn->dev.class = bt_class;
+	conn->dev.parent = &hdev->dev;
+
+	dev_set_drvdata(&conn->dev, conn);
 
+	device_initialize(&conn->dev);
+
+	INIT_WORK(&conn->work_add, add_conn);
 	INIT_WORK(&conn->work_del, del_conn);
+}
+
+void hci_conn_add_sysfs(struct hci_conn *conn)
+{
+	struct hci_dev *hdev = conn->hdev;
+
+	BT_DBG("conn %p", conn);
+
+	dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
+
+	queue_work(bt_workq, &conn->work_add);
+}
+
+void hci_conn_del_sysfs(struct hci_conn *conn)
+{
+	BT_DBG("conn %p", conn);
 
-	queue_work(bluetooth, &conn->work_del);
+	queue_work(bt_workq, &conn->work_del);
 }
 
 static inline char *host_typetostr(int type)
@@ -438,13 +444,13 @@ void hci_unregister_sysfs(struct hci_dev *hdev)
 
 int __init bt_sysfs_init(void)
 {
-	bluetooth = create_singlethread_workqueue("bluetooth");
-	if (!bluetooth)
+	bt_workq = create_singlethread_workqueue("bluetooth");
+	if (!bt_workq)
 		return -ENOMEM;
 
 	bt_class = class_create(THIS_MODULE, "bluetooth");
 	if (IS_ERR(bt_class)) {
-		destroy_workqueue(bluetooth);
+		destroy_workqueue(bt_workq);
 		return PTR_ERR(bt_class);
 	}
 
@@ -453,7 +459,7 @@ int __init bt_sysfs_init(void)
 
 void bt_sysfs_cleanup(void)
 {
-	destroy_workqueue(bluetooth);
+	destroy_workqueue(bt_workq);
 
 	class_destroy(bt_class);
 }
-- 
cgit v1.2.3