From f75e6745aa3084124ae1434fd7629853bdaf6798 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 21 Apr 2009 17:18:20 -0400 Subject: SUNRPC: Fix the problem of EADDRNOTAVAIL syslog floods on reconnect See http://bugzilla.kernel.org/show_bug.cgi?id=13034 If the port gets into a TIME_WAIT state, then we cannot reconnect without binding to a new port. Tested-by: Petr Vandrovec Tested-by: Jean Delvare Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- net/sunrpc/xprt.c | 6 ++---- net/sunrpc/xprtsock.c | 26 +++++++++++++++++++++----- 2 files changed, 23 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a0bfe53f162..06ca058572f 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -672,10 +672,8 @@ xprt_init_autodisconnect(unsigned long data) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; spin_unlock(&xprt->transport_lock); - if (xprt_connecting(xprt)) - xprt_release_write(xprt, NULL); - else - queue_work(rpciod_workqueue, &xprt->task_cleanup); + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + queue_work(rpciod_workqueue, &xprt->task_cleanup); return; out_abort: spin_unlock(&xprt->transport_lock); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d40ff50887a..e1859614601 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -807,6 +807,9 @@ static void xs_reset_transport(struct sock_xprt *transport) * * This is used when all requests are complete; ie, no DRC state remains * on the server we want to save. + * + * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with + * xs_reset_transport() zeroing the socket from underneath a writer. */ static void xs_close(struct rpc_xprt *xprt) { @@ -824,6 +827,14 @@ static void xs_close(struct rpc_xprt *xprt) xprt_disconnect_done(xprt); } +static void xs_tcp_close(struct rpc_xprt *xprt) +{ + if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state)) + xs_close(xprt); + else + xs_tcp_shutdown(xprt); +} + /** * xs_destroy - prepare to shutdown a transport * @xprt: doomed transport @@ -1772,6 +1783,15 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, xprt, -status, xprt_connected(xprt), sock->sk->sk_state); switch (status) { + default: + printk("%s: connect returned unhandled error %d\n", + __func__, status); + case -EADDRNOTAVAIL: + /* We're probably in TIME_WAIT. Get rid of existing socket, + * and retry + */ + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + xprt_force_disconnect(xprt); case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: @@ -1782,10 +1802,6 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, xprt_clear_connecting(xprt); return; } - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - printk("%s: connect returned unhandled error %d\n", - __func__, status); out_eagain: status = -EAGAIN; out: @@ -1994,7 +2010,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, - .close = xs_tcp_shutdown, + .close = xs_tcp_close, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, }; -- cgit v1.2.3 From 453ed90d1395a5281a8f1a0de5d8aabc66202e34 Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Sun, 5 Apr 2009 16:22:16 -0500 Subject: net/9p: set correct stat size when sending Twstat messages The 9P2000 Twstat message requires the size of the stat structure to be specified. Currently the 9p code writes zero instead of the actual size. 
This behavior confuses some of the file servers that check if the size is correct. This patch adds a new function that calculcates the stat size and puts the value in the appropriate place in the 9P message. Signed-off-by: Latchesar Ionkov Reviewed-by: Eric Van Hensbergen --- net/9p/client.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 1eb580c38fb..93f442aaa11 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1251,12 +1251,42 @@ error: } EXPORT_SYMBOL(p9_client_stat); +static int p9_client_statsize(struct p9_wstat *wst, int optional) +{ + int ret; + + /* size[2] type[2] dev[4] qid[13] */ + /* mode[4] atime[4] mtime[4] length[8]*/ + /* name[s] uid[s] gid[s] muid[s] */ + ret = 2+2+4+13+4+4+4+8+2+2+2+2; + + if (wst->name) + ret += strlen(wst->name); + if (wst->uid) + ret += strlen(wst->uid); + if (wst->gid) + ret += strlen(wst->gid); + if (wst->muid) + ret += strlen(wst->muid); + + if (optional) { + ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */ + if (wst->extension) + ret += strlen(wst->extension); + } + + return ret; +} + int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) { int err; struct p9_req_t *req; struct p9_client *clnt; + err = 0; + clnt = fid->clnt; + wst->size = p9_client_statsize(wst, clnt->dotu); P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); P9_DPRINTK(P9_DEBUG_9P, " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" @@ -1268,10 +1298,8 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) wst->atime, wst->mtime, (unsigned long long)wst->length, wst->name, wst->uid, wst->gid, wst->muid, wst->extension, wst->n_uid, wst->n_gid, wst->n_muid); - err = 0; - clnt = fid->clnt; - req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, 0, wst); + req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size, wst); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; -- cgit v1.2.3 From 742b11a7ec60faa25d76c95c268041ab215c25ad Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Sun, 5 Apr 2009 16:26:41 -0500 Subject: net/9p: return error when p9_client_stat fails p9_client_stat function doesn't return correct value if it fails. p9_client_stat should return ERR_PTR of the error value when it fails. Instead, it always returns a value to the allocated p9_wstat struct even when it is not populated correctly. This patch makes p9_client_stat to handle failure correctly. Signed-off-by: Latchesar Ionkov Reviewed-by: Eric Van Hensbergen --- net/9p/client.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 93f442aaa11..781d89a952e 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1244,10 +1244,14 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) ret->name, ret->uid, ret->gid, ret->muid, ret->extension, ret->n_uid, ret->n_gid, ret->n_muid); + p9_free_req(clnt, req); + return ret; + free_and_error: p9_free_req(clnt, req); error: - return ret; + kfree(ret); + return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_stat); -- cgit v1.2.3 From 1bab88b2310998de18b32529a27ea835d164254a Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Sun, 5 Apr 2009 16:28:59 -0500 Subject: net/9p: handle correctly interrupted 9P requests Currently the 9p code crashes when a operation is interrupted, i.e. for example when the user presses ^C while reading from a file. This patch fixes the code that is responsible for interruption and flushing of 9P operations. 
Signed-off-by: Latchesar Ionkov --- net/9p/client.c | 74 +++++++++++++-------------------------------------- net/9p/trans_fd.c | 14 ++++++---- net/9p/trans_rdma.c | 1 + net/9p/trans_virtio.c | 1 + 4 files changed, 30 insertions(+), 60 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 781d89a952e..dd43a8289b0 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -203,7 +203,6 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) p9pdu_reset(req->tc); p9pdu_reset(req->rc); - req->flush_tag = 0; req->tc->tag = tag-1; req->status = REQ_STATUS_ALLOC; @@ -324,35 +323,9 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r) */ void p9_client_cb(struct p9_client *c, struct p9_req_t *req) { - struct p9_req_t *other_req; - unsigned long flags; - P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); - - if (req->status == REQ_STATUS_ERROR) - wake_up(req->wq); - - if (req->flush_tag) { /* flush receive path */ - P9_DPRINTK(P9_DEBUG_9P, "<<< RFLUSH %d\n", req->tc->tag); - spin_lock_irqsave(&c->lock, flags); - other_req = p9_tag_lookup(c, req->flush_tag); - if (other_req->status != REQ_STATUS_FLSH) /* stale flush */ - spin_unlock_irqrestore(&c->lock, flags); - else { - other_req->status = REQ_STATUS_FLSHD; - spin_unlock_irqrestore(&c->lock, flags); - wake_up(other_req->wq); - } - p9_free_req(c, req); - } else { /* normal receive path */ - P9_DPRINTK(P9_DEBUG_MUX, "normal: tag %d\n", req->tc->tag); - spin_lock_irqsave(&c->lock, flags); - if (req->status != REQ_STATUS_FLSHD) - req->status = REQ_STATUS_RCVD; - spin_unlock_irqrestore(&c->lock, flags); - wake_up(req->wq); - P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); - } + wake_up(req->wq); + P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); } EXPORT_SYMBOL(p9_client_cb); @@ -486,9 +459,15 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) if (IS_ERR(req)) return PTR_ERR(req); - req->flush_tag = oldtag; - /* we don't free anything here because RPC isn't complete */ + /* if we haven't received a response for oldreq, + remove it from the list. */ + spin_lock(&c->lock); + if (oldreq->status == REQ_STATUS_FLSH) + list_del(&oldreq->req_list); + spin_unlock(&c->lock); + + p9_free_req(c, req); return 0; } @@ -509,7 +488,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) struct p9_req_t *req; unsigned long flags; int sigpending; - int flushed = 0; P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); @@ -546,42 +524,28 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) 
goto reterr; } - /* if it was a flush we just transmitted, return our tag */ - if (type == P9_TFLUSH) - return req; -again: P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag); err = wait_event_interruptible(*req->wq, req->status >= REQ_STATUS_RCVD); - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d (flushed=%d)\n", - req->wq, tag, err, flushed); + P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n", + req->wq, tag, err); if (req->status == REQ_STATUS_ERROR) { P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; - } else if (err == -ERESTARTSYS && flushed) { - P9_DPRINTK(P9_DEBUG_MUX, "flushed - going again\n"); - goto again; - } else if (req->status == REQ_STATUS_FLSHD) { - P9_DPRINTK(P9_DEBUG_MUX, "flushed - erestartsys\n"); - err = -ERESTARTSYS; } - if ((err == -ERESTARTSYS) && (c->status == Connected) && (!flushed)) { + if ((err == -ERESTARTSYS) && (c->status == Connected)) { P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); - spin_lock_irqsave(&c->lock, flags); - if (req->status == REQ_STATUS_SENT) - req->status = REQ_STATUS_FLSH; - spin_unlock_irqrestore(&c->lock, flags); sigpending = 1; - flushed = 1; clear_thread_flag(TIF_SIGPENDING); - if (c->trans_mod->cancel(c, req)) { - err = p9_client_flush(c, req); - if (err == 0) - goto again; - } + if (c->trans_mod->cancel(c, req)) + p9_client_flush(c, req); + + /* if we received the response anyway, don't signal error */ + if (req->status == REQ_STATUS_RCVD) + err = 0; } if (sigpending) { diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index c613ed08a5e..a2a1814c7a8 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -213,8 +213,8 @@ static void p9_conn_cancel(struct p9_conn *m, int err) spin_unlock_irqrestore(&m->client->lock, flags); list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { - list_del(&req->req_list); P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req); + list_del(&req->req_list); p9_client_cb(m->client, req); } } @@ -336,7 +336,8 @@ static void p9_read_work(struct work_struct *work) "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); m->req = p9_tag_lookup(m->client, tag); - if (!m->req) { + if (!m->req || (m->req->status != REQ_STATUS_SENT && + m->req->status != REQ_STATUS_FLSH)) { P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", tag); err = -EIO; @@ -361,10 +362,11 @@ static void p9_read_work(struct work_struct *work) if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */ P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n"); spin_lock(&m->client->lock); + if (m->req->status != REQ_STATUS_ERROR) + m->req->status = REQ_STATUS_RCVD; list_del(&m->req->req_list); spin_unlock(&m->client->lock); p9_client_cb(m->client, m->req); - m->rbuf = NULL; m->rpos = 0; m->rsize = 0; @@ -454,6 +456,7 @@ static void p9_write_work(struct work_struct *work) req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); req->status = REQ_STATUS_SENT; + P9_DPRINTK(P9_DEBUG_TRANS, "move req %p\n", req); list_move_tail(&req->req_list, &m->req_list); m->wbuf = req->tc->sdata; @@ -683,12 +686,13 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&client->lock); - list_del(&req->req_list); if (req->status == REQ_STATUS_UNSENT) { + list_del(&req->req_list); req->status = REQ_STATUS_FLSHD; ret = 0; - } + } else if (req->status == REQ_STATUS_SENT) + req->status = REQ_STATUS_FLSH; spin_unlock(&client->lock); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 
7fa0eb20b2f..ac4990041eb 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -295,6 +295,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, goto err_out; req->rc = c->rc; + req->status = REQ_STATUS_RCVD; p9_client_cb(client, req); return; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 2d7781ec663..bb8579a141a 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -134,6 +134,7 @@ static void req_done(struct virtqueue *vq) P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); req = p9_tag_lookup(chan->client, rc->tag); + req->status = REQ_STATUS_RCVD; p9_client_cb(chan->client, req); } } -- cgit v1.2.3 From b6f0a3652ea9d2296fdc98c3b2c96603be611c4d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Apr 2009 12:16:19 +0200 Subject: netfilter: nf_log regression fix commit ca735b3aaa945626ba65a3e51145bfe4ecd9e222 'netfilter: use a linked list of loggers' introduced an array of list_head in "struct nf_logger", but forgot to initialize it in nf_log_register(). This resulted in oops when calling nf_log_unregister() at module unload time. Reported-and-tested-by: Mariusz Kozlowski Signed-off-by: Eric Dumazet Acked-by: Eric Leblond Signed-off-by: Patrick McHardy --- net/netfilter/nf_log.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8bb998fe098..beb37311e1a 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -36,10 +36,14 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) int nf_log_register(u_int8_t pf, struct nf_logger *logger) { const struct nf_logger *llog; + int i; if (pf >= ARRAY_SIZE(nf_loggers)) return -EINVAL; + for (i = 0; i < ARRAY_SIZE(logger->list); i++) + INIT_LIST_HEAD(&logger->list[i]); + mutex_lock(&nf_log_mutex); if (pf == NFPROTO_UNSPEC) { -- cgit v1.2.3 From 38fb0afcd8761f8858e27135ed89a65117e2019c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Apr 2009 12:45:08 +0200 Subject: netfilter: nf_conntrack: fix crash when unloading helpers Commit ea781f197d (netfilter: nf_conntrack: use SLAB_DESTROY_BY_RCU and) get rid of call_rcu() was missing one conversion to the hlist_nulls functions, causing a crash when unloading conntrack helper modules. Reported-and-tested-by: Mariusz Kozlowski Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 30b8e9009f9..0fa5a422959 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -176,7 +176,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, } /* Get rid of expecteds, set helpers to NULL. */ - hlist_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) + hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) unhelp(h, me); for (i = 0; i < nf_conntrack_htable_size; i++) { hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) -- cgit v1.2.3 From 0ad8acaf434d360ad99813d981a68e605d6c8179 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 24 Mar 2009 21:21:08 -0400 Subject: cfg80211: fix NULL pointer deference in reg_device_remove() We won't ever get here as regulatory_hint_core() can only fail on -ENOMEM and in that case we don't initialize cfg80211 but this is technically correct code. 
This is actually good for stable, where we don't check for -ENOMEM failure on __regulatory_hint()'s failure. Cc: stable@kernel.org Reported-by: Quentin Armitage Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6327e1617ac..6c1993d9990 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2095,11 +2095,12 @@ int set_regdom(const struct ieee80211_regdomain *rd) /* Caller must hold cfg80211_mutex */ void reg_device_remove(struct wiphy *wiphy) { - struct wiphy *request_wiphy; + struct wiphy *request_wiphy = NULL; assert_cfg80211_lock(); - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + if (last_request) + request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); kfree(wiphy->regd); if (!last_request || !request_wiphy) -- cgit v1.2.3 From b3631286aca3f54427ca0eb950981e9753866f6c Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Wed, 25 Mar 2009 18:10:18 +0530 Subject: mac80211: Fix bug in getting rx status for frames pending in reorder buffer Currently rx status for frames which are completed from reorder buffer is taken from it's cb area which is not always right, cb is not holding the rx status when driver uses mac80211's non-irq rx handler to pass it's received frames. This results in dropping almost all frames from reorder buffer when security is enabled by doing double decryption (first in hw, second in sw because of wrong rx status). This patch copies rx status into cb area before the frame is put into reorder buffer. After this patch, there is a significant improvement in throughput with ath9k + WPA2(AES). Signed-off-by: Vasanthakumar Thiagarajan Acked-by: Johannes Berg Cc: stable@kernel.org Signed-off-by: John W. 
Linville --- net/mac80211/rx.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 64ebe664eff..5fa7aedd90e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -29,6 +29,7 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, struct tid_ampdu_rx *tid_agg_rx, struct sk_buff *skb, + struct ieee80211_rx_status *status, u16 mpdu_seq_num, int bar_req); /* @@ -1688,7 +1689,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) /* manage reordering buffer according to requested */ /* sequence number */ rcu_read_lock(); - ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, NULL, + ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, NULL, NULL, start_seq_num, 1); rcu_read_unlock(); return RX_DROP_UNUSABLE; @@ -2293,6 +2294,7 @@ static inline u16 seq_sub(u16 sq1, u16 sq2) static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, struct tid_ampdu_rx *tid_agg_rx, struct sk_buff *skb, + struct ieee80211_rx_status *rxstatus, u16 mpdu_seq_num, int bar_req) { @@ -2374,6 +2376,8 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, /* put the frame in the reordering buffer */ tid_agg_rx->reorder_buf[index] = skb; + memcpy(tid_agg_rx->reorder_buf[index]->cb, rxstatus, + sizeof(*rxstatus)); tid_agg_rx->stored_mpdu_num++; /* release the buffer until next missing frame */ index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) @@ -2399,7 +2403,8 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, } static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, - struct sk_buff *skb) + struct sk_buff *skb, + struct ieee80211_rx_status *status) { struct ieee80211_hw *hw = &local->hw; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; @@ -2448,7 +2453,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, /* according to mpdu sequence number deal with reordering buffer */ mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4; - ret = ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, + ret = ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, status, mpdu_seq_num, 0); end_reorder: return ret; @@ -2512,7 +2517,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, return; } - if (!ieee80211_rx_reorder_ampdu(local, skb)) + if (!ieee80211_rx_reorder_ampdu(local, skb, status)) __ieee80211_rx_handle_packet(hw, skb, status, rate); rcu_read_unlock(); -- cgit v1.2.3 From 47afbaf5af9454a7a1a64591e20cbfcc27ca67a8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Apr 2009 15:22:28 +0200 Subject: mac80211: correct wext transmit power handler Wext makes no assumptions about the contents of data->txpower.fixed and data->txpower.value when data->txpower.disabled is set, so do not update the user-requested power level while disabling. Also, when wext configures a really _fixed_ power output [1], we should reject it instead of limiting it to the regulatory constraint. If the user wants to set a _limit_ [2] then we should honour that. [1] iwconfig wlan0 txpower 20dBm fixed [2] iwconfig wlan0 txpower 10dBm This fixes http://www.intellinuxwireless.org/bugzilla/show_bug.cgi?id=1942 Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/mac80211/main.c | 2 +- net/mac80211/wext.c | 41 +++++++++++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a6f1d8a869b..fbcbed6cad0 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -258,7 +258,7 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) (chan->max_power - local->power_constr_level) : chan->max_power; - if (local->user_power_level) + if (local->user_power_level >= 0) power = min(power, local->user_power_level); if (local->hw.conf.power_level != power) { diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index deb4ecec122..ce9115c1815 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -417,6 +417,7 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_channel* chan = local->hw.conf.channel; + bool reconf = false; u32 reconf_flags = 0; int new_power_level; @@ -427,14 +428,38 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, if (!chan) return -EINVAL; - if (data->txpower.fixed) - new_power_level = min(data->txpower.value, chan->max_power); - else /* Automatic power level setting */ - new_power_level = chan->max_power; + /* only change when not disabling */ + if (!data->txpower.disabled) { + if (data->txpower.fixed) { + if (data->txpower.value < 0) + return -EINVAL; + new_power_level = data->txpower.value; + /* + * Debatable, but we cannot do a fixed power + * level above the regulatory constraint. + * Use "iwconfig wlan0 txpower 15dBm" instead. + */ + if (new_power_level > chan->max_power) + return -EINVAL; + } else { + /* + * Automatic power level setting, max being the value + * passed in from userland. + */ + if (data->txpower.value < 0) + new_power_level = -1; + else + new_power_level = data->txpower.value; + } + + reconf = true; - local->user_power_level = new_power_level; - if (local->hw.conf.power_level != new_power_level) - reconf_flags |= IEEE80211_CONF_CHANGE_POWER; + /* + * ieee80211_hw_config() will limit to the channel's + * max power and possibly power constraint from AP. 
+ */ + local->user_power_level = new_power_level; + } if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) { local->hw.conf.radio_enabled = !(data->txpower.disabled); @@ -442,7 +467,7 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, ieee80211_led_radio(local, local->hw.conf.radio_enabled); } - if (reconf_flags) + if (reconf || reconf_flags) ieee80211_hw_config(local, reconf_flags); return 0; -- cgit v1.2.3 From a860402d8f1756dae48cdcabe153c974116fc37e Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Wed, 15 Apr 2009 14:41:22 -0400 Subject: mac80211: quiet beacon loss messages On Sunday 05 April 2009 11:29:38 Michael Buesch wrote: > On Sunday 05 April 2009 11:23:59 Jaswinder Singh Rajput wrote: > > With latest linus tree I am getting, .config file attached: > > > > [ 22.895051] r8169: eth0: link down > > [ 22.897564] ADDRCONF(NETDEV_UP): eth0: link is not ready > > [ 22.928047] ADDRCONF(NETDEV_UP): wlan0: link is not ready > > [ 22.982292] libvirtd used greatest stack depth: 4200 bytes left > > [ 63.709879] wlan0: authenticate with AP 00:11:95:9e:df:f6 > > [ 63.712096] wlan0: authenticated > > [ 63.712127] wlan0: associate with AP 00:11:95:9e:df:f6 > > [ 63.726831] wlan0: RX AssocResp from 00:11:95:9e:df:f6 (capab=0x471 status=0 aid=1) > > [ 63.726855] wlan0: associated > > [ 63.730093] ADDRCONF(NETDEV_CHANGE): wlan0: link becomes ready > > [ 74.296087] wlan0: no IPv6 routers present > > [ 79.349044] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 119.358200] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 179.354292] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 259.366044] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 359.348292] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 361.953459] packagekitd used greatest stack depth: 4160 bytes left > > [ 478.824258] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 598.813343] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 718.817292] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 838.824567] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 958.815402] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 1078.848434] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 1198.822913] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 1318.824931] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 1438.814157] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 1558.827336] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 1678.823011] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 1798.830589] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 1918.828044] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 2038.827224] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 2116.517152] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 2158.840243] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 2278.827427] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > > I think this message should only show if CONFIG_MAC80211_VERBOSE_DEBUG is set. 
> It's kind of expected that we lose a beacon once in a while, so we shouldn't print > verbose messages to the kernel log (even if they are KERN_DEBUG). > > And besides that, I think one can easily remotely trigger this message and flood the logs. > So it should probably _also_ be ratelimited. Something like this: Signed-off-by: Michael Buesch --- net/mac80211/mlme.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7ecda9d59d8..1b14d0204dd 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -945,9 +945,13 @@ void ieee80211_beacon_loss_work(struct work_struct *work) u.mgd.beacon_loss_work); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - printk(KERN_DEBUG "%s: driver reports beacon loss from AP %pM " - "- sending probe request\n", sdata->dev->name, - sdata->u.mgd.bssid); +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: driver reports beacon loss from AP %pM " + "- sending probe request\n", sdata->dev->name, + sdata->u.mgd.bssid); + } +#endif ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL; ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid, @@ -1007,9 +1011,13 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata) (local->hw.conf.flags & IEEE80211_CONF_PS)) && time_after(jiffies, ifmgd->last_beacon + IEEE80211_MONITORING_INTERVAL)) { - printk(KERN_DEBUG "%s: beacon loss from AP %pM " - "- sending probe request\n", - sdata->dev->name, ifmgd->bssid); +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: beacon loss from AP %pM " + "- sending probe request\n", + sdata->dev->name, ifmgd->bssid); + } +#endif ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL; ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid, ifmgd->ssid_len, NULL, 0); -- cgit v1.2.3 From 23a99840d571a237845fd0906bce78e7c76be650 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 14 Apr 2009 06:32:56 +0200 Subject: mac80211: Fragmentation threshold (typo) mac80211: Fragmentation threshold (typo) ieee80211_ioctl_siwfrag() sets the fragmentation_threshold to 2352 when frame fragmentation is to be disabled, yet the corresponding 'get' function tests for 2353 bytes instead. This causes user-space tools to display a fragmentation threshold of 2352 bytes even if fragmentation has been disabled. Signed-off-by: Gerrit Renker Signed-off-by: John W. Linville --- net/mac80211/wext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index ce9115c1815..959aa8379cc 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -555,7 +555,7 @@ static int ieee80211_ioctl_giwfrag(struct net_device *dev, struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); frag->value = local->fragmentation_threshold; - frag->disabled = (frag->value >= IEEE80211_MAX_RTS_THRESHOLD); + frag->disabled = (frag->value >= IEEE80211_MAX_FRAG_THRESHOLD); frag->fixed = 1; return 0; -- cgit v1.2.3 From 98d500d66cb7940747b424b245fc6a51ecfbf005 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 16 Apr 2009 18:33:01 +0200 Subject: netfilter: nf_nat: add support for persistent mappings The removal of the SAME target accidentally removed one feature that is not available from the normal NAT targets so far, having multi-range mappings that use the same mapping for each connection from a single client. 
The current behaviour is to choose the address from the range based on source and destination IP, which breaks when communicating with sites having multiple addresses that require all connections to originate from the same IP address. Introduce a IP_NAT_RANGE_PERSISTENT option that controls whether the destination address is taken into account for selecting addresses. http://bugzilla.kernel.org/show_bug.cgi?id=12954 Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index fe65187810f..3229e0a81ba 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -211,7 +211,8 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple, minip = ntohl(range->min_ip); maxip = ntohl(range->max_ip); j = jhash_2words((__force u32)tuple->src.u3.ip, - (__force u32)tuple->dst.u3.ip, 0); + range->flags & IP_NAT_RANGE_PERSISTENT ? + (__force u32)tuple->dst.u3.ip : 0, 0); j = ((u64)j * (maxip - minip + 1)) >> 32; *var_ipp = htonl(minip + j); } -- cgit v1.2.3 From 62bcaa13039538c7f794b0cfcbc26e0313e7fcd5 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 17 Apr 2009 01:38:46 -0700 Subject: can: Network Drop Monitor: Make use of consume_skb() in af_can.c Since commit ead2ceb0ec9f85cff19c43b5cdb2f8a054484431 ("Network Drop Monitor: Adding kfree_skb_clean for non-drops and modifying end-of-line points for skbs") so called end-of-line points for skb's should use consume_skb() to free the socket buffer. In opposite to consume_skb() the function kfree_skb() is intended to be used for unexpected skb drops e.g. in error conditions that now can trigger the network drop monitor if enabled. This patch moves the skb end-of-line point in af_can.c to use consume_skb(). Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/af_can.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/can/af_can.c b/net/can/af_can.c index 547bafc79e2..10f0528c3bf 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -674,8 +674,8 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, rcu_read_unlock(); - /* free the skbuff allocated by the netdevice driver */ - kfree_skb(skb); + /* consume the skbuff allocated by the netdevice driver */ + consume_skb(skb); if (matches > 0) { can_stats.matches++; -- cgit v1.2.3 From a0a69a0106dab8d20596f97f6674bed3b394d1ee Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 17 Apr 2009 02:34:38 -0700 Subject: gro: Fix use after free in tcp_gro_receive After calling skb_gro_receive skb->len can no longer be relied on since if the skb was merged using frags, then its pages will have been removed and the length reduced. This caused tcp_gro_receive to prematurely end merging which resulted in suboptimal performance with ixgbe. The fix is to store skb->len on the stack. Reported-by: Mark Wagner Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fafbec8b073..1d7f49c6f0c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2511,6 +2511,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct sk_buff *p; struct tcphdr *th; struct tcphdr *th2; + unsigned int len; unsigned int thlen; unsigned int flags; unsigned int mss = 1; @@ -2531,6 +2532,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) skb_gro_pull(skb, thlen); + len = skb_gro_len(skb); flags = tcp_flag_word(th); for (; (p = *head); head = &p->next) { @@ -2561,7 +2563,7 @@ found: mss = skb_shinfo(p)->gso_size; - flush |= (skb_gro_len(skb) > mss) | !skb_gro_len(skb); + flush |= (len > mss) | !len; flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); if (flush || skb_gro_receive(head, skb)) { @@ -2574,7 +2576,7 @@ found: tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); out_check_final: - flush = skb_gro_len(skb) < mss; + flush = len < mss; flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | TCP_FLAG_SYN | TCP_FLAG_FIN); -- cgit v1.2.3 From 150ace0db360373d2016a2497d252138a59c5ba8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 17 Apr 2009 17:47:31 +0200 Subject: netfilter: ctnetlink: report error if event message allocation fails This patch fixes an inconsistency that results in no error reports to user-space listeners if we fail to allocate the event message. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c6439c77953..0ea36e0c8a0 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -512,7 +512,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, skb = ctnetlink_alloc_skb(tuple(ct, IP_CT_DIR_ORIGINAL), GFP_ATOMIC); if (!skb) - return NOTIFY_DONE; + goto errout; b = skb->tail; @@ -591,8 +591,9 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, nla_put_failure: rcu_read_unlock(); nlmsg_failure: - nfnetlink_set_err(0, group, -ENOBUFS); kfree_skb(skb); +errout: + nfnetlink_set_err(0, group, -ENOBUFS); return NOTIFY_DONE; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -1564,7 +1565,7 @@ static int ctnetlink_expect_event(struct notifier_block *this, skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb) - return NOTIFY_DONE; + goto errout; b = skb->tail; @@ -1589,8 +1590,9 @@ static int ctnetlink_expect_event(struct notifier_block *this, nla_put_failure: rcu_read_unlock(); nlmsg_failure: - nfnetlink_set_err(0, 0, -ENOBUFS); kfree_skb(skb); +errout: + nfnetlink_set_err(0, 0, -ENOBUFS); return NOTIFY_DONE; } #endif -- cgit v1.2.3 From a0142733a7ef2f3476e63938b330026a08c53f37 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 17 Apr 2009 17:48:44 +0200 Subject: netfilter: nfnetlink: return ENOMEM if we fail to create netlink socket With this patch, nfnetlink returns -ENOMEM instead of -EPERM if we fail to create the nfnetlink netlink socket during the module loading. This is exactly what rtnetlink does in this case. Ideally, it would be better if we propagate the error that has happened in netlink_kernel_create(), however, this function still does not implement this yet. 
Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nfnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 2785d66a7e3..b8ab37ad7ed 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -203,7 +203,7 @@ static int __init nfnetlink_init(void) nfnetlink_rcv, NULL, THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); - return -1; + return -ENOMEM; } return 0; -- cgit v1.2.3 From 7181d4673710888b6d7084b37b9d77ed4f4e41b2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Apr 2009 21:33:15 +0200 Subject: mac80211: avoid crashing when no scan sdata Using the scan_sdata variable here is terribly wrong, if there has never been a scan then we fail. However, we need a bandaid... Signed-off-by: Johannes Berg Cc: stable@kernel.org [2.6.29] Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1b14d0204dd..dc60804d6dd 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2113,12 +2113,13 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) struct ieee80211_local *local = container_of(work, struct ieee80211_local, dynamic_ps_enable_work); + /* XXX: using scan_sdata is completely broken! */ struct ieee80211_sub_if_data *sdata = local->scan_sdata; if (local->hw.conf.flags & IEEE80211_CONF_PS) return; - if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) + if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK && sdata) ieee80211_send_nullfunc(local, sdata, 1); local->hw.conf.flags |= IEEE80211_CONF_PS; -- cgit v1.2.3 From 160002fe845218f5789a26954048592c3920ac7b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 Apr 2009 12:15:38 +0200 Subject: cfg80211: copy hold when replacing BSS When we receive a probe response frame we can replace the BSS struct in our list -- but if that struct is held then we need to hold the new one as well. We really should fix this completely and not replace the struct, but this is a bandaid for now. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/scan.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 2a00e362f5f..4c77669275e 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -364,6 +364,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, list_replace(&found->list, &res->list); rb_replace_node(&found->rbn, &res->rbn, &dev->bss_tree); + /* XXX: workaround */ + res->hold = found->hold; kref_put(&found->ref, bss_release); found = res; } else if (found) { -- cgit v1.2.3 From cd1658f592a60d028dd2e48d86724b737a82cab0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 Apr 2009 15:00:58 +0200 Subject: cfg80211: do not replace BSS structs Instead, allocate extra IE memory if necessary. Normally, this isn't even necessary since there's enough space. This is a better way of correcting the "held BSS can disappear" issue, but also a lot more code. It is also necessary for proper auth/assoc BSS handling in the future. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/wireless/core.h | 2 +- net/wireless/scan.c | 42 +++++++++++++++++++++++++++++++++--------- 2 files changed, 34 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/wireless/core.h b/net/wireless/core.h index d43daa236ef..0a592e4295f 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -90,7 +90,7 @@ struct cfg80211_internal_bss { struct rb_node rbn; unsigned long ts; struct kref ref; - bool hold; + bool hold, ies_allocated; /* must be last because of priv member */ struct cfg80211_bss pub; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 4c77669275e..2ae65b39b52 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -58,6 +58,10 @@ static void bss_release(struct kref *ref) bss = container_of(ref, struct cfg80211_internal_bss, ref); if (bss->pub.free_priv) bss->pub.free_priv(&bss->pub); + + if (bss->ies_allocated) + kfree(bss->pub.information_elements); + kfree(bss); } @@ -360,21 +364,41 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, found = rb_find_bss(dev, res); - if (found && overwrite) { - list_replace(&found->list, &res->list); - rb_replace_node(&found->rbn, &res->rbn, - &dev->bss_tree); - /* XXX: workaround */ - res->hold = found->hold; - kref_put(&found->ref, bss_release); - found = res; - } else if (found) { + if (found) { kref_get(&found->ref); found->pub.beacon_interval = res->pub.beacon_interval; found->pub.tsf = res->pub.tsf; found->pub.signal = res->pub.signal; found->pub.capability = res->pub.capability; found->ts = res->ts; + + /* overwrite IEs */ + if (overwrite) { + size_t used = dev->wiphy.bss_priv_size + sizeof(*res); + size_t ielen = res->pub.len_information_elements; + + if (ksize(found) >= used + ielen) { + memcpy(found->pub.information_elements, + res->pub.information_elements, ielen); + found->pub.len_information_elements = ielen; + } else { + u8 *ies = found->pub.information_elements; + + if (found->ies_allocated) { + if (ksize(ies) < ielen) + ies = krealloc(ies, ielen, + GFP_ATOMIC); + } else + ies = kmalloc(ielen, GFP_ATOMIC); + + if (ies) { + memcpy(ies, res->pub.information_elements, ielen); + found->ies_allocated = true; + found->pub.information_elements = ies; + } + } + } + kref_put(&res->ref, bss_release); } else { /* this "consumes" the reference */ -- cgit v1.2.3 From 60375541f7c8a577b977d344565259776c3acfc1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 17 Apr 2009 00:54:23 +0200 Subject: mac80211: validate TIM IE length The TIM IE must not be shorter than 4 bytes, so verify that when parsing it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index dc60804d6dd..1619e0cd26e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -441,6 +441,9 @@ static bool ieee80211_check_tim(struct ieee802_11_elems *elems, u16 aid) u8 index, indexn1, indexn2; struct ieee80211_tim_ie *tim = (struct ieee80211_tim_ie *) elems->tim; + if (unlikely(!tim || elems->tim_len < 4)) + return false; + aid &= 0x3fff; index = aid / 8; mask = 1 << (aid & 7); -- cgit v1.2.3 From 7816a0a862d851d0b05710e7d94bfe390f3180e2 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 17 Apr 2009 15:59:23 -0700 Subject: vlan/macvlan: fix NULL pointer dereferences in ethtool handlers Check whether the underlying device provides a set of ethtool ops before checking for individual handlers to avoid NULL pointer dereferences. 
Reported-by: Art van Breemen Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 1b34135cf99..6b092136401 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -668,7 +668,8 @@ static int vlan_ethtool_get_settings(struct net_device *dev, const struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; - if (!real_dev->ethtool_ops->get_settings) + if (!real_dev->ethtool_ops || + !real_dev->ethtool_ops->get_settings) return -EOPNOTSUPP; return real_dev->ethtool_ops->get_settings(real_dev, cmd); -- cgit v1.2.3 From e2139b32726e5dd184974c785ea3f62026590801 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 26 Mar 2009 16:41:56 +0200 Subject: Bluetooth: Fix removing of RFCOMM DLC timer with DEFER_SETUP There is a missing call to rfcomm_dlc_clear_timer in the case that DEFER_SETUP is used and so the connection gets disconnected after the timeout even if it was successfully accepted previously. This patch adds a call to rfcomm_dlc_clear_timer to rfcomm_dlc_accept which will get called when the user accepts the connection by calling read() on the socket. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 1d0fb0f23c6..374536e050a 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1194,6 +1194,8 @@ void rfcomm_dlc_accept(struct rfcomm_dlc *d) rfcomm_send_ua(d->session, d->dlci); + rfcomm_dlc_clear_timer(d); + rfcomm_dlc_lock(d); d->state = BT_CONNECTED; d->state_change(d, 0); -- cgit v1.2.3 From 732547f96ea2442965a24e0ed529d285321a0fff Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 19 Apr 2009 19:14:14 +0200 Subject: Bluetooth: Fallback from eSCO to SCO on unspecified error Some Bluetooth chips (like the ones from Texas Instruments) don't do proper eSCO negotiations inside the Link Manager. They just return an error code and in case of the Kyocera ED-8800 headset it is just a random error. < HCI Command: Setup Synchronous Connection 0x01|0x0028) plen 17 handle 1 voice setting 0x0060 > HCI Event: Command Status (0x0f) plen 4 Setup Synchronous Connection (0x01|0x0028) status 0x00 ncmd 1 > HCI Event: Synchronous Connect Complete (0x2c) plen 17 status 0x1f handle 257 bdaddr 00:14:0A:xx:xx:xx type eSCO Error: Unspecified Error In these cases it is up to the host stack to fallback to a SCO setup and so retry with SCO parameters. 
Based on a report by Nick Pelly Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 55534244c3a..963f9662eaa 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1646,20 +1646,28 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu conn->type = SCO_LINK; } - if (conn->out && ev->status == 0x1c && conn->attempt < 2) { - conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | - (hdev->esco_type & EDR_ESCO_MASK); - hci_setup_sync(conn, conn->link->handle); - goto unlock; - } - - if (!ev->status) { + switch (ev->status) { + case 0x00: conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; hci_conn_add_sysfs(conn); - } else + break; + + case 0x1c: /* SCO interval rejected */ + case 0x1f: /* Unspecified error */ + if (conn->out && conn->attempt < 2) { + conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | + (hdev->esco_type & EDR_ESCO_MASK); + hci_setup_sync(conn, conn->link->handle); + goto unlock; + } + /* fall through */ + + default: conn->state = BT_CLOSED; + break; + } hci_proto_connect_cfm(conn, ev->status); if (ev->status) -- cgit v1.2.3 From 9499237a1c42a27fbcc7ed1d59e34df2b574cdfb Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 19 Apr 2009 19:30:03 +0200 Subject: Bluetooth: Add workaround for wrong HCI event in eSCO setup The Broadcom chips with 2.1 firmware handle the fallback case to a SCO link wrongly when setting up eSCO connections. < HCI Command: Setup Synchronous Connection (0x01|0x0028) plen 17 handle 11 voice setting 0x0060 > HCI Event: Command Status (0x0f) plen 4 Setup Synchronous Connection (0x01|0x0028) status 0x00 ncmd 1 > HCI Event: Connect Complete (0x03) plen 11 status 0x00 handle 1 bdaddr 00:1E:3A:xx:xx:xx type SCO encrypt 0x01 The Link Manager negotiates the fallback to SCO, but then sends out a Connect Complete event. This is wrong and the Link Manager should actually send a Synchronous Connection Complete event if the Setup Synchronous Connection has been used. Only the remote side is allowed to use Connect Complete to indicate the missing support for eSCO in the host stack. This patch adds a workaround for this which clearly should not be needed, but reality is that broken Broadcom devices are deployed. Based on a report by Ville Tervo Signed-off-by: Marcel Holtman --- net/bluetooth/hci_event.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 963f9662eaa..15f40ea8d54 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -866,8 +866,16 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); - if (!conn) - goto unlock; + if (!conn) { + if (ev->link_type != SCO_LINK) + goto unlock; + + conn = hci_conn_hash_lookup_ba(hdev, ESCO_LINK, &ev->bdaddr); + if (!conn) + goto unlock; + + conn->type = SCO_LINK; + } if (!ev->status) { conn->handle = __le16_to_cpu(ev->handle); -- cgit v1.2.3 From c7c1a0f60b90955855ba8cd9cfc480167bf6c3da Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 17 Apr 2009 04:19:36 +0000 Subject: ax25: proc uid file misses header This has been broken for a while. 
I happened to catch it testing because one app "knew" that the top line of the calls data was the policy line and got confused. Put the header back. Signed-off-by: Alan Cox Signed-off-by: David S. Miller --- net/ax25/ax25_uid.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 57aeba729ba..832bcf092a0 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -148,9 +148,13 @@ static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) { struct ax25_uid_assoc *pt; struct hlist_node *node; - int i = 0; + int i = 1; read_lock(&ax25_uid_lock); + + if (*pos == 0) + return SEQ_START_TOKEN; + ax25_uid_for_each(pt, node, &ax25_uid_list) { if (i == *pos) return pt; @@ -162,8 +166,10 @@ static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; - - return hlist_entry(((ax25_uid_assoc *)v)->uid_node.next, + if (v == SEQ_START_TOKEN) + return ax25_uid_list.first; + else + return hlist_entry(((ax25_uid_assoc *)v)->uid_node.next, ax25_uid_assoc, uid_node); } -- cgit v1.2.3 From 8caf153974f2274301e583fda732cc8e5b80331f Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Fri, 17 Apr 2009 10:08:49 +0000 Subject: net: sch_netem: Fix an inconsistency in ingress netem timestamps. Alex Sidorenko reported: "while experimenting with 'netem' we have found some strange behaviour. It seemed that ingress delay as measured by 'ping' command shows up on some hosts but not on others. After some investigation I have found that the problem is that skbuff->tstamp field value depends on whether there are any packet sniffers enabled. That is: - if any ptype_all handler is registered, the tstamp field is as expected - if there are no ptype_all handlers, the tstamp field does not show the delay" This patch prevents unnecessary update of tstamp in dev_queue_xmit_nit() on ingress path (with act_mirred) adding a check, so minimal overhead on the fast path, but only when sniffers etc. are active. Since netem at ingress seems to logically emulate a network before a host, tstamp is zeroed to trigger the update and pretend delays are from the outside. Reported-by: Alex Sidorenko Tested-by: Alex Sidorenko Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/core/dev.c | 5 +++++ net/sched/sch_netem.c | 8 ++++++++ 2 files changed, 13 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 343883f65ea..dcc357e4f91 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1336,7 +1336,12 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; +#ifdef CONFIG_NET_CLS_ACT + if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) + net_timestamp(skb); +#else net_timestamp(skb); +#endif rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_all, list) { diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index d876b873484..2b88295cb7b 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -280,6 +280,14 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) if (unlikely(!skb)) return NULL; +#ifdef CONFIG_NET_CLS_ACT + /* + * If it's at ingress let's pretend the delay is + * from the network (tstamp will be updated). 
+ */ + if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) + skb->tstamp.tv64 = 0; +#endif pr_debug("netem_dequeue: return skb=%p\n", skb); sch->q.qlen--; return skb; -- cgit v1.2.3 From 52cf3cc8acea52ecb93ef1dddb4ef2ae4e35c319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 18 Apr 2009 05:48:48 +0000 Subject: tcp: fix mid-wq adjustment helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just noticed while doing some new work that the recent mid-wq adjustment logic will misbehave when FACK is not in use (happens either due sysctl'ed off or auto-detected reordering) because I forgot the relevant TCPCB tagbit. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 53300fa2359..59aec609cec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -778,7 +778,7 @@ static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr) if (tp->lost_skb_hint && before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && - (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked)) + (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))) tp->lost_cnt_hint -= decr; tcp_verify_left_out(tp); -- cgit v1.2.3 From eb39c57ff7782bc015da517af1d9c3b2592e721e Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 19 Apr 2009 07:24:24 +0000 Subject: net: fix "compatibility" typos Signed-off-by: Marcin Slusarz Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index dcc357e4f91..001a4c551d4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4405,7 +4405,7 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; #ifdef CONFIG_COMPAT_NET_DEV_OPS - /* Netdevice_ops API compatiability support. + /* Netdevice_ops API compatibility support. * This is temporary until all network devices are converted. */ if (dev->netdev_ops) { @@ -4416,7 +4416,7 @@ int register_netdevice(struct net_device *dev) dev->name, netdev_drivername(dev, drivername, 64)); /* This works only because net_device_ops and the - compatiablity structure are the same. */ + compatibility structure are the same. */ dev->netdev_ops = (void *) &(dev->init); } #endif -- cgit v1.2.3 From 5db8765a86a4cbaf45adaf8c231cf9a6ca2dcfaf Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 16 Apr 2009 08:04:20 +0000 Subject: net: Fix GRO for multiple page fragments This loop over fragments in napi_fraginfo_skb() was "interesting". Signed-off-by: Ben Hutchings Signed-off-by: David S. 
Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 001a4c551d4..308a7d0c277 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2545,9 +2545,9 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, } BUG_ON(info->nr_frags > MAX_SKB_FRAGS); - frag = &info->frags[info->nr_frags - 1]; + frag = info->frags; - for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) { + for (i = 0; i < info->nr_frags; i++) { skb_fill_page_desc(skb, i, frag->page, frag->page_offset, frag->size); frag++; -- cgit v1.2.3 From c197facc8ea08062f8f949aade6a33649ee06771 Mon Sep 17 00:00:00 2001 From: "hummerbliss@gmail.com" Date: Mon, 20 Apr 2009 17:12:35 +0200 Subject: netfilter: bridge: allow fragmentation of VLAN packets traversing a bridge br_nf_dev_queue_xmit only checks for ETH_P_IP packets for fragmenting but not VLAN packets. This results in dropping of large VLAN packets. This can be observed when connection tracking is enabled. Connection tracking re-assembles fragmented packets, and these have to re-fragmented when transmitting out. Also, make sure only refragmented packets are defragmented as per suggestion from Patrick McHardy. Signed-off-by: Saikiran Madugula Signed-off-by: Patrick McHardy --- net/bridge/br_netfilter.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 3953ac4214c..e4a418fcb35 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -788,15 +788,23 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb, return NF_STOLEN; } +#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE) static int br_nf_dev_queue_xmit(struct sk_buff *skb) { - if (skb->protocol == htons(ETH_P_IP) && + if (skb->nfct != NULL && + (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) && skb->len > skb->dev->mtu && !skb_is_gso(skb)) return ip_fragment(skb, br_dev_queue_push_xmit); else return br_dev_queue_push_xmit(skb); } +#else +static int br_nf_dev_queue_xmit(struct sk_buff *skb) +{ + return br_dev_queue_push_xmit(skb); +} +#endif /* PF_BRIDGE/POST_ROUTING ********************************************/ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, -- cgit v1.2.3 From 18aaab15f9a9cd4f20dc596aa38408c5e5d208ed Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Sat, 18 Apr 2009 09:33:55 -0400 Subject: MAC80211: Remove unused MAC80211_VERBOSE_SPECT_MGMT_DEBUG. Remove this unused Kconfig variable, which Intel apparently once promised to make use of but never did. Signed-off-by: Robert P. J. Day Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/Kconfig | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index f3d9ae350fb..ecc3faf9f11 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -202,10 +202,3 @@ config MAC80211_DEBUG_COUNTERS and show them in debugfs. If unsure, say N. - -config MAC80211_VERBOSE_SPECT_MGMT_DEBUG - bool "Verbose Spectrum Management (IEEE 802.11h)debugging" - depends on MAC80211_DEBUG_MENU - ---help--- - Say Y here to print out verbose Spectrum Management (IEEE 802.11h) - debug messages. 
-- cgit v1.2.3 From d91c01c757bd9659ac10549504586fae610265a4 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 18 Apr 2009 21:53:15 +0300 Subject: nl80211: Make nl80211_send_mlme_event() atomic One of the code paths sending deauth/disassoc events ends up calling this function with rcu_read_lock held, so we must use GFP_ATOMIC in allocation routines. Reported-by: Johannes Berg Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 353e1a4ece8..2456e4ee445 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3334,7 +3334,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (!msg) return; @@ -3353,7 +3353,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL); + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_ATOMIC); return; nla_put_failure: -- cgit v1.2.3 From ad935687dbe7307f5abd9e3f610a965a287324a9 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 19 Apr 2009 08:47:19 +0300 Subject: mac80211: fix beacon loss detection after scan Currently beacon loss detection triggers after a scan. A probe request is sent and a message like this is printed to the log: wlan0: beacon loss from AP 00:12:17:e7:98:de - sending probe request But in fact there is no beacon loss, the beacons are just not received because of the ongoing scan. Fix it by updating last_beacon after the scan has finished. Reported-by: Jaswinder Singh Rajput Signed-off-by: Kalle Valo Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1619e0cd26e..ccfc21aa0b6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1913,9 +1913,17 @@ static void ieee80211_sta_work(struct work_struct *work) static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) { - if (sdata->vif.type == NL80211_IFTYPE_STATION) + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + /* + * Need to update last_beacon to avoid beacon loss + * test to trigger. + */ + sdata->u.mgd.last_beacon = jiffies; + + queue_work(sdata->local->hw.workqueue, &sdata->u.mgd.work); + } } /* interface setup */ -- cgit v1.2.3 From 7e0986c17f695952ce5d61ed793ce048ba90a661 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Apr 2009 13:22:11 +0200 Subject: mac80211: fix basic rate bitmap calculation "mac80211: fix basic rates setting from association response" introduced a copy/paste error. Unfortunately, this not just leads to wrong data being passed to the driver but is remotely exploitable for some hardware or driver combinations. Signed-off-by: Johannes Berg Cc: stable@kernel.org [2.6.29] Signed-off-by: John W. 
Linville --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ccfc21aa0b6..132938b073d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1366,7 +1366,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, for (i = 0; i < elems.ext_supp_rates_len; i++) { int rate = (elems.ext_supp_rates[i] & 0x7f) * 5; - bool is_basic = !!(elems.supp_rates[i] & 0x80); + bool is_basic = !!(elems.ext_supp_rates[i] & 0x80); if (rate > 110) have_higher_than_11mbit = true; -- cgit v1.2.3 From 7d42081a271bd8a82f2100524085c4f029e47717 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Mon, 20 Apr 2009 15:36:19 -0700 Subject: mac80211: do not print WARN if config interface It is expected that config interface will always succeed as mac80211 will only request what driver supports. The exception here is when a device has rfkill enabled. At this time the rfkill state is unknown to mac80211 and config interface can fail. When this happens we deal with this error instead of printing a WARN. Signed-off-by: Reinette Chatre Signed-off-by: John W. Linville --- net/mac80211/pm.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 02730232649..81985d27cbd 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -156,8 +156,19 @@ int __ieee80211_resume(struct ieee80211_hw *hw) case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_AP: case NL80211_IFTYPE_MESH_POINT: - WARN_ON(ieee80211_if_config(sdata, changed)); - ieee80211_bss_info_change_notify(sdata, ~0); + /* + * Driver's config_interface can fail if rfkill is + * enabled. Accommodate this return code. + * FIXME: When mac80211 has knowledge of rfkill + * state the code below can change back to: + * WARN(ieee80211_if_config(sdata, changed)); + * ieee80211_bss_info_change_notify(sdata, ~0); + */ + if (ieee80211_if_config(sdata, changed)) + printk(KERN_DEBUG "%s: failed to configure interface during resume\n", + sdata->dev->name); + else + ieee80211_bss_info_change_notify(sdata, ~0); break; case NL80211_IFTYPE_WDS: break; -- cgit v1.2.3 From d1bcb9f1273adee6d2ce5edf84f19409a5cc31b9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 21 Apr 2009 01:36:59 +0200 Subject: mac80211: fix alignment calculation bug When checking whether or not a given frame needs to be moved to be properly aligned to a 4-byte boundary, we use & 4 which wasn't intended, this code should check the lowest two bits. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5fa7aedd90e..9776f73c51a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1397,7 +1397,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) * mac80211. That also explains the __skb_push() * below. */ - align = (unsigned long)skb->data & 4; + align = (unsigned long)skb->data & 3; if (align) { if (WARN_ON(skb_headroom(skb) < 3)) { dev_kfree_skb(skb); -- cgit v1.2.3 From bbe188c8f16effd902d1ad391e06e41ce649b22e Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 21 Apr 2009 06:04:20 +0000 Subject: af_iucv: consider state IUCV_CLOSING when closing a socket Make sure a second invocation of iucv_sock_close() guarantees proper freeing of an iucv path. Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/iucv/af_iucv.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 49e786535dc..2941ee50393 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -172,6 +172,7 @@ static void iucv_sock_close(struct sock *sk) err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo); } + case IUCV_CLOSING: /* fall through */ sk->sk_state = IUCV_CLOSED; sk->sk_state_change(sk); @@ -224,6 +225,8 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) spin_lock_init(&iucv_sk(sk)->message_q.lock); skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); iucv_sk(sk)->send_tag = 0; + iucv_sk(sk)->path = NULL; + memset(&iucv_sk(sk)->src_user_id , 0, 32); sk->sk_destruct = iucv_sock_destruct; sk->sk_sndtimeo = IUCV_CONN_TIMEOUT; -- cgit v1.2.3 From 60d3705fcbfe7deca8e94bc7ddecd6f9f1a4647e Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 21 Apr 2009 06:04:21 +0000 Subject: af_iucv: fix oops in iucv_sock_recvmsg() for MSG_PEEK flag If iucv_sock_recvmsg() is called with MSG_PEEK flag set, the skb is enqueued twice. If the socket is then closed, the pointer to the skb is freed twice. Remove the skb_queue_head() call for MSG_PEEK, because the skb_recv_datagram() function already handles MSG_PEEK (does not dequeue the skb). Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 2941ee50393..42b3be302c5 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -814,6 +814,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + /* receive/dequeue next skb: + * the function understands MSG_PEEK and, thus, does not dequeue skb */ skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -861,9 +863,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, iucv_process_message_q(sk); spin_unlock_bh(&iucv->message_q.lock); } - - } else - skb_queue_head(&sk->sk_receive_queue, skb); + } done: return err ? : copied; -- cgit v1.2.3 From fe86e54ef9465c97a16337d2a41a4cf486b937ae Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 21 Apr 2009 06:04:22 +0000 Subject: af_iucv: Reject incoming msgs if RECV_SHUTDOWN is set Reject incoming iucv messages if the receive direction has been shut down. It avoids that the queue of outstanding messages increases and exceeds the message limit of the iucv communication path. Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/iucv/af_iucv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 42b3be302c5..ee517108856 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1116,8 +1116,10 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) struct sock_msg_q *save_msg; int len; - if (sk->sk_shutdown & RCV_SHUTDOWN) + if (sk->sk_shutdown & RCV_SHUTDOWN) { + iucv_message_reject(path, msg); return; + } if (!list_empty(&iucv->message_q.list) || !skb_queue_empty(&iucv->backlog_skb_q)) -- cgit v1.2.3 From e14ad5fa8705fb354e72312479abbe420ebc3f8e Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 21 Apr 2009 06:04:23 +0000 Subject: af_iucv: Test additional sk states in iucv_sock_shutdown Add few more sk states in iucv_sock_shutdown(). Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index ee517108856..9cfdaaddc5b 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -937,6 +937,9 @@ static int iucv_sock_shutdown(struct socket *sock, int how) lock_sock(sk); switch (sk->sk_state) { + case IUCV_DISCONN: + case IUCV_CLOSING: + case IUCV_SEVERED: case IUCV_CLOSED: err = -ENOTCONN; goto fail; -- cgit v1.2.3 From 3fa6b5adbe46b3d665267dee0f879858ab464f44 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 21 Apr 2009 06:04:24 +0000 Subject: af_iucv: Fix race when queuing incoming iucv messages AF_IUCV runs into a race when queuing incoming iucv messages and receiving the resulting backlog. If the Linux system is under pressure (high load or steal time), the message queue grows up, but messages are not received and queued onto the backlog queue. In that case, applications do not receive any data with recvmsg() even if AF_IUCV puts incoming messages onto the message queue. The race can be avoided if the message queue spinlock in the message_pending callback is spreaded across the entire callback function. Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/iucv/af_iucv.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 9cfdaaddc5b..b51c9187c34 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1124,6 +1124,8 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) return; } + spin_lock(&iucv->message_q.lock); + if (!list_empty(&iucv->message_q.list) || !skb_queue_empty(&iucv->backlog_skb_q)) goto save_message; @@ -1137,9 +1139,8 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) if (!skb) goto save_message; - spin_lock(&iucv->message_q.lock); iucv_process_message(sk, skb, path, msg); - spin_unlock(&iucv->message_q.lock); + goto out_unlock; return; @@ -1150,8 +1151,9 @@ save_message: save_msg->path = path; save_msg->msg = *msg; - spin_lock(&iucv->message_q.lock); list_add_tail(&save_msg->list, &iucv->message_q.list); + +out_unlock: spin_unlock(&iucv->message_q.lock); } -- cgit v1.2.3 From 50b2ff1bc47baacb8e9882b2b2a74b240ddbeecf Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 21 Apr 2009 10:04:22 +0000 Subject: netlabel: Always remove the correct address selector The NetLabel address selector mechanism has a problem where it can get mistakenly remove the wrong selector when similar addresses are used. The problem is caused when multiple addresses are configured that have different netmasks but the same address, e.g. 127.0.0.0/8 and 127.0.0.0/24. This patch fixes the problem. Reported-by: Etienne Basset Signed-off-by: Paul Moore Acked-by: James Morris Tested-by: Etienne Basset Signed-off-by: David S. Miller --- net/netlabel/netlabel_addrlist.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index 834c6eb7f48..c0519139679 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -256,13 +256,11 @@ struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask, { struct netlbl_af4list *entry; - entry = netlbl_af4list_search(addr, head); - if (entry != NULL && entry->addr == addr && entry->mask == mask) { - netlbl_af4list_remove_entry(entry); - return entry; - } - - return NULL; + entry = netlbl_af4list_search_exact(addr, mask, head); + if (entry == NULL) + return NULL; + netlbl_af4list_remove_entry(entry); + return entry; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -299,15 +297,11 @@ struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr, { struct netlbl_af6list *entry; - entry = netlbl_af6list_search(addr, head); - if (entry != NULL && - ipv6_addr_equal(&entry->addr, addr) && - ipv6_addr_equal(&entry->mask, mask)) { - netlbl_af6list_remove_entry(entry); - return entry; - } - - return NULL; + entry = netlbl_af6list_search_exact(addr, mask, head); + if (entry == NULL) + return NULL; + netlbl_af6list_remove_entry(entry); + return entry; } #endif /* IPv6 */ -- cgit v1.2.3 From cc29c70dd581f85ee7a3e7980fb031f90b90a2ab Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 22 Apr 2009 00:49:51 -0700 Subject: net/netrom: Fix socket locking Patch "af_rose/x25: Sanity check the maximum user frame size" (commit 83e0bbcbe2145f160fbaa109b0439dae7f4a38a9) from Alan Cox got locking wrong. If we bail out due to user frame size being too large, we must unlock the socket beforehand. Signed-off-by: Jean Delvare Signed-off-by: David S. 
Miller --- net/netrom/af_netrom.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 4e705f87969..3be0e016ab7 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1084,8 +1084,10 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock, /* Build a packet - the conventional user limit is 236 bytes. We can do ludicrously large NetROM frames but must not overflow */ - if (len > 65536) - return -EMSGSIZE; + if (len > 65536) { + err = -EMSGSIZE; + goto out; + } SOCK_DEBUG(sk, "NET/ROM: sendto: building packet.\n"); size = len + NR_NETWORK_LEN + NR_TRANSPORT_LEN; -- cgit v1.2.3 From 29fe1b481283a1bada994a69f65736db4ae6f35f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 22 Apr 2009 02:26:37 -0700 Subject: netfilter: ctnetlink: fix gcc warning during compilation This patch fixes a (bogus?) gcc warning during compilation: net/netfilter/nf_conntrack_netlink.c:1234: warning: 'helpname' may be used uninitialized in this function net/netfilter/nf_conntrack_netlink.c:991: warning: 'helpname' may be used uninitialized in this function In fact, helpname is initialized by ctnetlink_parse_help() so I cannot see a way to use it without being initialized. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 0ea36e0c8a0..f13fc57e1ec 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -988,7 +988,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) { struct nf_conntrack_helper *helper; struct nf_conn_help *help = nfct_help(ct); - char *helpname; + char *helpname = NULL; int err; /* don't change helper of sibling connections */ @@ -1231,7 +1231,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], rcu_read_lock(); if (cda[CTA_HELP]) { - char *helpname; + char *helpname = NULL; err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); if (err < 0) -- cgit v1.2.3 From 5ff482940f5aa2cdc3424c4a8ea94b9833b2af5f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 24 Apr 2009 15:37:44 +0200 Subject: netfilter: nf_ct_dccp/udplite: fix protocol registration error Commit d0dba725 (netfilter: ctnetlink: add callbacks to the per-proto nlattrs) changed the protocol registration function to abort if the to-be registered protocol doesn't provide a new callback function. The DCCP and UDP-Lite IPv6 protocols were missed in this conversion, add the required callback pointer. 
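The failure mode is easy to reproduce in isolation. A standalone sketch of the idea (illustrative names, not the kernel's registration API): registration is refused when a required per-protocol netlink callback is left NULL, which is what happened to the IPv6 DCCP and UDP-Lite definitions before this change.

#include <stdio.h>

struct l4proto_ops {
        const char *name;
        int (*nlattr_size)(void);       /* must be non-NULL to register */
};

static int register_l4proto(const struct l4proto_ops *p)
{
        if (p->nlattr_size == NULL) {
                fprintf(stderr, "%s: missing nlattr_size, registration refused\n",
                        p->name);
                return -1;
        }
        printf("%s: registered\n", p->name);
        return 0;
}

static int dccp_nlattr_size(void) { return 4; }

int main(void)
{
        struct l4proto_ops broken = { .name = "dccp6 (unpatched)", .nlattr_size = NULL };
        struct l4proto_ops fixed  = { .name = "dccp6 (patched)",   .nlattr_size = dccp_nlattr_size };

        register_l4proto(&broken);      /* refused, like the definition missing its callback */
        register_l4proto(&fixed);
        return 0;
}
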
Reported-and-tested-by: Steven Jan Springl Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto_dccp.c | 1 + net/netfilter/nf_conntrack_proto_udplite.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 50dac8dbe7d..5411d63f31a 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -777,6 +777,7 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { .print_conntrack = dccp_print_conntrack, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .to_nlattr = dccp_to_nlattr, + .nlattr_size = dccp_nlattr_size, .from_nlattr = nlattr_to_dccp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 4614696c1b8..0badedc542d 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -204,6 +204,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .error = udplite_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif -- cgit v1.2.3 From 4b0706624930dc75c3b0d0df463d89759ef7de29 Mon Sep 17 00:00:00 2001 From: Laszlo Attila Toth Date: Fri, 24 Apr 2009 16:55:25 +0200 Subject: netfilter: Kconfig: TProxy doesn't depend on NF_CONNTRACK Signed-off-by: Laszlo Attila Toth Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 2329c5f5055..881203c4a14 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -275,6 +275,8 @@ config NF_CT_NETLINK help This option enables support for a netlink-based userspace interface +endif # NF_CONNTRACK + # transparent proxy support config NETFILTER_TPROXY tristate "Transparent proxying support (EXPERIMENTAL)" @@ -290,8 +292,6 @@ config NETFILTER_TPROXY To compile it as a module, choose M here. If unsure, say N. -endif # NF_CONNTRACK - config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n -- cgit v1.2.3 From 71951b64a5a87c09eb6fde59ce51aaab2fdaeab2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 24 Apr 2009 16:58:41 +0200 Subject: netfilter: nf_ct_dccp: add missing role attributes for DCCP This patch adds missing role attribute to the DCCP type, otherwise the creation of entries is not of any use. The attribute added is CTA_PROTOINFO_DCCP_ROLE which contains the role of the conntrack original tuple. 
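Only the role of the original direction travels over netlink; the reply direction is implied. A standalone sketch of that derivation (plain C, names illustrative rather than the kernel's):

#include <stdio.h>

enum dccp_role { ROLE_CLIENT, ROLE_SERVER };

/* dir 0 = original tuple, dir 1 = reply tuple */
static void set_roles(enum dccp_role orig_role, enum dccp_role role[2])
{
        role[0] = orig_role;
        role[1] = (orig_role == ROLE_CLIENT) ? ROLE_SERVER : ROLE_CLIENT;
}

int main(void)
{
        enum dccp_role role[2];

        set_roles(ROLE_CLIENT, role);
        printf("original=%d reply=%d\n", role[0], role[1]);     /* 0 1 */

        set_roles(ROLE_SERVER, role);
        printf("original=%d reply=%d\n", role[0], role[1]);     /* 1 0 */
        return 0;
}
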
Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto_dccp.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 5411d63f31a..8e757dd5339 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -633,6 +633,8 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, if (!nest_parms) goto nla_put_failure; NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state); + NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE, + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]); nla_nest_end(skb, nest_parms); read_unlock_bh(&dccp_lock); return 0; @@ -644,6 +646,7 @@ nla_put_failure: static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, }; static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) @@ -661,11 +664,21 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) return err; if (!tb[CTA_PROTOINFO_DCCP_STATE] || - nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) + !tb[CTA_PROTOINFO_DCCP_ROLE] || + nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX || + nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) { return -EINVAL; + } write_lock_bh(&dccp_lock); ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); + if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) { + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; + ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; + } else { + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER; + ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT; + } write_unlock_bh(&dccp_lock); return 0; } -- cgit v1.2.3 From 37e55cf0ceb8803256bf69a3e45bd668bf90b76f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 24 Apr 2009 17:05:21 +0200 Subject: netfilter: xt_recent: fix stack overread in compat code Related-to: commit 325fb5b4d26038cba665dd0d8ee09555321061f0 The compat path suffers from a similar problem. It only uses a __be32 when all of the recent code uses, and expects, an nf_inet_addr everywhere. As a result, addresses stored by xt_recents were filled with whatever other stuff was on the stack following the be32. Signed-off-by: Jan Engelhardt With a minor compile fix from Roman. 
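The pattern behind the fix is worth spelling out: a lookup key that is wider than the value written into it must be zeroed in full before use. A userspace model of the change (hypothetical types, not the kernel's definitions):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

union inet_addr {
        uint32_t ip;            /* what the old code filled in */
        uint8_t  all[16];       /* full width actually used as the lookup key */
};

int main(void)
{
        union inet_addr addr;

        memset(&addr, 0, sizeof(addr)); /* zero the whole union, as the fix's "= {}" does */
        addr.ip = 0x7f000001;           /* illustrative address value */

        uint8_t key[16];
        memcpy(key, addr.all, sizeof(key));     /* now a deterministic 16-byte key */

        for (size_t i = 0; i < sizeof(key); i++)
                printf("%02x", key[i]);
        printf("\n");
        return 0;
}
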
Reported-and-tested-by: Roman Hoog Antink Signed-off-by: Patrick McHardy --- net/netfilter/xt_recent.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 791e030ea90..eb0ceb84652 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -474,7 +474,7 @@ static ssize_t recent_old_proc_write(struct file *file, struct recent_table *t = pde->data; struct recent_entry *e; char buf[sizeof("+255.255.255.255")], *c = buf; - __be32 addr; + union nf_inet_addr addr = {}; int add; if (size > sizeof(buf)) @@ -506,14 +506,13 @@ static ssize_t recent_old_proc_write(struct file *file, add = 1; break; } - addr = in_aton(c); + addr.ip = in_aton(c); spin_lock_bh(&recent_lock); - e = recent_entry_lookup(t, (const void *)&addr, NFPROTO_IPV4, 0); + e = recent_entry_lookup(t, &addr, NFPROTO_IPV4, 0); if (e == NULL) { if (add) - recent_entry_init(t, (const void *)&addr, - NFPROTO_IPV4, 0); + recent_entry_init(t, &addr, NFPROTO_IPV4, 0); } else { if (add) recent_entry_update(t, e); -- cgit v1.2.3 From d0687be7c7ae21461da4438d5fd059b48487bfe1 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 3 Apr 2009 15:18:24 -0500 Subject: svcrdma: Fix dma map direction for rdma read targets The nfs server rdma transport was mapping rdma read target pages for TO_DEVICE instead of FROM_DEVICE. This causes data corruption on non cache-coherent systems if frmrs are used. Signed-off-by: Steve Wise Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 629a28764da..42a6f9f2028 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -265,7 +265,7 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, frmr->page_list->page_list[page_no] = ib_dma_map_single(xprt->sc_cm_id->device, page_address(rqstp->rq_arg.pages[page_no]), - PAGE_SIZE, DMA_TO_DEVICE); + PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) goto fatal_err; -- cgit v1.2.3 From adc667e84f086aa110d810f3476c494e48eaabaa Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Sat, 25 Apr 2009 18:03:35 -0700 Subject: vlan: update vlan carrier state for admin up/down Currently, the VLAN event handler does not adjust the VLAN device's carrier state when the real device or the VLAN device is set administratively up or down. The following patch adds a transfer of operating state from the real device to the VLAN device when the real device is administratively set up or down, and sets the carrier state up or down during init, open and close of the VLAN device. This permits observers above the VLAN device that care about the carrier state (bonding's link monitor, for example) to receive updates for administrative changes by more closely mimicing the behavior of real devices. 
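A much simplified model of the behaviour being added (plain C that collapses operating state into a single carrier flag, not the kernel's netdev state machine): the VLAN device's carrier only stays up while both it and the real device are administratively up and the real device has link.

#include <stdio.h>
#include <stdbool.h>

struct dev_model {
        bool admin_up;
        bool carrier;
};

static void vlan_sync_carrier(const struct dev_model *real, struct dev_model *vlan)
{
        vlan->carrier = vlan->admin_up && real->admin_up && real->carrier;
}

int main(void)
{
        struct dev_model real = { .admin_up = true,  .carrier = true  };
        struct dev_model vlan = { .admin_up = true,  .carrier = false };

        vlan_sync_carrier(&real, &vlan);
        printf("vlan carrier after open      : %d\n", vlan.carrier);    /* 1 */

        real.admin_up = false;          /* real device set administratively down */
        vlan_sync_carrier(&real, &vlan);
        printf("vlan carrier after real down : %d\n", vlan.carrier);    /* 0 */
        return 0;
}
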
Signed-off-by: Jay Vosburgh Signed-off-by: Patrick McHardy --- net/8021q/vlan.c | 2 ++ net/8021q/vlan_dev.c | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 2b7390e377b..d1e10546eb8 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -492,6 +492,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, continue; dev_change_flags(vlandev, flgs & ~IFF_UP); + vlan_transfer_operstate(dev, vlandev); } break; @@ -507,6 +508,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, continue; dev_change_flags(vlandev, flgs | IFF_UP); + vlan_transfer_operstate(dev, vlandev); } break; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 6b092136401..b4b9068e55a 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -462,6 +462,7 @@ static int vlan_dev_open(struct net_device *dev) if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_join(dev); + netif_carrier_on(dev); return 0; clear_allmulti: @@ -471,6 +472,7 @@ del_unicast: if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN); out: + netif_carrier_off(dev); return err; } @@ -492,6 +494,7 @@ static int vlan_dev_stop(struct net_device *dev) if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) dev_unicast_delete(real_dev, dev->dev_addr, dev->addr_len); + netif_carrier_off(dev); return 0; } @@ -612,6 +615,8 @@ static int vlan_dev_init(struct net_device *dev) struct net_device *real_dev = vlan_dev_info(dev)->real_dev; int subclass = 0; + netif_carrier_off(dev); + /* IFF_BROADCAST|IFF_MULTICAST; ??? */ dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI); dev->iflink = real_dev->ifindex; -- cgit v1.2.3 From 6a783c9067e3f71aac61a9262fe42c1f68efd4fc Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 27 Apr 2009 02:58:59 -0700 Subject: xfrm: wrong hash value for temporary SA When kernel inserts a temporary SA for IKE, it uses the wrong hash value for dst list. Two hash values were calcultated before: one with source address and one with a wildcard source address. Bug hinted by Junwei Zhang Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/xfrm/xfrm_state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 82271720d97..5f1f86565f1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -794,7 +794,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, { static xfrm_address_t saddr_wildcard = { }; struct net *net = xp_net(pol); - unsigned int h; + unsigned int h, h_wildcard; struct hlist_node *entry; struct xfrm_state *x, *x0, *to_put; int acquire_in_progress = 0; @@ -819,8 +819,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (best) goto found; - h = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && -- cgit v1.2.3 From c9503e0fe052020e0294cd07d0ecd982eb7c9177 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 27 Apr 2009 05:42:24 -0700 Subject: ipv4: Limit size of route cache hash table Right now we have no upper limit on the size of the route cache hash table. On a 128GB POWER6 box it ends up as 32MB: IP route cache hash table entries: 4194304 (order: 9, 33554432 bytes) It would be nice to cap this for memory consumption reasons, but a massive hashtable also causes a significant spike when measuring OS jitter. With a 32MB hashtable and 4 million entries, rt_worker_func is taking 5 ms to complete. On another system with more memory it's taking 14 ms. Even though rt_worker_func does call cond_sched() to limit its impact, in an HPC environment we want to keep all sources of OS jitter to a minimum. With the patch applied we limit the number of entries to 512k which can still be overriden by using the rt_entries boot option: IP route cache hash table entries: 524288 (order: 6, 4194304 bytes) With this patch rt_worker_func now takes 0.460 ms on the same system. Signed-off-by: Anton Blanchard Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c40debe51b3..c4c60e9f068 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3397,7 +3397,7 @@ int __init ip_rt_init(void) 0, &rt_hash_log, &rt_hash_mask, - 0); + rhash_entries ? 0 : 512 * 1024); memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket)); rt_hash_lock_init(); -- cgit v1.2.3 From bf368e4e70cd4e0f880923c44e95a4273d725ab4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Apr 2009 02:24:21 -0700 Subject: net: Avoid extra wakeups of threads blocked in wait_for_packet() In 2.6.25 we added UDP mem accounting. This unfortunatly added a penalty when a frame is transmitted, since we have at TX completion time to call sock_wfree() to perform necessary memory accounting. This calls sock_def_write_space() and utimately scheduler if any thread is waiting on the socket. Thread(s) waiting for an incoming frame was scheduled, then had to sleep again as event was meaningless. (All threads waiting on a socket are using same sk_sleep anchor) This adds lot of extra wakeups and increases latencies, as noted by Christoph Lameter, and slows down softirq handler. 
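The filter that the patch below installs as the receiver's wake function is small enough to model on its own (standalone C using the poll(2) flag names, which are the same bits the kernel passes as the wakeup key):

#include <stdio.h>
#include <poll.h>

static int receiver_wants_wakeup(unsigned long key_bits)
{
        /* key_bits == 0 is an unkeyed wakeup and is always delivered */
        if (key_bits && !(key_bits & (POLLIN | POLLERR)))
                return 0;       /* write-space event, not interesting to a reader */
        return 1;
}

int main(void)
{
        printf("POLLOUT only : %d\n", receiver_wants_wakeup(POLLOUT));  /* 0 */
        printf("POLLIN       : %d\n", receiver_wants_wakeup(POLLIN));   /* 1 */
        printf("unkeyed (0)  : %d\n", receiver_wants_wakeup(0));        /* 1 */
        return 0;
}
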
Reference : http://marc.info/?l=linux-netdev&m=124060437012283&w=2 Fortunatly, Davide Libenzi recently added concept of keyed wakeups into kernel, and particularly for sockets (see commit 37e5540b3c9d838eb20f2ca8ea2eb8072271e403 epoll keyed wakeups: make sockets use keyed wakeups) Davide goal was to optimize epoll, but this new wakeup infrastructure can help non epoll users as well, if they care to setup an appropriate handler. This patch introduces new DEFINE_WAIT_FUNC() helper and uses it in wait_for_packet(), so that only relevant event can wakeup a thread blocked in this function. Trace of function calls from bnx2 TX completion bnx2_poll_work() is : __kfree_skb() skb_release_head_state() sock_wfree() sock_def_write_space() __wake_up_sync_key() __wake_up_common() receiver_wake_function() : Stops here since thread is waiting for an INPUT Reported-by: Christoph Lameter Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/datagram.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index d0de644b378..b01a76abe1d 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -64,13 +64,25 @@ static inline int connection_based(struct sock *sk) return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM; } +static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync, + void *key) +{ + unsigned long bits = (unsigned long)key; + + /* + * Avoid a wakeup if event not interesting for us + */ + if (bits && !(bits & (POLLIN | POLLERR))) + return 0; + return autoremove_wake_function(wait, mode, sync, key); +} /* * Wait for a packet.. */ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) { int error; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, receiver_wake_function); prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); -- cgit v1.2.3 From f3784d834c71689336fa272df420b45345cb6b84 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 23 Apr 2009 14:50:54 +0300 Subject: Bluetooth: Ensure that HCI sysfs add/del is preempt safe Use a different work_struct variables for add_conn() and del_conn() and use single work queue instead of two for adding and deleting connections. 
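One way to see why a single shared work item is fragile: queueing a work item that is still pending is a no-op, so back-to-back add and delete requests on one embedded work struct can lose one of them (and re-initialising it while queued is worse). A simplified userspace model of that property, not the kernel workqueue API:

#include <stdio.h>
#include <stdbool.h>

struct work_model {
        bool pending;
        const char *what;
};

/* models queue_work(): an already-pending item is not queued again */
static bool queue_work_model(struct work_model *w, const char *what)
{
        if (w->pending)
                return false;
        w->pending = true;
        w->what = what;
        return true;
}

struct conn_shared { struct work_model work; };                 /* old layout */
struct conn_split  { struct work_model work_add, work_del; };   /* new layout */

int main(void)
{
        struct conn_shared o = { { false, NULL } };
        struct conn_split  n = { { false, NULL }, { false, NULL } };

        bool a1 = queue_work_model(&o.work, "add_conn");
        bool d1 = queue_work_model(&o.work, "del_conn");        /* dropped */
        printf("shared item: add=%d del=%d\n", a1, d1);

        bool a2 = queue_work_model(&n.work_add, "add_conn");
        bool d2 = queue_work_model(&n.work_del, "del_conn");
        printf("split items: add=%d del=%d\n", a2, d2);
        return 0;
}
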
It eliminates the following error on a preemptible kernel: [ 204.358032] Unable to handle kernel NULL pointer dereference at virtual address 0000000c [ 204.370697] pgd = c0004000 [ 204.373443] [0000000c] *pgd=00000000 [ 204.378601] Internal error: Oops: 17 [#1] PREEMPT [ 204.383361] Modules linked in: vfat fat rfcomm sco l2cap sd_mod scsi_mod iphb pvr2d drm omaplfb ps [ 204.438537] CPU: 0 Not tainted (2.6.28-maemo2 #1) [ 204.443664] PC is at klist_put+0x2c/0xb4 [ 204.447601] LR is at klist_put+0x18/0xb4 [ 204.451568] pc : [] lr : [] psr: a0000113 [ 204.451568] sp : cf1b3f10 ip : cf1b3f10 fp : cf1b3f2c [ 204.463104] r10: 00000000 r9 : 00000000 r8 : bf08029c [ 204.468353] r7 : c7869200 r6 : cfbe2690 r5 : c78692c8 r4 : 00000001 [ 204.474945] r3 : 00000001 r2 : cf1b2000 r1 : 00000001 r0 : 00000000 [ 204.481506] Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment kernel [ 204.488861] Control: 10c5387d Table: 887fc018 DAC: 00000017 [ 204.494628] Process btdelconn (pid: 515, stack limit = 0xcf1b22e0) Signed-off-by: Roger Quadros Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_sysfs.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index ed82796d4a0..b7c51082dde 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -9,8 +9,7 @@ struct class *bt_class = NULL; EXPORT_SYMBOL_GPL(bt_class); -static struct workqueue_struct *btaddconn; -static struct workqueue_struct *btdelconn; +static struct workqueue_struct *bluetooth; static inline char *link_typetostr(int type) { @@ -88,9 +87,10 @@ static struct device_type bt_link = { static void add_conn(struct work_struct *work) { - struct hci_conn *conn = container_of(work, struct hci_conn, work); + struct hci_conn *conn = container_of(work, struct hci_conn, work_add); - flush_workqueue(btdelconn); + /* ensure previous add/del is complete */ + flush_workqueue(bluetooth); if (device_add(&conn->dev) < 0) { BT_ERR("Failed to register connection device"); @@ -114,9 +114,9 @@ void hci_conn_add_sysfs(struct hci_conn *conn) device_initialize(&conn->dev); - INIT_WORK(&conn->work, add_conn); + INIT_WORK(&conn->work_add, add_conn); - queue_work(btaddconn, &conn->work); + queue_work(bluetooth, &conn->work_add); } /* @@ -131,9 +131,12 @@ static int __match_tty(struct device *dev, void *data) static void del_conn(struct work_struct *work) { - struct hci_conn *conn = container_of(work, struct hci_conn, work); + struct hci_conn *conn = container_of(work, struct hci_conn, work_del); struct hci_dev *hdev = conn->hdev; + /* ensure previous add/del is complete */ + flush_workqueue(bluetooth); + while (1) { struct device *dev; @@ -156,9 +159,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn) if (!device_is_registered(&conn->dev)) return; - INIT_WORK(&conn->work, del_conn); + INIT_WORK(&conn->work_del, del_conn); - queue_work(btdelconn, &conn->work); + queue_work(bluetooth, &conn->work_del); } static inline char *host_typetostr(int type) @@ -435,20 +438,13 @@ void hci_unregister_sysfs(struct hci_dev *hdev) int __init bt_sysfs_init(void) { - btaddconn = create_singlethread_workqueue("btaddconn"); - if (!btaddconn) - return -ENOMEM; - - btdelconn = create_singlethread_workqueue("btdelconn"); - if (!btdelconn) { - destroy_workqueue(btaddconn); + bluetooth = create_singlethread_workqueue("bluetooth"); + if (!bluetooth) return -ENOMEM; - } bt_class = class_create(THIS_MODULE, "bluetooth"); if (IS_ERR(bt_class)) { - 
destroy_workqueue(btdelconn); - destroy_workqueue(btaddconn); + destroy_workqueue(bluetooth); return PTR_ERR(bt_class); } @@ -457,8 +453,7 @@ int __init bt_sysfs_init(void) void bt_sysfs_cleanup(void) { - destroy_workqueue(btaddconn); - destroy_workqueue(btdelconn); + destroy_workqueue(bluetooth); class_destroy(bt_class); } -- cgit v1.2.3 From 052b30b0a8eec8db5b18ad49effdf2a9ba4c1e1a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 26 Apr 2009 20:01:22 +0200 Subject: Bluetooth: Add different pairing timeout for Legacy Pairing The Bluetooth stack uses a reference counting for all established ACL links and if no user (L2CAP connection) is present, the link will be terminated to save power. The problem part is the dedicated pairing when using Legacy Pairing (Bluetooth 2.0 and before). At that point no user is present and pairing attempts will be disconnected within 10 seconds or less. In previous kernel version this was not a problem since the disconnect timeout wasn't triggered on incoming connections for the first time. However this caused issues with broken host stacks that kept the connections around after dedicated pairing. When the support for Simple Pairing got added, the link establishment procedure needed to be changed and now causes issues when using Legacy Pairing When using Simple Pairing it is possible to do a proper reference counting of ACL link users. With Legacy Pairing this is not possible since the specification is unclear in some areas and too many broken Bluetooth devices have already been deployed. So instead of trying to deal with all the broken devices, a special pairing timeout will be introduced that increases the timeout to 60 seconds when pairing is triggered. If a broken devices now puts the stack into an unforeseen state, the worst that happens is the disconnect timeout triggers after 120 seconds instead of 4 seconds. This allows successful pairings with legacy and broken devices now. 
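In effect the disconnect timer simply runs with a longer period while a PIN code request is outstanding. A sketch using the figures from the description above (illustrative constants, not the kernel's definitions):

#include <stdio.h>
#include <stdbool.h>

#define DISCONN_TIMEOUT_SEC  4  /* baseline worst case quoted above */
#define PAIRING_TIMEOUT_SEC 60  /* extended value used while pairing */

static int disc_timeout_sec(bool pin_code_request_pending)
{
        return pin_code_request_pending ? PAIRING_TIMEOUT_SEC
                                        : DISCONN_TIMEOUT_SEC;
}

int main(void)
{
        printf("idle ACL link       : %d s\n", disc_timeout_sec(false));
        printf("PIN request pending : %d s\n", disc_timeout_sec(true));
        return 0;
}
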
Based on a report by Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 1 + net/bluetooth/hci_event.c | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 1181db08d9d..75ebbe2221a 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -215,6 +215,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) conn->state = BT_OPEN; conn->power_save = 1; + conn->disc_timeout = HCI_DISCONN_TIMEOUT; switch (type) { case ACL_LINK: diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 15f40ea8d54..4e7cb88e5da 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -883,6 +883,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (conn->type == ACL_LINK) { conn->state = BT_CONFIG; hci_conn_hold(conn); + conn->disc_timeout = HCI_DISCONN_TIMEOUT; } else conn->state = BT_CONNECTED; @@ -1063,9 +1064,14 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s hci_proto_connect_cfm(conn, ev->status); hci_conn_put(conn); } - } else + } else { hci_auth_cfm(conn, ev->status); + hci_conn_hold(conn); + conn->disc_timeout = HCI_DISCONN_TIMEOUT; + hci_conn_put(conn); + } + if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) { if (!ev->status) { struct hci_cp_set_conn_encrypt cp; @@ -1479,7 +1485,21 @@ static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_pin_code_req *ev = (void *) skb->data; + struct hci_conn *conn; + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) { + hci_conn_hold(conn); + conn->disc_timeout = HCI_PAIRING_TIMEOUT; + hci_conn_put(conn); + } + + hci_dev_unlock(hdev); } static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -1489,7 +1509,21 @@ static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_link_key_notify *ev = (void *) skb->data; + struct hci_conn *conn; + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) { + hci_conn_hold(conn); + conn->disc_timeout = HCI_DISCONN_TIMEOUT; + hci_conn_put(conn); + } + + hci_dev_unlock(hdev); } static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *skb) -- cgit v1.2.3 From 3fdca1e1370ffe89980927cdef0583bebcd8caaf Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 28 Apr 2009 09:04:55 -0700 Subject: Bluetooth: Fix connection establishment with low security requirement The Bluetooth 2.1 specification introduced four different security modes that can be mapped using Legacy Pairing and Simple Pairing. With the usage of Simple Pairing it is required that all connections (except the ones for SDP) are encrypted. So even the low security requirement mandates an encrypted connection when using Simple Pairing. When using Legacy Pairing (for Bluetooth 2.0 devices and older) this is not required since it causes interoperability issues. To support this properly the low security requirement translates into different host controller transactions depending if Simple Pairing is supported or not. 
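The resulting requirement can be written as one small decision function. A standalone sketch (enum and parameter names are illustrative, not the kernel's):

#include <stdio.h>
#include <stdbool.h>

enum sec_level { SEC_SDP, SEC_LOW, SEC_MEDIUM, SEC_HIGH };

static bool needs_authentication(enum sec_level level,
                                 bool local_ssp, bool remote_ssp)
{
        if (level == SEC_SDP)
                return false;   /* SDP never requires authentication */
        if (level == SEC_LOW && !(local_ssp && remote_ssp))
                return false;   /* Legacy Pairing: low security stays cheap */
        return true;            /* everything else authenticates; with Simple
                                 * Pairing this also brings up encryption */
}

int main(void)
{
        printf("low, legacy pairing : %d\n", needs_authentication(SEC_LOW, false, true));
        printf("low, simple pairing : %d\n", needs_authentication(SEC_LOW, true,  true));
        printf("sdp, simple pairing : %d\n", needs_authentication(SEC_SDP, true,  true));
        return 0;
}
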
However in case of Simple Pairing the command to switch on encryption after a successful authentication is not triggered for the low security mode. This patch fixes this and actually makes the logic to differentiate between Simple Pairing and Legacy Pairing a lot simpler. Based on a report by Ville Tervo Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 75ebbe2221a..375f4b4f7f7 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -425,12 +425,9 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) if (sec_level == BT_SECURITY_SDP) return 1; - if (sec_level == BT_SECURITY_LOW) { - if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) - return hci_conn_auth(conn, sec_level, auth_type); - else - return 1; - } + if (sec_level == BT_SECURITY_LOW && + (!conn->ssp_mode || !conn->hdev->ssp_mode)) + return 1; if (conn->link_mode & HCI_LM_ENCRYPT) return hci_conn_auth(conn, sec_level, auth_type); -- cgit v1.2.3 From 942e4a2bd680c606af0211e64eb216be2e19bf61 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 28 Apr 2009 22:36:33 -0700 Subject: netfilter: revised locking for x_tables The x_tables are organized with a table structure and a per-cpu copies of the counters and rules. On older kernels there was a reader/writer lock per table which was a performance bottleneck. In 2.6.30-rc, this was converted to use RCU and the counters/rules which solved the performance problems for do_table but made replacing rules much slower because of the necessary RCU grace period. This version uses a per-cpu set of spinlocks and counters to allow to table processing to proceed without the cache thrashing of a global reader lock and keeps the same performance for table updates. Signed-off-by: Stephen Hemminger Acked-by: Linus Torvalds Signed-off-by: David S. Miller --- net/ipv4/netfilter/arp_tables.c | 125 ++++++++++++--------------------------- net/ipv4/netfilter/ip_tables.c | 126 +++++++++++----------------------------- net/ipv6/netfilter/ip6_tables.c | 123 ++++++++++++--------------------------- net/netfilter/x_tables.c | 53 +++++++++-------- 4 files changed, 136 insertions(+), 291 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 5ba533d234d..831fe1879dc 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, indev = in ? in->name : nulldevname; outdev = out ? 
out->name : nulldevname; - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + (2 * skb->dev->addr_len); + ADD_COUNTER(e->counters, hdr_len, 1); t = arpt_get_target(e); @@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); if (hotdrop) return NF_DROP; @@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters * with data used by 'current' CPU - * We dont care about preemption here. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; ARPT_ENTRY_ITERATE(t->entries[curcpu], @@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); ARPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } -} - - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct arpt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - ARPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } -static inline int -zero_entry_counter(struct arpt_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; - - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ + return 
ERR_PTR(-ENOMEM); - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int copy_entries_to_user(unsigned int total_size, @@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct arpt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - mutex_lock(&t->lock); + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[smp_processor_id()]; + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + xt_info_wrlock(curcpu); ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); unlock_up_free: - mutex_unlock(&t->lock); - + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 810c0b62c7d..2ec8d7290c4 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb, tgpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters - * with data used by 'current' CPU - * We dont care about preemption here. + * with data used by 'current' CPU. 
+ * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; IPT_ENTRY_ITERATE(t->entries[curcpu], @@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); IPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } - -} - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct ipt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - IPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } - -static inline int -zero_entry_counter(struct ipt_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters * alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; + return ERR_PTR(-ENOMEM); - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ - - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int @@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. 
*/ +static int +add_counter_to_entry(struct ipt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat goto free; } - mutex_lock(&t->lock); + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + xt_info_wrlock(curcpu); IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); unlock_up_free: - mutex_unlock(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 800ae854247..219e165aea1 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -365,9 +365,9 @@ ip6t_do_table(struct sk_buff *skb, IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb, #ifdef CONFIG_NETFILTER_DEBUG ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; #endif - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -926,9 +926,12 @@ get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters * with data used by 'current' CPU - * We dont care about preemption here. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; IP6T_ENTRY_ITERATE(t->entries[curcpu], @@ -941,72 +944,22 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); IP6T_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } -} - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. 
*/ -static int -add_counter_to_entry(struct ip6t_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - IP6T_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } -static inline int -zero_entry_counter(struct ip6t_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -1015,30 +968,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; + return ERR_PTR(-ENOMEM); - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ - - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int @@ -1334,8 +1268,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1405,11 +1340,24 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. 
*/ +static int +add_counter_to_entry(struct ip6t_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1465,25 +1413,28 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - mutex_lock(&t->lock); + + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + curcpu = smp_processor_id(); + xt_info_wrlock(curcpu); + loc_cpu_entry = private->entries[curcpu]; IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); + unlock_up_free: - mutex_unlock(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 509a95621f9..150e5cf62f8 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -625,20 +625,6 @@ void xt_free_table_info(struct xt_table_info *info) } EXPORT_SYMBOL(xt_free_table_info); -void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo, - struct xt_table_info *newinfo) -{ - unsigned int cpu; - - for_each_possible_cpu(cpu) { - void *p = oldinfo->entries[cpu]; - rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]); - newinfo->entries[cpu] = p; - } - -} -EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu); - /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) @@ -676,32 +662,43 @@ void xt_compat_unlock(u_int8_t af) EXPORT_SYMBOL_GPL(xt_compat_unlock); #endif +DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks); +EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks); + + struct xt_table_info * xt_replace_table(struct xt_table *table, unsigned int num_counters, struct xt_table_info *newinfo, int *error) { - struct xt_table_info *oldinfo, *private; + struct xt_table_info *private; /* Do the substitution. */ - mutex_lock(&table->lock); + local_bh_disable(); private = table->private; + /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { duprintf("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); - mutex_unlock(&table->lock); + local_bh_enable(); *error = -EAGAIN; return NULL; } - oldinfo = private; - rcu_assign_pointer(table->private, newinfo); - newinfo->initial_entries = oldinfo->initial_entries; - mutex_unlock(&table->lock); - synchronize_net(); - return oldinfo; + table->private = newinfo; + newinfo->initial_entries = private->initial_entries; + + /* + * Even though table entries have now been swapped, other CPU's + * may still be using the old entries. This is okay, because + * resynchronization happens because of the locking done + * during the get_counters() routine. + */ + local_bh_enable(); + + return private; } EXPORT_SYMBOL_GPL(xt_replace_table); @@ -734,7 +731,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table, /* Simplifies replace_table code. 
*/ table->private = bootstrap; - mutex_init(&table->lock); if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; @@ -1147,7 +1143,14 @@ static struct pernet_operations xt_net_ops = { static int __init xt_init(void) { - int i, rv; + unsigned int i; + int rv; + + for_each_possible_cpu(i) { + struct xt_info_lock *lock = &per_cpu(xt_info_locks, i); + spin_lock_init(&lock->lock); + lock->readers = 0; + } xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); if (!xt) -- cgit v1.2.3 From d4c4a9a1bce1912ed5681251f0037fd4f2364a3e Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 29 Apr 2009 11:41:24 +0100 Subject: mac80211: fix modprobe deadlock by not calling wep_init under rtnl_lock - ieee80211_wep_init(), which is called with rtnl_lock held, blocks in request_module() [waiting for modprobe to load a crypto module]. - modprobe blocks in a call to flush_workqueue(), when it closes a TTY [presumably when it exits]. - The workqueue item linkwatch_event() blocks on rtnl_lock. There's no reason for wep_init() to be called with rtnl_lock held, so just move it outside the critical section. Signed-off-by: Alan Jenkins Signed-off-by: John W. Linville --- net/mac80211/main.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index fbcbed6cad0..00968c2d22b 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -909,6 +909,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (result < 0) goto fail_sta_info; + result = ieee80211_wep_init(local); + if (result < 0) { + printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n", + wiphy_name(local->hw.wiphy), result); + goto fail_wep; + } + rtnl_lock(); result = dev_alloc_name(local->mdev, local->mdev->name); if (result < 0) @@ -930,14 +937,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_rate; } - result = ieee80211_wep_init(local); - - if (result < 0) { - printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n", - wiphy_name(local->hw.wiphy), result); - goto fail_wep; - } - /* add one default STA interface if supported */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION)) { result = ieee80211_if_add(local, "wlan%d", NULL, @@ -967,13 +966,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) return 0; -fail_wep: - rate_control_deinitialize(local); fail_rate: unregister_netdevice(local->mdev); local->mdev = NULL; fail_dev: rtnl_unlock(); + ieee80211_wep_free(local); +fail_wep: sta_info_stop(local); fail_sta_info: debugfs_hw_del(local); -- cgit v1.2.3 From c428c89201a57a0ce24c37ed79e540d1f4101cf3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 29 Apr 2009 00:28:18 +0200 Subject: mac80211: default to automatic power control In "mac80211: correct wext transmit power handler" I fixed the wext handler, but forgot to make the default of the user_power_level -1 (aka "auto"), so that now the transmit power is always set to 0, causing associations to time out and similar problems since we're transmitting with very little power. Correct this by correcting the default user_power_level to -1. Signed-off-by: Johannes Berg Bisected-by: Niel Lambrechts Signed-off-by: John W. 
Linville --- net/mac80211/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 00968c2d22b..14134193cd1 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -757,6 +757,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.conf.long_frame_max_tx_count = 4; local->hw.conf.short_frame_max_tx_count = 7; local->hw.conf.radio_enabled = true; + local->user_power_level = -1; INIT_LIST_HEAD(&local->interfaces); mutex_init(&local->iflist_mtx); -- cgit v1.2.3 From 7a67e56fd362d3edfde1f19170893508c3940d3a Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Thu, 30 Apr 2009 05:41:19 -0700 Subject: net: Fix oops when splicing skbs from a frag_list. Lennert Buytenhek wrote: > Since 4fb669948116d928ae44262ab7743732c574630d ("net: Optimize memory > usage when splicing from sockets.") I'm seeing this oops (e.g. in > 2.6.30-rc3) when splicing from a TCP socket to /dev/null on a driver > (mv643xx_eth) that uses LRO in the skb mode (lro_receive_skb) rather > than the frag mode: My patch incorrectly assumed skb->sk was always valid, but for "frag_listed" skbs we can only use skb->sk of their parent. Reported-by: Lennert Buytenhek Debugged-by: Lennert Buytenhek Tested-by: Lennert Buytenhek Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/core/skbuff.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ce6356cd9f7..f091a5a845c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1365,9 +1365,8 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) static inline struct page *linear_to_page(struct page *page, unsigned int *len, unsigned int *offset, - struct sk_buff *skb) + struct sk_buff *skb, struct sock *sk) { - struct sock *sk = skb->sk; struct page *p = sk->sk_sndmsg_page; unsigned int off; @@ -1405,13 +1404,14 @@ new_page: */ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, unsigned int *len, unsigned int offset, - struct sk_buff *skb, int linear) + struct sk_buff *skb, int linear, + struct sock *sk) { if (unlikely(spd->nr_pages == PIPE_BUFFERS)) return 1; if (linear) { - page = linear_to_page(page, len, &offset, skb); + page = linear_to_page(page, len, &offset, skb, sk); if (!page) return 1; } else @@ -1442,7 +1442,8 @@ static inline void __segment_seek(struct page **page, unsigned int *poff, static inline int __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, unsigned int *len, struct sk_buff *skb, - struct splice_pipe_desc *spd, int linear) + struct splice_pipe_desc *spd, int linear, + struct sock *sk) { if (!*len) return 1; @@ -1465,7 +1466,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff, /* the linear region may spread across several pages */ flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - if (spd_fill_page(spd, page, &flen, poff, skb, linear)) + if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk)) return 1; __segment_seek(&page, &poff, &plen, flen); @@ -1481,8 +1482,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff, * pipe is full or if we already spliced the requested length. 
*/ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, - unsigned int *len, - struct splice_pipe_desc *spd) + unsigned int *len, struct splice_pipe_desc *spd, + struct sock *sk) { int seg; @@ -1492,7 +1493,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), - offset, len, skb, spd, 1)) + offset, len, skb, spd, 1, sk)) return 1; /* @@ -1502,7 +1503,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (__splice_segment(f->page, f->page_offset, f->size, - offset, len, skb, spd, 0)) + offset, len, skb, spd, 0, sk)) return 1; } @@ -1528,12 +1529,13 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, .ops = &sock_pipe_buf_ops, .spd_release = sock_spd_release, }; + struct sock *sk = skb->sk; /* * __skb_splice_bits() only fails if the output has no room left, * so no point in going over the frag_list for the error case. */ - if (__skb_splice_bits(skb, &offset, &tlen, &spd)) + if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk)) goto done; else if (!tlen) goto done; @@ -1545,14 +1547,13 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct sk_buff *list = skb_shinfo(skb)->frag_list; for (; list && tlen; list = list->next) { - if (__skb_splice_bits(list, &offset, &tlen, &spd)) + if (__skb_splice_bits(list, &offset, &tlen, &spd, sk)) break; } } done: if (spd.nr_pages) { - struct sock *sk = skb->sk; int ret; /* -- cgit v1.2.3 From ec581f6a42bbbea5271c66da9769a41b46c74e10 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 May 2009 09:05:06 -0700 Subject: net: Fix skb_tx_hash() for forwarding workloads. When skb_rx_queue_recorded() is true, we dont want to use jash distribution as the device driver exactly told us which queue was selected at RX time. jhash makes a statistical shuffle, but this wont work with 8 static inputs. Later improvements would be to compute reciprocal value of real_num_tx_queues to avoid a divide here. But this computation should be done once, when real_num_tx_queues is set. This needs a separate patch, and a new field in struct net_device. Reported-by: Andrew Dickinson Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 308a7d0c277..e2e9e4af3ac 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1735,11 +1735,12 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) { u32 hash; - if (skb_rx_queue_recorded(skb)) { - hash = skb_get_rx_queue(skb); - } else if (skb->sk && skb->sk->sk_hash) { + if (skb_rx_queue_recorded(skb)) + return skb_get_rx_queue(skb) % dev->real_num_tx_queues; + + if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; - } else + else hash = skb->protocol; hash = jhash_1word(hash, skb_tx_hashrnd); -- cgit v1.2.3 From acda074390270ca9e28f2a9729f7b835e2ad6da4 Mon Sep 17 00:00:00 2001 From: Laszlo Attila Toth Date: Fri, 1 May 2009 15:23:10 -0700 Subject: xt_socket: checks for the state of nf_conntrack xt_socket can use connection tracking, and checks whether it is a module. Signed-off-by: Laszlo Attila Toth Signed-off-by: David S. 
Miller --- net/netfilter/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 881203c4a14..cb3ad741ebf 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -837,6 +837,7 @@ config NETFILTER_XT_MATCH_SOCKET depends on NETFILTER_TPROXY depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED + depends on !NF_CONNTRACK || NF_CONNTRACK select NF_DEFRAG_IPV4 help This option adds a `socket' match, which can be used to match -- cgit v1.2.3 From d0ab8ff81bf1b01bae7d6b92ca067badbbb02cc9 Mon Sep 17 00:00:00 2001 From: Robert Love Date: Sat, 2 May 2009 13:48:32 -0700 Subject: net: Only store high 16 bits of kernel generated filter priorities The kernel should only be using the high 16 bits of a kernel generated priority. Filter priorities in all other cases only use the upper 16 bits of the u32 'prio' field of 'struct tcf_proto', but when the kernel generates the priority of a filter is saves all 32 bits which can result in incorrect lookup failures when a filter needs to be deleted or modified. Signed-off-by: Robert Love Signed-off-by: David S. Miller --- net/sched/cls_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 173fcc4b050..0759f32e9dc 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -254,7 +254,7 @@ replay: } tp->ops = tp_ops; tp->protocol = protocol; - tp->prio = nprio ? : tcf_auto_prio(*back); + tp->prio = nprio ? : TC_H_MAJ(tcf_auto_prio(*back)); tp->q = q; tp->classify = tp_ops->classify; tp->classid = parent; -- cgit v1.2.3 From 902e5ea15f8471a3213a37b11b98196f3406aeaf Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sat, 2 May 2009 13:49:36 -0700 Subject: Subject: [PATCH] br2684: restore net_dev initialization Commit 0ba25ff4c669e5395110ba6ab4958a97a9f96922 ("br2684: convert to net_device_ops") inadvertently deleted the initialization of the net_dev pointer in the br2684_dev structure, leading to crashes. This patch adds it back. Reported-by: Mikko Vinni Tested-by: Mikko Vinni Signed-off-by: Rabin Vincent Signed-off-by: David S. Miller --- net/atm/br2684.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 334fcd4a4ea..3100a8940af 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -549,6 +549,7 @@ static void br2684_setup(struct net_device *netdev) struct br2684_dev *brdev = BRPRIV(netdev); ether_setup(netdev); + brdev->net_dev = netdev; netdev->netdev_ops = &br2684_netdev_ops; -- cgit v1.2.3 From 21515e46bc6a6279dd13f6c01898ada9720100a3 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 29 Apr 2009 14:14:00 -0500 Subject: svcrdma: clean up error paths. These fixes resolved crashes due to resource leak BUG_ON checks. The resource leaks were detected by introducing asynchronous transport errors. Signed-off-by: Steve Wise Signed-off-by: Tom Tucker Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 3 +++ net/sunrpc/xprtrdma/svc_rdma_transport.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 6c26a675435..8b510c5e877 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -183,6 +183,7 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt, fatal_err: printk("svcrdma: Error fast registering memory for xprt %p\n", xprt); + vec->frmr = NULL; svc_rdma_put_frmr(xprt, frmr); return -EIO; } @@ -516,6 +517,7 @@ static int send_reply(struct svcxprt_rdma *rdma, "svcrdma: could not post a receive buffer, err=%d." "Closing transport %p.\n", ret, rdma); set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); + svc_rdma_put_frmr(rdma, vec->frmr); svc_rdma_put_context(ctxt, 0); return -ENOTCONN; } @@ -606,6 +608,7 @@ static int send_reply(struct svcxprt_rdma *rdma, return 0; err: + svc_rdma_unmap_dma(ctxt); svc_rdma_put_frmr(rdma, vec->frmr); svc_rdma_put_context(ctxt, 1); return -EIO; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 3d810e7df3f..4b0c2fa15e0 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -520,8 +520,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) svc_xprt_get(&xprt->sc_xprt); ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); if (ret) { - svc_xprt_put(&xprt->sc_xprt); + svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 1); + svc_xprt_put(&xprt->sc_xprt); } return ret; -- cgit v1.2.3 From 0c266898b42fe4e4e2f9edfc9d3474c10f93aa6a Mon Sep 17 00:00:00 2001 From: Satoru SATOH Date: Mon, 4 May 2009 11:11:01 -0700 Subject: tcp: Fix tcp_prequeue() to get correct rto_min value tcp_prequeue() refers to the constant value (TCP_RTO_MIN) regardless of the actual value might be tuned. The following patches fix this and make tcp_prequeue get the actual value returns from tcp_rto_min(). Signed-off-by: Satoru SATOH Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c96a6bb2543..eec3e6f9956 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -597,16 +597,6 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) tcp_grow_window(sk, skb); } -static u32 tcp_rto_min(struct sock *sk) -{ - struct dst_entry *dst = __sk_dst_get(sk); - u32 rto_min = TCP_RTO_MIN; - - if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) - rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN); - return rto_min; -} - /* Called to compute a smoothed rtt estimate. The data fed to this * routine either comes from timestamps, or from segments that were * known _not_ to have been retransmitted [see Karn/Partridge -- cgit v1.2.3 From 8ccd8f21122dcc30a665516d43aa8b4aa8ae51f6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 29 Apr 2009 23:35:56 +0200 Subject: mac80211: correct fragmentation threshold check The fragmentation threshold is defined to be including the FCS, and the code that sets the TX_FRAGMENTED flag correctly accounts for those four bytes. The code that verifies this doesn't though, which could lead to spurious warnings and frames being dropped although everything is ok. Correct the code by accounting for the FCS. 
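For frames whose length falls within FCS_LEN (4 bytes) of the threshold, the fragmentation decision and the old sanity check disagreed, which is where the spurious warnings came from. A stand-alone sketch of the arithmetic, assuming an example threshold of 256 bytes (this is an illustration, not mac80211 code):

/* frag_fcs_demo.c - stand-alone illustration of the FCS accounting
 * mismatch; the threshold value is an arbitrary example. */
#include <stdio.h>

#define FCS_LEN 4

int main(void)
{
        unsigned int frag_threshold = 256;      /* example value */
        unsigned int len;

        for (len = 250; len <= 256; len++) {
                /* the path that sets TX_FRAGMENTED counts the FCS ... */
                int fragmented = len + FCS_LEN > frag_threshold;
                /* ... the old sanity check forgot it, the new one does not */
                int old_warn = fragmented && len <= frag_threshold;
                int new_warn = fragmented && len + FCS_LEN <= frag_threshold;

                printf("len=%u fragmented=%d old_warn=%d new_warn=%d\n",
                       len, fragmented, old_warn, new_warn);
        }
        return 0;
}

With these numbers, lengths 253 through 256 are correctly marked for fragmentation but tripped the old WARN_ON; once the FCS is added back into the check, it agrees with the decision to fragment.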
(JWL -- The problem is described here: http://article.gmane.org/gmane.linux.kernel.wireless.general/32205 ) Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 3fb04a86444..63656266d56 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -772,7 +772,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) hdrlen = ieee80211_hdrlen(hdr->frame_control); /* internal error, why is TX_FRAGMENTED set? */ - if (WARN_ON(skb->len <= frag_threshold)) + if (WARN_ON(skb->len + FCS_LEN <= frag_threshold)) return TX_DROP; /* -- cgit v1.2.3 From c0f0aac05fa84b37ed46db8cf6c8bee9a67bbcca Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Apr 2009 20:09:56 +0200 Subject: cfg80211: fix truncated IEs Another bug in the "cfg80211: do not replace BSS structs" patch, a forgotten length update leads to bogus data being stored and passed to userspace, often truncated. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/scan.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 2ae65b39b52..1f260c40b6c 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -395,6 +395,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, memcpy(ies, res->pub.information_elements, ielen); found->ies_allocated = true; found->pub.information_elements = ies; + found->pub.len_information_elements = ielen; } } } -- cgit v1.2.3 From ac46d48e00349c63650b3cc6f9460fcc183da6a6 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 1 May 2009 18:44:50 -0400 Subject: cfg80211: fix race condition with wiphy_apply_custom_regulatory() We forgot to lock using the cfg80211_mutex in wiphy_apply_custom_regulatory(). Without the lock there is possible race between processing a reply from CRDA and a driver calling wiphy_apply_custom_regulatory(). During the processing of the reply from CRDA we free last_request and wiphy_apply_custom_regulatory() eventually accesses an element from last_request in the through freq_reg_info_regd(). This is very difficult to reproduce (I haven't), it takes us 3 hours and you need to be banging hard, but the race is obvious by looking at the code. This should only affect those who use this caller, which currently is ath5k, ath9k, and ar9170. 
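Reduced to a stand-alone sketch (a pthread mutex stands in for cfg80211_mutex and a toy struct for last_request; none of the names below are the real cfg80211 code), the rule the patch enforces is simply that the reader takes the same lock the CRDA-reply path holds while it frees and replaces the shared pointer:

/* reg_race_demo.c - user-space sketch of the locking discipline only. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct request { int initiator; };

static pthread_mutex_t reg_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct request *last_request;    /* shared, protected by reg_mutex */

/* Reply path: frees the old request and installs a new one. */
static void process_reply(void)
{
        pthread_mutex_lock(&reg_mutex);
        free(last_request);
        last_request = calloc(1, sizeof(*last_request));
        pthread_mutex_unlock(&reg_mutex);
}

/* Driver path: must take the same lock before looking at last_request,
 * otherwise it can dereference memory the reply path just freed. */
static void apply_custom_regulatory(void)
{
        pthread_mutex_lock(&reg_mutex);
        if (last_request)
                printf("initiator %d\n", last_request->initiator);
        pthread_mutex_unlock(&reg_mutex);
}

int main(void)
{
        last_request = calloc(1, sizeof(*last_request));
        process_reply();
        apply_custom_regulatory();
        free(last_request);
        return 0;
}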
EIP: 0060:[] EFLAGS: 00210282 CPU: 1 EIP is at freq_reg_info_regd+0x24/0x121 [cfg80211] EAX: 00000000 EBX: f7ca0060 ECX: f5183d94 EDX: 0024cde0 ESI: f8f56edc EDI: 00000000 EBP: 00000000 ESP: f5183d44 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 Process modprobe (pid: 14617, ti=f5182000 task=f3934d10 task.ti=f5182000) Stack: c0505300 f7ca0ab4 f5183d94 0024cde0 f8f403a6 f8f63160 f7ca0060 00000000 00000000 f8ebedf8 f5183d90 f8f56edc 00000000 00000004 00000f40 f8f56edc f7ca0060 f7ca1234 00000000 00000000 00000000 f7ca14f0 f7ca0ab4 f7ca1289 Call Trace: [] wiphy_apply_custom_regulatory+0x8f/0x122 [cfg80211] [] ath_attach+0x707/0x9e6 [ath9k] [] ath_pci_probe+0x18d/0x29a [ath9k] [] pci_device_probe+0xa3/0xe4 [] really_probe+0xd7/0x1de [] __driver_attach+0x37/0x55 [] bus_for_each_dev+0x31/0x57 [] driver_attach+0x16/0x18 [] bus_add_driver+0xec/0x21b [] driver_register+0x85/0xe2 [] __pci_register_driver+0x3c/0x69 [] ath9k_init+0x43/0x68 [ath9k] [] _stext+0x3b/0x116 [] sys_init_module+0x8a/0x19e [] sysenter_do_call+0x12/0x21 [] 0xffffe430 ======================= Code: 0f 94 c0 c3 31 c0 c3 55 57 56 53 89 c3 83 ec 14 8b 74 24 2c 89 54 24 0c 89 4c 24 08 85 f6 75 06 8b 35 c8 bb ec f8 a1 cc bb ec f8 <8b> 40 04 83 f8 03 74 3a 48 74 37 8b 43 28 85 c0 74 30 89 c6 8b EIP: [] freq_reg_info_regd+0x24/0x121 [cfg80211] SS:ESP 0068:f5183d44 Cc: stable@kernel.org Reported-by: Nataraj Sadasivam Reported-by: Vivek Natarajan Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6c1993d9990..3a734949769 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -907,6 +907,7 @@ EXPORT_SYMBOL(freq_reg_info); int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth, const struct ieee80211_reg_rule **reg_rule) { + assert_cfg80211_lock(); return freq_reg_info_regd(wiphy, center_freq, bandwidth, reg_rule, NULL); } @@ -1176,6 +1177,8 @@ static void handle_channel_custom(struct wiphy *wiphy, struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; + assert_cfg80211_lock(); + sband = wiphy->bands[band]; BUG_ON(chan_idx >= sband->n_channels); chan = &sband->channels[chan_idx]; @@ -1214,10 +1217,13 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, const struct ieee80211_regdomain *regd) { enum ieee80211_band band; + + mutex_lock(&cfg80211_mutex); for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (wiphy->bands[band]) handle_band_custom(wiphy, band, regd); } + mutex_unlock(&cfg80211_mutex); } EXPORT_SYMBOL(wiphy_apply_custom_regulatory); -- cgit v1.2.3 From b1ed8ddd21a2d7acf8efbb60a112ea5c9f914159 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 2 May 2009 00:34:15 -0400 Subject: cfg80211: fix bug while trying to process beacon hints on init During initialization we would not have received any beacons so skip processing reg beacon hints, also adds a check to reg_is_world_roaming() for last_request before accessing its fields. 
This should fix this: BUG: unable to handle kernel NULL pointer dereference at IP: [] wiphy_update_regulatory+0x20f/0x295 *pdpt = 0000000008bf1001 *pde = 0000000000000000 Oops: 0000 [#1] last sysfs file: /sys/class/backlight/eeepc/brightness Modules linked in: ath5k(+) mac80211 led_class cfg80211 go_bit cfbcopyarea cfbimgblt cfbfillrect ipv6 ydev usual_tables(P) snd_hda_codec_realtek snd_hda_intel nd_hwdep uhci_hcd snd_pcm_oss snd_mixer_oss i2c_i801 e serio_raw i2c_core pcspkr atl2 snd_pcm intel_agp re agpgart eeepc_laptop snd_page_alloc ac video backlight rfkill button processor evdev thermal fan ata_generic Pid: 2909, comm: modprobe Tainted: Pc #112) 701 EIP: 0060:[] EFLAGS: 00010246 CPU: 0 EIP is at wiphy_update_regulatory+0x20f/0x295 [cfg80211] EAX: 00000000 EBX: c5da0000 ECX: 00000000 EDX: c5da0060 ESI: 0000001a EDI: c5da0060 EBP: df3bdd70 ESP: df3bdd40 DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068 Process modprobe (pid: 2909, ti=df3bc000 task=c5d030000) Stack: df3bdd90 c5da0060 c04277e0 00000001 00000044 c04277e402 00000002 c5da0000 0000001a c5da0060 df3bdda8 e01706a2 02 00000282 000080d0 00000068 c5d53500 00000080 0000028240 Call Trace: [] ? wiphy_register+0x122/0x1b7 [cfg80211] [] ? ieee80211_register_hw+0xd8/0x346 [] ? ath5k_hw_set_bssid_mask+0x71/0x78 [ath5k] [] ? ath5k_pci_probe+0xa5c/0xd0a [ath5k] [] ? sysfs_find_dirent+0x16/0x27 [] ? local_pci_probe+0xe/0x10 [] ? pci_device_probe+0x48/0x66 [] ? driver_probe_device+0x7f/0xf2 [] ? __driver_attach+0x43/0x5f [] ? bus_for_each_dev+0x39/0x5a [] ? driver_attach+0x14/0x16 [] ? __driver_attach+0x0/0x5f [] ? bus_add_driver+0xd7/0x1e7 [] ? driver_register+0x7b/0xd7 [] ? __pci_register_driver+0x32/0x85 [] ? init_ath5k_pci+0x18/0x30 [ath5k] [] ? _stext+0x49/0x10b [] ? init_ath5k_pci+0x0/0x30 [ath5k] [] ? __blocking_notifier_call_chain+0x40/0x4c [] ? sys_init_module+0x87/0x18b [] ? sysenter_do_call+0x12/0x22 Code: b8 da 17 e0 83 c0 04 e8 92 f9 ff ff 84 c0 75 2a 8b 85 c0 74 0c 83 c0 04 e8 7c f9 ff ff 84 c0 75 14 a1 bc da 4 03 74 66 8b 4d d4 80 79 08 00 74 5d a1 e0 d2 17 e0 48 EIP: [] wiphy_update_regulatory+0x20f/0x295 SP 0068:df3bdd40 CR2: 0000000000000004 ---[ end trace 830f2dd2a95fd1a8 ]--- This issue is hard to reproduce, but it was noticed and discussed on this thread: http://marc.info/?t=123938022700005&r=1&w=2 Cc: stable@kernel.org Reported-by: Alan Jenkins Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 3a734949769..974b127aea6 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1134,7 +1134,8 @@ static bool reg_is_world_roaming(struct wiphy *wiphy) if (is_world_regdom(cfg80211_regdomain->alpha2) || (wiphy->regd && is_world_regdom(wiphy->regd->alpha2))) return true; - if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && + if (last_request && + last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && wiphy->custom_regulatory) return true; return false; @@ -1143,6 +1144,12 @@ static bool reg_is_world_roaming(struct wiphy *wiphy) /* Reap the advantages of previously found beacons */ static void reg_process_beacons(struct wiphy *wiphy) { + /* + * Means we are just firing up cfg80211, so no beacons would + * have been processed yet. 
+ */ + if (!last_request) + return; if (!reg_is_world_roaming(wiphy)) return; wiphy_update_beacon_reg(wiphy); -- cgit v1.2.3 From 30a548c727514484b08ac06edf0a7eb0f7fd70bf Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 2 May 2009 01:17:27 -0400 Subject: cfg80211: fix comment on regulatory hint processing Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 974b127aea6..08265ca1578 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1436,7 +1436,7 @@ new_request: return call_crda(last_request->alpha2); } -/* This currently only processes user and driver regulatory hints */ +/* This processes *all* regulatory hints */ static void reg_process_hint(struct regulatory_request *reg_request) { int r = 0; -- cgit v1.2.3 From 8e532175277d9a5eae49768ed086555081f741a7 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 4 May 2009 18:04:55 +0200 Subject: mac80211: minstrel, fix memory corruption minstrel doesn't count max rate count in fact, since it doesn't use a loop variable `i' and hence allocs space only for bitrates found in the first band. Fix it by involving the `i' as an index so that it traverses all the bands now and finds the real max bitrate count. Signed-off-by: Jiri Slaby Cc: Felix Fietkau Signed-off-by: John W. Linville --- net/mac80211/rc80211_minstrel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 3824990d340..70df3dcc3cf 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -476,7 +476,7 @@ minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) return NULL; for (i = 0; i < IEEE80211_NUM_BANDS; i++) { - sband = hw->wiphy->bands[hw->conf.channel->band]; + sband = hw->wiphy->bands[i]; if (sband->n_bitrates > max_rates) max_rates = sband->n_bitrates; } -- cgit v1.2.3 From 6909268dc93ae4b0b8e1ebb4b2fa70b1a47dd347 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 4 May 2009 18:10:28 +0200 Subject: mac80211: pid, fix memory corruption pid doesn't count with some band having more bitrates than the one associated the first time. Fix that by counting the maximal available bitrate count and allocate big enough space. Secondly, fix touching uninitialized memory which causes panics. Index sucked from this random memory points to the hell. The fix is to sort the rates on each band change. Signed-off-by: Jiri Slaby Signed-off-by: John W. Linville --- net/mac80211/rc80211_pid_algo.c | 73 ++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index b16801cde06..01d59a8e334 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -317,13 +317,44 @@ rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, void *priv_sta) { struct rc_pid_sta_info *spinfo = priv_sta; + struct rc_pid_info *pinfo = priv; + struct rc_pid_rateinfo *rinfo = pinfo->rinfo; struct sta_info *si; + int i, j, tmp; + bool s; /* TODO: This routine should consider using RSSI from previous packets * as we need to have IEEE 802.1X auth succeed immediately after assoc.. * Until that method is implemented, we will use the lowest supported * rate as a workaround. */ + /* Sort the rates. 
This is optimized for the most common case (i.e. + * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed + * mapping too. */ + for (i = 0; i < sband->n_bitrates; i++) { + rinfo[i].index = i; + rinfo[i].rev_index = i; + if (RC_PID_FAST_START) + rinfo[i].diff = 0; + else + rinfo[i].diff = i * pinfo->norm_offset; + } + for (i = 1; i < sband->n_bitrates; i++) { + s = 0; + for (j = 0; j < sband->n_bitrates - i; j++) + if (unlikely(sband->bitrates[rinfo[j].index].bitrate > + sband->bitrates[rinfo[j + 1].index].bitrate)) { + tmp = rinfo[j].index; + rinfo[j].index = rinfo[j + 1].index; + rinfo[j + 1].index = tmp; + rinfo[rinfo[j].index].rev_index = j; + rinfo[rinfo[j + 1].index].rev_index = j + 1; + s = 1; + } + if (!s) + break; + } + spinfo->txrate_idx = rate_lowest_index(sband, sta); /* HACK */ si = container_of(sta, struct sta_info, sta); @@ -336,21 +367,22 @@ static void *rate_control_pid_alloc(struct ieee80211_hw *hw, struct rc_pid_info *pinfo; struct rc_pid_rateinfo *rinfo; struct ieee80211_supported_band *sband; - int i, j, tmp; - bool s; + int i, max_rates = 0; #ifdef CONFIG_MAC80211_DEBUGFS struct rc_pid_debugfs_entries *de; #endif - sband = hw->wiphy->bands[hw->conf.channel->band]; - pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC); if (!pinfo) return NULL; - /* We can safely assume that sband won't change unless we get - * reinitialized. */ - rinfo = kmalloc(sizeof(*rinfo) * sband->n_bitrates, GFP_ATOMIC); + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + sband = hw->wiphy->bands[i]; + if (sband->n_bitrates > max_rates) + max_rates = sband->n_bitrates; + } + + rinfo = kmalloc(sizeof(*rinfo) * max_rates, GFP_ATOMIC); if (!rinfo) { kfree(pinfo); return NULL; @@ -368,33 +400,6 @@ static void *rate_control_pid_alloc(struct ieee80211_hw *hw, pinfo->rinfo = rinfo; pinfo->oldrate = 0; - /* Sort the rates. This is optimized for the most common case (i.e. - * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed - * mapping too. */ - for (i = 0; i < sband->n_bitrates; i++) { - rinfo[i].index = i; - rinfo[i].rev_index = i; - if (RC_PID_FAST_START) - rinfo[i].diff = 0; - else - rinfo[i].diff = i * pinfo->norm_offset; - } - for (i = 1; i < sband->n_bitrates; i++) { - s = 0; - for (j = 0; j < sband->n_bitrates - i; j++) - if (unlikely(sband->bitrates[rinfo[j].index].bitrate > - sband->bitrates[rinfo[j + 1].index].bitrate)) { - tmp = rinfo[j].index; - rinfo[j].index = rinfo[j + 1].index; - rinfo[j + 1].index = tmp; - rinfo[rinfo[j].index].rev_index = j; - rinfo[rinfo[j + 1].index].rev_index = j + 1; - s = 1; - } - if (!s) - break; - } - #ifdef CONFIG_MAC80211_DEBUGFS de = &pinfo->dentries; de->target = debugfs_create_u32("target_pf", S_IRUSR | S_IWUSR, -- cgit v1.2.3 From a67e899cf38ae542d1a028ccd021f9189f76fb74 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 2 May 2009 18:24:06 -0700 Subject: Bluetooth: Fix issue with sysfs handling for connections Due to a semantic changes in flush_workqueue() the current approach of synchronizing the sysfs handling for connections doesn't work anymore. The whole approach is actually fully broken and based on assumptions that are no longer valid. With the introduction of Simple Pairing support, the creation of low-level ACL links got changed. This change invalidates the reason why in the past two independent work queues have been used for adding/removing sysfs devices. The adding of the actual sysfs device is now postponed until the host controller successfully assigns an unique handle to that link. 
So the real synchronization happens inside the controller and not the host. The only left-over problem is that some internals of the sysfs device handling are not initialized ahead of time. This leaves potential access to invalid data and can cause various NULL pointer dereferences. To fix this a new function makes sure that all sysfs details are initialized when an connection attempt is made. The actual sysfs device is only registered when the connection has been successfully established. To avoid a race condition with the registration, the check if a device is registered has been moved into the removal work. As an extra protection two flush_work() calls are left in place to make sure a previous add/del work has been completed first. Based on a report by Marc Pignat Signed-off-by: Marcel Holtmann Tested-by: Justin P. Mattock Tested-by: Roger Quadros Tested-by: Marc Pignat --- net/bluetooth/hci_conn.c | 2 ++ net/bluetooth/hci_sysfs.c | 74 +++++++++++++++++++++++++---------------------- 2 files changed, 42 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 375f4b4f7f7..61309b26f27 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -248,6 +248,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_ADD); + hci_conn_init_sysfs(conn); + tasklet_enable(&hdev->tx_task); return conn; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index b7c51082dde..582d8877078 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -9,7 +9,7 @@ struct class *bt_class = NULL; EXPORT_SYMBOL_GPL(bt_class); -static struct workqueue_struct *bluetooth; +static struct workqueue_struct *bt_workq; static inline char *link_typetostr(int type) { @@ -89,8 +89,8 @@ static void add_conn(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, work_add); - /* ensure previous add/del is complete */ - flush_workqueue(bluetooth); + /* ensure previous del is complete */ + flush_work(&conn->work_del); if (device_add(&conn->dev) < 0) { BT_ERR("Failed to register connection device"); @@ -98,27 +98,6 @@ static void add_conn(struct work_struct *work) } } -void hci_conn_add_sysfs(struct hci_conn *conn) -{ - struct hci_dev *hdev = conn->hdev; - - BT_DBG("conn %p", conn); - - conn->dev.type = &bt_link; - conn->dev.class = bt_class; - conn->dev.parent = &hdev->dev; - - dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); - - dev_set_drvdata(&conn->dev, conn); - - device_initialize(&conn->dev); - - INIT_WORK(&conn->work_add, add_conn); - - queue_work(bluetooth, &conn->work_add); -} - /* * The rfcomm tty device will possibly retain even when conn * is down, and sysfs doesn't support move zombie device, @@ -134,8 +113,11 @@ static void del_conn(struct work_struct *work) struct hci_conn *conn = container_of(work, struct hci_conn, work_del); struct hci_dev *hdev = conn->hdev; - /* ensure previous add/del is complete */ - flush_workqueue(bluetooth); + /* ensure previous add is complete */ + flush_work(&conn->work_add); + + if (!device_is_registered(&conn->dev)) + return; while (1) { struct device *dev; @@ -152,16 +134,40 @@ static void del_conn(struct work_struct *work) hci_dev_put(hdev); } -void hci_conn_del_sysfs(struct hci_conn *conn) +void hci_conn_init_sysfs(struct hci_conn *conn) { + struct hci_dev *hdev = conn->hdev; + BT_DBG("conn %p", conn); - if (!device_is_registered(&conn->dev)) - return; + 
conn->dev.type = &bt_link; + conn->dev.class = bt_class; + conn->dev.parent = &hdev->dev; + + dev_set_drvdata(&conn->dev, conn); + device_initialize(&conn->dev); + + INIT_WORK(&conn->work_add, add_conn); INIT_WORK(&conn->work_del, del_conn); +} + +void hci_conn_add_sysfs(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + + BT_DBG("conn %p", conn); + + dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); + + queue_work(bt_workq, &conn->work_add); +} + +void hci_conn_del_sysfs(struct hci_conn *conn) +{ + BT_DBG("conn %p", conn); - queue_work(bluetooth, &conn->work_del); + queue_work(bt_workq, &conn->work_del); } static inline char *host_typetostr(int type) @@ -438,13 +444,13 @@ void hci_unregister_sysfs(struct hci_dev *hdev) int __init bt_sysfs_init(void) { - bluetooth = create_singlethread_workqueue("bluetooth"); - if (!bluetooth) + bt_workq = create_singlethread_workqueue("bluetooth"); + if (!bt_workq) return -ENOMEM; bt_class = class_create(THIS_MODULE, "bluetooth"); if (IS_ERR(bt_class)) { - destroy_workqueue(bluetooth); + destroy_workqueue(bt_workq); return PTR_ERR(bt_class); } @@ -453,7 +459,7 @@ int __init bt_sysfs_init(void) void bt_sysfs_cleanup(void) { - destroy_workqueue(bluetooth); + destroy_workqueue(bt_workq); class_destroy(bt_class); } -- cgit v1.2.3 From b98b4947cb79d670fceca0e951c092eea93e9baa Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Tue, 5 May 2009 15:32:16 +0200 Subject: netfilter: ip6t_ipv6header: fix match on packets ending with NEXTHDR_NONE As packets ending with NEXTHDR_NONE don't have a last extension header, the check for the length needs to be after the check for NEXTHDR_NONE. Signed-off-by: Christoph Paasch Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/ip6t_ipv6header.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 14e6724d567..91490ad9302 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -50,14 +50,14 @@ ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par) struct ipv6_opt_hdr _hdr; int hdrlen; - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return false; /* No more exthdr -> evaluate */ if (nexthdr == NEXTHDR_NONE) { temp |= MASK_NONE; break; } + /* Is there enough space for the next ext header? */ + if (len < (int)sizeof(struct ipv6_opt_hdr)) + return false; /* ESP -> evaluate */ if (nexthdr == NEXTHDR_ESP) { temp |= MASK_ESP; -- cgit v1.2.3 From 280f37afa2c270ff029cb420b34396aa002909c3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 5 May 2009 17:46:07 +0200 Subject: netfilter: xt_cluster: fix use of cluster match with 32 nodes This patch fixes a problem when you use 32 nodes in the cluster match: % iptables -I PREROUTING -t mangle -i eth0 -m cluster \ --cluster-total-nodes 32 --cluster-local-node 32 \ --cluster-hash-seed 0xdeadbeef -j MARK --set-mark 0xffff iptables: Invalid argument. Run `dmesg' for more information. % dmesg | tail -1 xt_cluster: this node mask cannot be higher than the total number of nodes The problem is related to this checking: if (info->node_mask >= (1 << info->total_nodes)) { printk(KERN_ERR "xt_cluster: this node mask cannot be " "higher than the total number of nodes\n"); return false; } (1 << 32) is 1. Thus, the checking fails. 
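A minimal stand-alone demonstration of why the check collapses at 32 nodes (hypothetical program, not the kernel code; XT_CLUSTER_NODES_MAX taken as 32 from the patch, and the mask for local node 32 assumed to have bit 31 set): shifting a 32-bit 1 by 32 is undefined and commonly wraps to 1, so the old bound rejects every non-zero mask, while widening the constant to 1ULL and bounding total_nodes explicitly behaves as intended.

/* cluster_shift_demo.c - illustrates why (1 << total_nodes) breaks at 32. */
#include <stdio.h>

#define XT_CLUSTER_NODES_MAX 32

static int old_check(unsigned int node_mask, unsigned int total_nodes)
{
        /* 1 is a 32-bit int: shifting by 32 is undefined behaviour and
         * typically yields 1, so any non-zero mask is rejected. */
        return node_mask < (1 << total_nodes);
}

static int new_check(unsigned int node_mask, unsigned int total_nodes)
{
        if (total_nodes > XT_CLUSTER_NODES_MAX)
                return 0;
        /* 1ULL is 64 bits wide, so 1ULL << 32 is well defined. */
        return node_mask < (1ULL << total_nodes);
}

int main(void)
{
        unsigned int mask = 1u << 31;   /* --cluster-local-node 32 */

        printf("old check: %s\n", old_check(mask, 32) ? "accepted" : "rejected");
        printf("new check: %s\n", new_check(mask, 32) ? "accepted" : "rejected");
        return 0;
}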
BTW, I said this before but I insist: I have only tested the cluster match with 2 nodes getting ~45% extra performance in an active-active setup. The maximum limit of 32 nodes is still completely arbitrary. I'd really appreciate if people that have more nodes in their setups let me know. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/xt_cluster.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 6c4847662b8..69a639f3540 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -135,7 +135,13 @@ static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_cluster_match_info *info = par->matchinfo; - if (info->node_mask >= (1 << info->total_nodes)) { + if (info->total_nodes > XT_CLUSTER_NODES_MAX) { + printk(KERN_ERR "xt_cluster: you have exceeded the maximum " + "number of cluster nodes (%u > %u)\n", + info->total_nodes, XT_CLUSTER_NODES_MAX); + return false; + } + if (info->node_mask >= (1ULL << info->total_nodes)) { printk(KERN_ERR "xt_cluster: this node mask cannot be " "higher than the total number of nodes\n"); return false; -- cgit v1.2.3 From fecc1133b66af6e0cd49115a248f34bbb01f180a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 5 May 2009 17:48:26 +0200 Subject: netfilter: ctnetlink: fix wrong message type in user updates This patch fixes the wrong message type that are triggered by user updates, the following commands: (term1)# conntrack -I -p tcp -s 1.1.1.1 -d 2.2.2.2 -t 10 --sport 10 --dport 20 --state LISTEN (term1)# conntrack -U -p tcp -s 1.1.1.1 -d 2.2.2.2 -t 10 --sport 10 --dport 20 --state SYN_SENT (term1)# conntrack -U -p tcp -s 1.1.1.1 -d 2.2.2.2 -t 10 --sport 10 --dport 20 --state SYN_RECV only trigger event message of type NEW, when only the first is NEW while others should be UPDATE. (term2)# conntrack -E [NEW] tcp 6 10 LISTEN src=1.1.1.1 dst=2.2.2.2 sport=10 dport=20 [UNREPLIED] src=2.2.2.2 dst=1.1.1.1 sport=20 dport=10 mark=0 [NEW] tcp 6 10 SYN_SENT src=1.1.1.1 dst=2.2.2.2 sport=10 dport=20 [UNREPLIED] src=2.2.2.2 dst=1.1.1.1 sport=20 dport=10 mark=0 [NEW] tcp 6 10 SYN_RECV src=1.1.1.1 dst=2.2.2.2 sport=10 dport=20 [UNREPLIED] src=2.2.2.2 dst=1.1.1.1 sport=20 dport=10 mark=0 This patch also removes IPCT_REFRESH from the bitmask since it is not of any use. 
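The intended behaviour, reduced to a toy model (stand-alone sketch; the flag values below are made up for illustration, the real bits live in the conntrack event headers): only the create path sets NEW or RELATED, depending on whether the entry is an expectation child, while the update path reports just the changed-attribute bits, so listeners see NEW exactly once per entry.

/* ctnetlink_events_demo.c - toy model of the event mask selection. */
#include <stdio.h>

/* Toy flag values for illustration only. */
#define IPCT_NEW        (1 << 0)
#define IPCT_RELATED    (1 << 1)
#define IPCT_STATUS     (1 << 2)
#define IPCT_HELPER     (1 << 3)
#define IPCT_PROTOINFO  (1 << 4)
#define IPCT_NATSEQADJ  (1 << 5)
#define IPCT_MARK       (1 << 6)

#define CHANGED_BITS    (IPCT_STATUS | IPCT_HELPER | IPCT_PROTOINFO | \
                         IPCT_NATSEQADJ | IPCT_MARK)

static unsigned int event_mask(int creating, int expected)
{
        if (creating)
                return CHANGED_BITS | (expected ? IPCT_RELATED : IPCT_NEW);
        return CHANGED_BITS;    /* update: no NEW/RELATED bit */
}

int main(void)
{
        printf("create: %#x\n", event_mask(1, 0));
        printf("update: %#x\n", event_mask(0, 0));
        return 0;
}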
Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 48 +++++++++++++++--------------------- 1 file changed, 20 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 0ea36e0c8a0..fd77619256a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1186,28 +1186,6 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) return 0; } -static inline void -ctnetlink_event_report(struct nf_conn *ct, u32 pid, int report) -{ - unsigned int events = 0; - - if (test_bit(IPS_EXPECTED_BIT, &ct->status)) - events |= IPCT_RELATED; - else - events |= IPCT_NEW; - - nf_conntrack_event_report(IPCT_STATUS | - IPCT_HELPER | - IPCT_REFRESH | - IPCT_PROTOINFO | - IPCT_NATSEQADJ | - IPCT_MARK | - events, - ct, - pid, - report); -} - static struct nf_conn * ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conntrack_tuple *otuple, @@ -1373,6 +1351,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { struct nf_conn *ct; + enum ip_conntrack_events events; ct = ctnetlink_create_conntrack(cda, &otuple, &rtuple, u3); @@ -1383,9 +1362,18 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, err = 0; nf_conntrack_get(&ct->ct_general); spin_unlock_bh(&nf_conntrack_lock); - ctnetlink_event_report(ct, - NETLINK_CB(skb).pid, - nlmsg_report(nlh)); + if (test_bit(IPS_EXPECTED_BIT, &ct->status)) + events = IPCT_RELATED; + else + events = IPCT_NEW; + + nf_conntrack_event_report(IPCT_STATUS | + IPCT_HELPER | + IPCT_PROTOINFO | + IPCT_NATSEQADJ | + IPCT_MARK | events, + ct, NETLINK_CB(skb).pid, + nlmsg_report(nlh)); nf_ct_put(ct); } else spin_unlock_bh(&nf_conntrack_lock); @@ -1404,9 +1392,13 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err == 0) { nf_conntrack_get(&ct->ct_general); spin_unlock_bh(&nf_conntrack_lock); - ctnetlink_event_report(ct, - NETLINK_CB(skb).pid, - nlmsg_report(nlh)); + nf_conntrack_event_report(IPCT_STATUS | + IPCT_HELPER | + IPCT_PROTOINFO | + IPCT_NATSEQADJ | + IPCT_MARK, + ct, NETLINK_CB(skb).pid, + nlmsg_report(nlh)); nf_ct_put(ct); } else spin_unlock_bh(&nf_conntrack_lock); -- cgit v1.2.3 From 457ca7bb6bdf39d0832d3f88c65fa367a3b20de6 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 5 May 2009 13:09:01 -0700 Subject: Bluetooth: Move dev_set_name() to a context that can sleep Setting the name of a sysfs device has to be done in a context that can actually sleep. It allocates its memory with GFP_KERNEL. Previously it was a static (size limited) string and that got changed to accommodate longer device names. So move the dev_set_name() just before calling device_add() which is executed in a work queue. 
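The pattern, reduced to a stand-alone sketch (asprintf stands in for the GFP_KERNEL allocation inside dev_set_name(); the struct and function names below are made up for illustration): the path that runs in atomic context only records the handle, and the work item, which runs in process context and may sleep, formats the name just before registering the device.

/* defer_name_demo.c - user-space sketch of deferring the allocation. */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>

struct conn {
        int handle;
        char *name;     /* built only in sleepable context */
};

/* Event path (tasklet/atomic in the kernel): must not use a sleeping
 * allocator, so it only records the handle. */
static void conn_event(struct conn *c, int handle)
{
        c->handle = handle;
}

/* Work-queue path (process context): may sleep, so the name allocation
 * happens here, right before registration. */
static int conn_add_work(struct conn *c, const char *hdev)
{
        if (asprintf(&c->name, "%s:%d", hdev, c->handle) < 0)
                return -1;
        printf("registering %s\n", c->name);
        return 0;
}

int main(void)
{
        struct conn c = { 0 };

        conn_event(&c, 42);
        conn_add_work(&c, "hci0");
        free(c.name);
        return 0;
}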
This fixes the following error: [ 110.012125] BUG: sleeping function called from invalid context at mm/slub.c:1595 [ 110.012135] in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper [ 110.012141] 2 locks held by swapper/0: [ 110.012145] #0: (hci_task_lock){++.-.+}, at: [] hci_rx_task+0x2f/0x2d0 [bluetooth] [ 110.012173] #1: (&hdev->lock){+.-.+.}, at: [] hci_event_packet+0x72/0x25c0 [bluetooth] [ 110.012198] Pid: 0, comm: swapper Tainted: G W 2.6.30-rc4-g953cdaa #1 [ 110.012203] Call Trace: [ 110.012207] [] __might_sleep+0x14d/0x170 [ 110.012228] [] __kmalloc+0x111/0x170 [ 110.012239] [] kvasprintf+0x64/0xb0 [ 110.012248] [] kobject_set_name_vargs+0x3b/0xa0 [ 110.012257] [] dev_set_name+0x76/0xa0 [ 110.012273] [] ? hci_event_packet+0x72/0x25c0 [bluetooth] [ 110.012289] [] hci_conn_add_sysfs+0x3d/0x70 [bluetooth] [ 110.012303] [] hci_event_packet+0xbc/0x25c0 [bluetooth] [ 110.012312] [] ? sock_def_readable+0x80/0xa0 [ 110.012328] [] ? hci_send_to_sock+0xfc/0x1c0 [bluetooth] [ 110.012343] [] ? sock_def_readable+0x80/0xa0 [ 110.012347] [] ? _read_unlock+0x75/0x80 [ 110.012354] [] ? hci_send_to_sock+0xfc/0x1c0 [bluetooth] [ 110.012360] [] hci_rx_task+0x203/0x2d0 [bluetooth] [ 110.012365] [] tasklet_action+0xb5/0x160 [ 110.012369] [] __do_softirq+0x9c/0x150 [ 110.012372] [] ? _spin_unlock+0x3f/0x80 [ 110.012376] [] call_softirq+0x1c/0x30 [ 110.012380] [] do_softirq+0x8d/0xe0 [ 110.012383] [] irq_exit+0xc5/0xe0 [ 110.012386] [] do_IRQ+0x9d/0x120 [ 110.012389] [] ret_from_intr+0x0/0xf [ 110.012391] [] ? acpi_idle_enter_bm+0x264/0x2a6 [ 110.012399] [] ? acpi_idle_enter_bm+0x25a/0x2a6 [ 110.012403] [] ? cpuidle_idle_call+0xc5/0x130 [ 110.012407] [] ? cpu_idle+0xc4/0x130 [ 110.012411] [] ? rest_init+0x88/0xb0 [ 110.012416] [] ? start_kernel+0x3b5/0x412 [ 110.012420] [] ? x86_64_start_reservations+0x91/0xb5 [ 110.012424] [] ? x86_64_start_kernel+0xef/0x11b Based on a report by Davide Pesavento Signed-off-by: Marcel Holtmann Tested-by: Hugo Mildenberger Tested-by: Bing Zhao --- net/bluetooth/hci_sysfs.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 582d8877078..a05d45eb3ba 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -88,10 +88,13 @@ static struct device_type bt_link = { static void add_conn(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, work_add); + struct hci_dev *hdev = conn->hdev; /* ensure previous del is complete */ flush_work(&conn->work_del); + dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); + if (device_add(&conn->dev) < 0) { BT_ERR("Failed to register connection device"); return; @@ -154,12 +157,8 @@ void hci_conn_init_sysfs(struct hci_conn *conn) void hci_conn_add_sysfs(struct hci_conn *conn) { - struct hci_dev *hdev = conn->hdev; - BT_DBG("conn %p", conn); - dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); - queue_work(bt_workq, &conn->work_add); } -- cgit v1.2.3 From d1a2627a29667fe7c4a9d06e1579a2d65bd39bba Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Mon, 30 Mar 2009 17:50:17 -0700 Subject: wimax: fix oops if netlink fails to add attribute When sending a message to user space using wimax_msg(), if nla_put() fails, correctly interpret the return code from wimax_msg_alloc() as an err ptr and return the error code instead of crashing (as it is assuming than non-NULL means the pointer is ok). 
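The convention at issue is the kernel's ERR_PTR encoding: wimax_msg_alloc() reports failure as an error code folded into the pointer value, never as plain NULL, so callers have to test with IS_ERR() and unwrap with PTR_ERR(). A stand-alone sketch of the idiom, using simplified copies of the helpers (the real ones live in include/linux/err.h) and a hypothetical allocator standing in for wimax_msg_alloc():

/* err_ptr_demo.c - simplified ERR_PTR convention, user-space sketch only. */
#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO       4095
#define ERR_PTR(err)    ((void *)(long)(err))
#define PTR_ERR(ptr)    ((long)(ptr))
#define IS_ERR(ptr)     ((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

/* Hypothetical allocator: reports failure as ERR_PTR(-ENOMEM), not NULL. */
static void *msg_alloc(int fail)
{
        static char buf[32];
        return fail ? ERR_PTR(-ENOMEM) : buf;
}

int main(void)
{
        void *msg = msg_alloc(1);

        printf("old test (msg == NULL): %s\n", msg == NULL ? "caught" : "missed");
        printf("new test (IS_ERR(msg)): %s\n", IS_ERR(msg) ? "caught" : "missed");
        if (IS_ERR(msg))
                printf("error code: %ld\n", PTR_ERR(msg));
        return 0;
}

The NULL comparison sails past the encoded error and the caller goes on to use the bogus pointer, which is exactly the crash the patch avoids.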
Signed-off-by: Inaky Perez-Gonzalez --- net/wimax/op-msg.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c index 5d149c1b5f0..9ad4d893a56 100644 --- a/net/wimax/op-msg.c +++ b/net/wimax/op-msg.c @@ -149,7 +149,8 @@ struct sk_buff *wimax_msg_alloc(struct wimax_dev *wimax_dev, } result = nla_put(skb, WIMAX_GNL_MSG_DATA, size, msg); if (result < 0) { - dev_err(dev, "no memory to add payload in attribute\n"); + dev_err(dev, "no memory to add payload (msg %p size %zu) in " + "attribute: %d\n", msg, size, result); goto error_nla_put; } genlmsg_end(skb, genl_msg); @@ -299,10 +300,10 @@ int wimax_msg(struct wimax_dev *wimax_dev, const char *pipe_name, struct sk_buff *skb; skb = wimax_msg_alloc(wimax_dev, pipe_name, buf, size, gfp_flags); - if (skb == NULL) - goto error_msg_new; - result = wimax_msg_send(wimax_dev, skb); -error_msg_new: + if (IS_ERR(skb)) + result = PTR_ERR(skb); + else + result = wimax_msg_send(wimax_dev, skb); return result; } EXPORT_SYMBOL_GPL(wimax_msg); -- cgit v1.2.3 From 94c7f2d49521b0bb3ab91cbeb3518ac34355d47f Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Sat, 2 May 2009 02:30:28 -0700 Subject: wimax: oops: wimax_dev_add() is the only one that can initialize the state When a new wimax_dev is created, it's state has to be __WIMAX_ST_NULL until wimax_dev_add() is succesfully called. This allows calls into the stack that happen before said time to be rejected. Until now, the state was being set (by mistake) to UNINITIALIZED, which was allowing calls such as wimax_report_rfkill_hw() to go through even when a call to wimax_dev_add() had failed; that was causing an oops when touching uninitialized data. This situation is normal when the device starts reporting state before the whole initialization has been completed. It just has to be dealt with. Signed-off-by: Inaky Perez-Gonzalez --- net/wimax/stack.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wimax/stack.c b/net/wimax/stack.c index a0ee76b5251..933e1422b09 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -338,8 +338,21 @@ out: */ void wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state) { + /* + * A driver cannot take the wimax_dev out of the + * __WIMAX_ST_NULL state unless by calling wimax_dev_add(). If + * the wimax_dev's state is still NULL, we ignore any request + * to change its state because it means it hasn't been yet + * registered. + * + * There is no need to complain about it, as routines that + * call this might be shared from different code paths that + * are called before or after wimax_dev_add() has done its + * job. + */ mutex_lock(&wimax_dev->mutex); - __wimax_state_change(wimax_dev, new_state); + if (wimax_dev->state > __WIMAX_ST_NULL) + __wimax_state_change(wimax_dev, new_state); mutex_unlock(&wimax_dev->mutex); return; } @@ -376,7 +389,7 @@ EXPORT_SYMBOL_GPL(wimax_state_get); void wimax_dev_init(struct wimax_dev *wimax_dev) { INIT_LIST_HEAD(&wimax_dev->id_table_node); - __wimax_state_set(wimax_dev, WIMAX_ST_UNINITIALIZED); + __wimax_state_set(wimax_dev, __WIMAX_ST_NULL); mutex_init(&wimax_dev->mutex); mutex_init(&wimax_dev->mutex_reset); } -- cgit v1.2.3 From 6473990c7f0565fca2007f8662395d122e30f0d8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 6 May 2009 16:45:07 -0700 Subject: net-sched: fix bfifo default limit When no limit is given, the bfifo uses a default of tx_queue_len * mtu. 
Packets handled by qdiscs include the link layer header, so this should be taken into account, similar to what other qdiscs do. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_fifo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 92cfc9d7e3b..69188e8358b 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -51,7 +51,7 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt) u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; if (sch->ops == &bfifo_qdisc_ops) - limit *= qdisc_dev(sch)->mtu; + limit *= psched_mtu(qdisc_dev(sch)); q->limit = limit; } else { -- cgit v1.2.3 From b805007545813d276c844f0f6d6c825b07c6aec6 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Wed, 6 May 2009 16:49:18 -0700 Subject: net: update skb_recycle_check() for hardware timestamping changes Commit ac45f602ee3d1b6f326f68bc0c2591ceebf05ba4 ("net: infrastructure for hardware time stamping") added two skb initialization actions to __alloc_skb(), which need to be added to skb_recycle_check() as well. Signed-off-by: Lennert Buytenhek Signed-off-by: Patrick Ohly Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f091a5a845c..d152394b261 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -502,7 +502,9 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size) shinfo->gso_segs = 0; shinfo->gso_type = 0; shinfo->ip6_frag_id = 0; + shinfo->tx_flags.flags = 0; shinfo->frag_list = NULL; + memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); memset(skb, 0, offsetof(struct sk_buff, tail)); skb->data = skb->head + NET_SKB_PAD; -- cgit v1.2.3 From 9b05126baa146fc3f41360164141d4e1b3ea93c4 Mon Sep 17 00:00:00 2001 From: Ashish Karkare Date: Thu, 7 May 2009 16:31:01 -0700 Subject: net: remove stale reference to fastroute from Kconfig help text Signed-off-by: Ashish Karkare Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller --- net/Kconfig | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index ce77db4fcec..c19f549c8e7 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -119,12 +119,6 @@ menuconfig NETFILTER under "iptables" for the location of these packages. - Make sure to say N to "Fast switching" below if you intend to say Y - here, as Fast switching currently bypasses netfilter. - - Chances are that you should say Y here if you compile a kernel which - will run as a router and N for regular hosts. If unsure, say N. - if NETFILTER config NETFILTER_DEBUG -- cgit v1.2.3 From e81963b180ac502fda0326edf059b1e29cdef1a2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 8 May 2009 12:45:26 -0700 Subject: ipv4: Make INET_LRO a bool instead of tristate. This code is used as a library by several device drivers, which select INET_LRO. If some are modules and some are statically built into the kernel, we get build failures if INET_LRO is modular. Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index b2cf91e4cca..9d26a3da37e 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -407,7 +407,7 @@ config INET_XFRM_MODE_BEET If unsure, say Y. 
config INET_LRO - tristate "Large Receive Offload (ipv4/tcp)" + bool "Large Receive Offload (ipv4/tcp)" ---help--- Support for Large Receive Offload (ipv4/tcp). -- cgit v1.2.3 From be8be9eccbf2d908a7e56b3f7a71105cd88da06b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 6 May 2009 15:02:29 +0000 Subject: ipvs: Fix IPv4 FWMARK virtual services MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the use of fwmarks to denote IPv4 virtual services which was unfortunately broken as a result of the integration of IPv6 support into IPVS, which was included in 2.6.28. The problem arises because fwmarks are stored in the 4th octet of a union nf_inet_addr .all, however in the case of IPv4 only the first octet, corresponding to .ip, is assigned and compared. In other words, using .all = { 0, 0, 0, htonl(svc->fwmark) always results in a value of 0 (32bits) being stored for IPv4. This means that one fwmark can be used, as it ends up being mapped to 0, but things break down when multiple fwmarks are used, as they all end up being mapped to 0. As fwmarks are 32bits a reasonable fix seems to be to just store the fwmark in .ip, and comparing and storing .ip when fwmarks are used. This patch makes the assumption that in calls to ip_vs_ct_in_get() and ip_vs_sched_persist() if the proto parameter is IPPROTO_IP then we are dealing with an fwmark. I believe this is valid as ip_vs_in() does fairly strict filtering on the protocol and IPPROTO_IP should not be used in these calls unless explicitly passed when making these calls for fwmarks in ip_vs_sched_persist(). Tested-by: Fabien Duchêne Cc: Joseph Mack NA3T Cc: Julius Volz Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- net/netfilter/ipvs/ip_vs_conn.c | 9 +++++++-- net/netfilter/ipvs/ip_vs_core.c | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 60aba45023f..77bfdfeb966 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -260,7 +260,10 @@ struct ip_vs_conn *ip_vs_ct_in_get list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == af && ip_vs_addr_equal(af, s_addr, &cp->caddr) && - ip_vs_addr_equal(af, d_addr, &cp->vaddr) && + /* protocol should only be IPPROTO_IP if + * d_addr is a fwmark */ + ip_vs_addr_equal(protocol == IPPROTO_IP ? AF_UNSPEC : af, + d_addr, &cp->vaddr) && s_port == cp->cport && d_port == cp->vport && cp->flags & IP_VS_CONN_F_TEMPLATE && protocol == cp->protocol) { @@ -698,7 +701,9 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, cp->cport = cport; ip_vs_addr_copy(af, &cp->vaddr, vaddr); cp->vport = vport; - ip_vs_addr_copy(af, &cp->daddr, daddr); + /* proto should only be IPPROTO_IP if d_addr is a fwmark */ + ip_vs_addr_copy(proto == IPPROTO_IP ? 
AF_UNSPEC : af, + &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; spin_lock_init(&cp->lock); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index cb3e031335e..8dddb17a947 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -278,7 +278,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ if (svc->fwmark) { union nf_inet_addr fwmark = { - .all = { 0, 0, 0, htonl(svc->fwmark) } + .ip = htonl(svc->fwmark) }; ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0, @@ -306,7 +306,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ if (svc->fwmark) { union nf_inet_addr fwmark = { - .all = { 0, 0, 0, htonl(svc->fwmark) } + .ip = htonl(svc->fwmark) }; ct = ip_vs_conn_new(svc->af, IPPROTO_IP, -- cgit v1.2.3 From 384943ec1bb462e410390ad8f108ff1474cd882d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 8 May 2009 18:20:43 -0700 Subject: Bluetooth: Fix wrong module refcount when connection setup fails The module refcount is increased by hci_dev_hold() call in hci_conn_add() and decreased by hci_dev_put() call in del_conn(). In case the connection setup fails, hci_dev_put() is never called. Procedure to reproduce the issue: # hciconfig hci0 up # lsmod | grep btusb -> "used by" refcount = 1 # hcitool cc -> will get timeout # lsmod | grep btusb -> "used by" refcount = 2 # hciconfig hci0 down # lsmod | grep btusb -> "used by" refcount = 1 # rmmod btusb -> ERROR: Module btusb is in use The hci_dev_put() call got moved into del_conn() with the 2.6.25 kernel to fix an issue with hci_dev going away before hci_conn. However that change was wrong and introduced this problem. When calling hci_conn_del() it has to call hci_dev_put() after freeing the connection details. This handling should be fully symmetric. The execution of del_conn() is done in a work queue and needs it own calls to hci_dev_hold() and hci_dev_put() to ensure that the hci_dev stays until the connection cleanup has been finished. Based on a report by Bing Zhao Signed-off-by: Marcel Holtmann Tested-by: Bing Zhao --- net/bluetooth/hci_conn.c | 2 ++ net/bluetooth/hci_sysfs.c | 3 +++ 2 files changed, 5 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 61309b26f27..85a1c6be2db 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -292,6 +292,8 @@ int hci_conn_del(struct hci_conn *conn) hci_conn_del_sysfs(conn); + hci_dev_put(hdev); + return 0; } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index a05d45eb3ba..4cc3624bd22 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -99,6 +99,8 @@ static void add_conn(struct work_struct *work) BT_ERR("Failed to register connection device"); return; } + + hci_dev_hold(hdev); } /* @@ -134,6 +136,7 @@ static void del_conn(struct work_struct *work) device_del(&conn->dev); put_device(&conn->dev); + hci_dev_put(hdev); } -- cgit v1.2.3 From 1b0336bb36f88976f1210a65b62f6a3e9578ee7b Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 9 May 2009 12:04:08 -0700 Subject: Bluetooth: Don't use hci_acl_connect_cancel() for incoming connections The connection setup phase takes around 2 seconds or longer and in that time it is possible that the need for an ACL connection is no longer present. If that happens then, the connection attempt will be canceled. This only applies to outgoing connections, but currently it can also be triggered by incoming connection. 
Don't call hci_acl_connect_cancel() on incoming connection since these have to be either accepted or rejected in this state. Once they are successfully connected they need to be fully disconnected anyway. Also remove the wrong hci_acl_disconn() call for SCO and eSCO links since at this stage they can't be disconnected either, because the connection handle is still unknown. Based on a report by Johan Hedberg Signed-off-by: Marcel Holtmann Tested-by: Johan Hedberg --- net/bluetooth/hci_conn.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 85a1c6be2db..fa47d5d84f5 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -171,10 +171,8 @@ static void hci_conn_timeout(unsigned long arg) switch (conn->state) { case BT_CONNECT: case BT_CONNECT2: - if (conn->type == ACL_LINK) + if (conn->type == ACL_LINK && conn->out) hci_acl_connect_cancel(conn); - else - hci_acl_disconn(conn, 0x13); break; case BT_CONFIG: case BT_CONNECTED: -- cgit v1.2.3 From 3d7a9d1c7ee251a04095d43eec5a3f4ff3f710a8 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 9 May 2009 12:09:21 -0700 Subject: Bluetooth: Don't trigger disconnect timeout for security mode 3 pairing A remote device in security mode 3 that tries to connect will require the pairing during the connection setup phase. The disconnect timeout is now triggered within 10 milliseconds and causes the pairing to fail. If a connection is not fully established and a PIN code request is received, don't trigger the disconnect timeout. The either successful or failing connection complete event will make sure that the timeout is triggered at the right time. The biggest problem with security mode 3 is that many Bluetooth 2.0 device and before use a temporary security mode 3 for dedicated bonding. Based on a report by Johan Hedberg Signed-off-by: Marcel Holtmann Tested-by: Johan Hedberg --- net/bluetooth/hci_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4e7cb88e5da..184ba0a88ec 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1493,7 +1493,7 @@ static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); - if (conn) { + if (conn && conn->state == BT_CONNECTED) { hci_conn_hold(conn); conn->disc_timeout = HCI_PAIRING_TIMEOUT; hci_conn_put(conn); -- cgit v1.2.3 From 621ad7c96aa138cfeab53cd4debc5a4e08b2189b Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 5 May 2009 15:18:26 -0400 Subject: mac80211: avoid NULL ptr deref when finding max_rates in PID and minstrel "There is another problem with this piece of code. The sband will be NULL after second iteration on single band device and cause null pointer dereference. Everything is working with dual band card. Sorry, but i don't know how to explain this clearly in English. I have looked on the second patch for pid algorithm and found similar bug." Reported-by: Karol Szuster Signed-off-by: John W. 
Linville --- net/mac80211/rc80211_minstrel.c | 2 +- net/mac80211/rc80211_pid_algo.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 70df3dcc3cf..d9233ec5061 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -477,7 +477,7 @@ minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) for (i = 0; i < IEEE80211_NUM_BANDS; i++) { sband = hw->wiphy->bands[i]; - if (sband->n_bitrates > max_rates) + if (sband && sband->n_bitrates > max_rates) max_rates = sband->n_bitrates; } diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index 01d59a8e334..8bef9a1262f 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -378,7 +378,7 @@ static void *rate_control_pid_alloc(struct ieee80211_hw *hw, for (i = 0; i < IEEE80211_NUM_BANDS; i++) { sband = hw->wiphy->bands[i]; - if (sband->n_bitrates > max_rates) + if (sband && sband->n_bitrates > max_rates) max_rates = sband->n_bitrates; } -- cgit v1.2.3 From 5e392739d6ab72f7c35040aa07f4097904bce6e7 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 11 May 2009 00:36:35 +0000 Subject: netpoll: don't dereference NULL dev from np It looks like the dev in netpoll_poll can be NULL - at lease it's checked at the function beginning. Thus the dev->netde_ops dereference looks dangerous. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/netpoll.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b5873bdff61..64f51eec657 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -175,9 +175,13 @@ static void service_arp_queue(struct netpoll_info *npi) void netpoll_poll(struct netpoll *np) { struct net_device *dev = np->dev; - const struct net_device_ops *ops = dev->netdev_ops; + const struct net_device_ops *ops; + + if (!dev || !netif_running(dev)) + return; - if (!dev || !netif_running(dev) || !ops->ndo_poll_controller) + ops = dev->netdev_ops; + if (!ops->ndo_poll_controller) return; /* Process pending work on NIC */ -- cgit v1.2.3 From 2513dfb83fc775364fe85803d3a84d7ebe5763a5 Mon Sep 17 00:00:00 2001 From: Chris Friesen Date: Sun, 17 May 2009 20:39:33 -0700 Subject: ipconfig: handle case of delayed DHCP server If a DHCP server is delayed, it's possible for the client to receive the DHCPOFFER after it has already sent out a new DHCPDISCOVER message from a second interface. The client then sends out a DHCPREQUEST from the second interface, but the server doesn't recognize the device and rejects the request. This patch simply tracks the current device being configured and throws away the OFFER if it is not intended for the current device. A more sophisticated approach would be to put the OFFER information into the struct ic_device rather than storing it globally. Signed-off-by: Chris Friesen Signed-off-by: David S. 
Miller --- net/ipv4/ipconfig.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 90d22ae0a41..88bf051d0cb 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -139,6 +139,8 @@ __be32 ic_servaddr = NONE; /* Boot server IP address */ __be32 root_server_addr = NONE; /* Address of NFS server */ u8 root_server_path[256] = { 0, }; /* Path to mount as root */ +u32 ic_dev_xid; /* Device under configuration */ + /* vendor class identifier */ static char vendor_class_identifier[253] __initdata; @@ -932,6 +934,13 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str goto drop_unlock; } + /* Is it a reply for the device we are configuring? */ + if (b->xid != ic_dev_xid) { + if (net_ratelimit()) + printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n"); + goto drop_unlock; + } + /* Parse extensions */ if (ext_len >= 4 && !memcmp(b->exten, ic_bootp_cookie, 4)) { /* Check magic cookie */ @@ -1115,6 +1124,9 @@ static int __init ic_dynamic(void) get_random_bytes(&timeout, sizeof(timeout)); timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned) CONF_TIMEOUT_RANDOM); for (;;) { + /* Track the device we are configuring */ + ic_dev_xid = d->xid; + #ifdef IPCONFIG_BOOTP if (do_bootp && (d->able & IC_BOOTP)) ic_bootp_send_if(d, jiffies - start_jiffies); -- cgit v1.2.3 From a598f6aebea2481531b0757ed90cfb0d8cf1d8f5 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 15 May 2009 06:10:13 +0000 Subject: bridge: relay bridge multicast pkgs if !STP Currently the bridge catches all STP packets; even if STP is turned off. This prevents other systems (which do have STP turned on) from being able to detect loops in the network. With this patch, if STP is off, then any packet sent to the STP multicast group address is forwarded to all ports. Based on earlier patch by Joakim Tjernlund with changes to go through forwarding (not local chain), and optimization that only last octet needs to be checked. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_input.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 30b88777c3d..5ee1a3682bf 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -134,6 +134,10 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_PAUSE)) goto drop; + /* If STP is turned off, then forward */ + if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0) + goto forward; + if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, NULL, br_handle_local_finish)) return NULL; /* frame consumed by filter */ @@ -141,6 +145,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) return skb; /* continue processing */ } +forward: switch (p->state) { case BR_STATE_FORWARDING: rhook = rcu_dereference(br_should_route_hook); -- cgit v1.2.3 From 4f0611af47e25807cf18cd2b4d4e94206c75b29e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 15 May 2009 06:11:58 +0000 Subject: bridge: fix initial packet flood if !STP If bridge is configured with no STP and forwarding delay of 0 (which is typical for virtualization) then when link starts it will flood all packets for the first 20 seconds. 
This bug was introduced by a combination of earlier changes: * forwarding database uses hold time of zero to indicate user wants to always flood packets * optimzation of the case of forwarding delay of 0 avoids the initial timer tick The fix is to just skip all the topology change detection code if kernel STP is not being used. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_stp.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 6e63ec3f1fc..0660515f399 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -297,6 +297,9 @@ void br_topology_change_detection(struct net_bridge *br) { int isroot = br_is_root_bridge(br); + if (br->stp_enabled != BR_KERNEL_STP) + return; + pr_info("%s: topology change detected, %s\n", br->dev->name, isroot ? "propagating" : "sending tcn bpdu"); -- cgit v1.2.3 From 775273131810caa41dfc7f9e552ea5d8508caf40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sun, 10 May 2009 20:32:34 +0000 Subject: tcp: fix MSG_PEEK race check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 518a09ef11 (tcp: Fix recvmsg MSG_PEEK influence of blocking behavior) lets the loop run longer than the race check did previously expect, so we need to be more careful with this check and consider the work we have been doing. I tried my best to deal with urg hole madness too which happens here: if (!sock_flag(sk, SOCK_URGINLINE)) { ++*seq; ... by using additional offset by one but I certainly have very little interest in testing that part. Signed-off-by: Ilpo Järvinen Tested-by: Frans Pop Tested-by: Ian Zimmermann Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1d7f49c6f0c..7a0f0b27bf1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1321,6 +1321,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct task_struct *user_recv = NULL; int copied_early = 0; struct sk_buff *skb; + u32 urg_hole = 0; lock_sock(sk); @@ -1532,7 +1533,8 @@ do_prequeue: } } } - if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) { + if ((flags & MSG_PEEK) && + (peek_seq - copied - urg_hole != tp->copied_seq)) { if (net_ratelimit()) printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n", current->comm, task_pid_nr(current)); @@ -1553,6 +1555,7 @@ do_prequeue: if (!urg_offset) { if (!sock_flag(sk, SOCK_URGINLINE)) { ++*seq; + urg_hole++; offset++; used--; if (!used) -- cgit v1.2.3 From c0f84d0d4be3f7d818b4ffb04d27f9bae64397f0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 May 2009 15:12:31 -0700 Subject: sch_teql: should not dereference skb after ndo_start_xmit() It is illegal to dereference a skb after a successful ndo_start_xmit() call. We must store skb length in a local variable instead. Bug was introduced in 2.6.27 by commit 0abf77e55a2459aa9905be4b226e4729d5b4f0cb (net_sched: Add accessor function for packet length for qdiscs) Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/sched/sch_teql.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index ec697cebb63..3b641829723 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -303,6 +303,8 @@ restart: switch (teql_resolve(skb, skb_res, slave)) { case 0: if (__netif_tx_trylock(slave_txq)) { + unsigned int length = qdisc_pkt_len(skb); + if (!netif_tx_queue_stopped(slave_txq) && !netif_tx_queue_frozen(slave_txq) && slave_ops->ndo_start_xmit(skb, slave) == 0) { @@ -310,8 +312,7 @@ restart: master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); master->stats.tx_packets++; - master->stats.tx_bytes += - qdisc_pkt_len(skb); + master->stats.tx_bytes += length; return 0; } __netif_tx_unlock(slave_txq); -- cgit v1.2.3 From 511e11e396dc596825ce04d53d7f6d579404bc01 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 May 2009 19:26:37 -0700 Subject: pkt_sched: gen_estimator: use 64 bit intermediate counters for bps gen_estimator can overflow bps (bytes per second) with Gb links, while it was designed with a u32 API, with a theorical limit of 34360Mbit (2^32 bytes) Using 64 bit intermediate avbps/brate counters can allow us to reach this theorical limit. Signed-off-by: Eric Dumazet Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 9cc9f95b109..6d62d4618cf 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -66,9 +66,9 @@ NOTES. - * The stored value for avbps is scaled by 2^5, so that maximal - rate is ~1Gbit, avpps is scaled by 2^10. - + * avbps is scaled by 2^5, avpps is scaled by 2^10. + * both values are reported as 32 bit unsigned values. bps can + overflow for fast links : max speed being 34360Mbit/sec * Minimal interval is HZ/4=250msec (it is the greatest common divisor for HZ=100 and HZ=1024 8)), maximal interval is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals @@ -86,9 +86,9 @@ struct gen_estimator spinlock_t *stats_lock; int ewma_log; u64 last_bytes; + u64 avbps; u32 last_packets; u32 avpps; - u32 avbps; struct rcu_head e_rcu; struct rb_node node; }; @@ -115,6 +115,7 @@ static void est_timer(unsigned long arg) rcu_read_lock(); list_for_each_entry_rcu(e, &elist[idx].list, list) { u64 nbytes; + u64 brate; u32 npackets; u32 rate; @@ -125,9 +126,9 @@ static void est_timer(unsigned long arg) nbytes = e->bstats->bytes; npackets = e->bstats->packets; - rate = (nbytes - e->last_bytes)<<(7 - idx); + brate = (nbytes - e->last_bytes)<<(7 - idx); e->last_bytes = nbytes; - e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log; + e->avbps += ((s64)(brate - e->avbps)) >> e->ewma_log; e->rate_est->bps = (e->avbps+0xF)>>5; rate = (npackets - e->last_packets)<<(12 - idx); -- cgit v1.2.3 From 995b337952cdf7e05d288eede580257b632a8343 Mon Sep 17 00:00:00 2001 From: Thomas Chenault Date: Mon, 18 May 2009 21:43:27 -0700 Subject: net: fix skb_seq_read returning wrong offset/length for page frag data When called with a consumed value that is less than skb_headlen(skb) bytes into a page frag, skb_seq_read() incorrectly returns an offset/length relative to skb->data. Ensure that data which should come from a page frag does. Signed-off-by: Thomas Chenault Tested-by: Shyam Iyer Signed-off-by: David S. 
Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d152394b261..e505b5392e1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2288,7 +2288,7 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, next_skb: block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; - if (abs_offset < block_limit) { + if (abs_offset < block_limit && !st->frag_data) { *data = st->cur_skb->data + (abs_offset - st->stepped_offset); return block_limit - abs_offset; } -- cgit v1.2.3 From bc8a5397433e4effbaddfa7e462d10b3c060cabb Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Mon, 18 May 2009 21:48:38 -0700 Subject: ipv4: make default for INET_LRO consistent with help text Commit e81963b1 ("ipv4: Make INET_LRO a bool instead of tristate.") changed this config from tristate to bool. Add default so that it is consistent with the help text. Signed-off-by: Frans Pop Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 9d26a3da37e..5b919f7b45d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -408,7 +408,7 @@ config INET_XFRM_MODE_BEET config INET_LRO bool "Large Receive Offload (ipv4/tcp)" - + default y ---help--- Support for Large Receive Offload (ipv4/tcp). -- cgit v1.2.3 From 88f16db7a2fa63b9242e8a0fbc40d51722f2e2f9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 May 2009 12:04:30 +0200 Subject: wext: verify buffer size for SIOCSIWENCODEEXT Another design flaw in wireless extensions (is anybody surprised?) in the way it handles the iw_encode_ext structure: The structure is part of the 'extra' memory but contains the key length explicitly, instead of it just being the length of the extra buffer - size of the struct and using the explicit key length only for the get operation (which only writes it). Therefore, we have this layout: extra: +-------------------------+ | struct iw_encode_ext { | | ... | | u16 key_len; | | u8 key[0]; | | }; | +-------------------------+ | key material | +-------------------------+ Now, all drivers I checked use ext->key_len without checking that both key_len and the struct fit into the extra buffer that has been copied from userspace. This leads to a buffer overrun while reading that buffer, depending on the driver it may be possible to specify arbitrary key_len or it may need to be a proper length for the key algorithm specified. Thankfully, this is only exploitable by root, but root can actually cause a segfault or use kernel memory as a key (which you can even get back with siocgiwencode or siocgiwencodeext from the key buffer). Fix this by verifying that key_len fits into the buffer along with struct iw_encode_ext. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- net/wireless/wext.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/wireless/wext.c b/net/wireless/wext.c index cb6a5bb85d8..0e59f9ae9b8 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -786,6 +786,13 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd, err = -EFAULT; goto out; } + + if (cmd == SIOCSIWENCODEEXT) { + struct iw_encode_ext *ee = (void *) extra; + + if (iwp->length < sizeof(*ee) + ee->key_len) + return -EFAULT; + } } err = handler(dev, info, (union iwreq_data *) iwp, extra); -- cgit v1.2.3 From 5078b2e32ad4b1f753b1c837c15892202f753c97 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 13 May 2009 17:04:42 -0400 Subject: cfg80211: fix race between core hint and driver's custom apply Its possible for cfg80211 to have scheduled the work and for the global workqueue to not have kicked in prior to a cfg80211 driver's regulatory hint or wiphy_apply_custom_regulatory(). Although this is very unlikely its possible and should fix this race. When this race would happen you are expected to have hit a null pointer dereference panic. Cc: stable@kernel.org Signed-off-by: Luis R. Rodriguez Tested-by: Alan Jenkins Signed-off-by: John W. Linville --- net/wireless/reg.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 08265ca1578..487cb627ddb 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1551,6 +1551,13 @@ static int regulatory_hint_core(const char *alpha2) queue_regulatory_request(request); + /* + * This ensures last_request is populated once modules + * come swinging in and calling regulatory hints and + * wiphy_apply_custom_regulatory(). + */ + flush_scheduled_work(); + return 0; } -- cgit v1.2.3 From cf8da764fc6959b7efb482f375dfef9830e98205 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 May 2009 18:54:22 +0000 Subject: net: fix length computation in rt_check_expire() rt_check_expire() computes average and standard deviation of chain lengths, but not correclty reset length to 0 at beginning of each chain. This probably gives overflows for sum2 (and sum) on loaded machines instead of meaningful results. Signed-off-by: Eric Dumazet Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/ipv4/route.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c4c60e9f068..869cf1c44b7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -785,7 +785,7 @@ static void rt_check_expire(void) static unsigned int rover; unsigned int i = rover, goal; struct rtable *rth, **rthp; - unsigned long length = 0, samples = 0; + unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; u64 mult; @@ -795,9 +795,9 @@ static void rt_check_expire(void) goal = (unsigned int)mult; if (goal > rt_hash_mask) goal = rt_hash_mask + 1; - length = 0; for (; goal > 0; goal--) { unsigned long tmo = ip_rt_gc_timeout; + unsigned long length; i = (i + 1) & rt_hash_mask; rthp = &rt_hash_table[i].chain; @@ -809,6 +809,7 @@ static void rt_check_expire(void) if (*rthp == NULL) continue; + length = 0; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { if (rt_is_expired(rth)) { -- cgit v1.2.3 From 1ddbcb005c395518c2cd0df504cff3d4b5c85853 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 May 2009 20:14:28 +0000 Subject: net: fix rtable leak in net/ipv4/route.c Alexander V. 
Lukyanov found a regression in 2.6.29 and made a complete analysis found in http://bugzilla.kernel.org/show_bug.cgi?id=13339 Quoted here because its a perfect one : begin_of_quotation 2.6.29 patch has introduced flexible route cache rebuilding. Unfortunately the patch has at least one critical flaw, and another problem. rt_intern_hash calculates rthi pointer, which is later used for new entry insertion. The same loop calculates cand pointer which is used to clean the list. If the pointers are the same, rtable leak occurs, as first the cand is removed then the new entry is appended to it. This leak leads to unregister_netdevice problem (usage count > 0). Another problem of the patch is that it tries to insert the entries in certain order, to facilitate counting of entries distinct by all but QoS parameters. Unfortunately, referencing an existing rtable entry moves it to list beginning, to speed up further lookups, so the carefully built order is destroyed. For the first problem the simplest patch it to set rthi=0 when rthi==cand, but it will also destroy the ordering. end_of_quotation Problematic commit is 1080d709fb9d8cd4392f93476ee46a9d6ea05a5b (net: implement emergency route cache rebulds when gc_elasticity is exceeded) Trying to keep dst_entries ordered is too complex and breaks the fact that order should depend on the frequency of use for garbage collection. A possible fix is to make rt_intern_hash() simpler, and only makes rt_check_expire() a litle bit smarter, being able to cope with an arbitrary entries order. The added loop is running on cache hot data, while cpu is prefetching next object, so should be unnoticied. Reported-and-analyzed-by: Alexander V. Lukyanov Signed-off-by: Eric Dumazet Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/ipv4/route.c | 55 +++++++++++++++++-------------------------------------- 1 file changed, 17 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 869cf1c44b7..28205e5bfa9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -784,7 +784,7 @@ static void rt_check_expire(void) { static unsigned int rover; unsigned int i = rover, goal; - struct rtable *rth, **rthp; + struct rtable *rth, *aux, **rthp; unsigned long samples = 0; unsigned long sum = 0, sum2 = 0; u64 mult; @@ -812,6 +812,7 @@ static void rt_check_expire(void) length = 0; spin_lock_bh(rt_hash_lock_addr(i)); while ((rth = *rthp) != NULL) { + prefetch(rth->u.dst.rt_next); if (rt_is_expired(rth)) { *rthp = rth->u.dst.rt_next; rt_free(rth); @@ -820,33 +821,30 @@ static void rt_check_expire(void) if (rth->u.dst.expires) { /* Entry is expired even if it is in use */ if (time_before_eq(jiffies, rth->u.dst.expires)) { +nofree: tmo >>= 1; rthp = &rth->u.dst.rt_next; /* - * Only bump our length if the hash - * inputs on entries n and n+1 are not - * the same, we only count entries on + * We only count entries on * a chain with equal hash inputs once * so that entries for different QOS * levels, and other non-hash input * attributes don't unfairly skew * the length computation */ - if ((*rthp == NULL) || - !compare_hash_inputs(&(*rthp)->fl, - &rth->fl)) - length += ONE; + for (aux = rt_hash_table[i].chain;;) { + if (aux == rth) { + length += ONE; + break; + } + if (compare_hash_inputs(&aux->fl, &rth->fl)) + break; + aux = aux->u.dst.rt_next; + } continue; } - } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { - tmo >>= 1; - rthp = &rth->u.dst.rt_next; - if ((*rthp == NULL) || - !compare_hash_inputs(&(*rthp)->fl, - &rth->fl)) - 
length += ONE; - continue; - } + } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) + goto nofree; /* Cleanup aged off entries. */ *rthp = rth->u.dst.rt_next; @@ -1069,7 +1067,6 @@ out: return 0; static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) { struct rtable *rth, **rthp; - struct rtable *rthi; unsigned long now; struct rtable *cand, **candp; u32 min_score; @@ -1089,7 +1086,6 @@ restart: } rthp = &rt_hash_table[hash].chain; - rthi = NULL; spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { @@ -1135,17 +1131,6 @@ restart: chain_length++; rthp = &rth->u.dst.rt_next; - - /* - * check to see if the next entry in the chain - * contains the same hash input values as rt. If it does - * This is where we will insert into the list, instead of - * at the head. This groups entries that differ by aspects not - * relvant to the hash function together, which we use to adjust - * our chain length - */ - if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl)) - rthi = rth; } if (cand) { @@ -1206,10 +1191,7 @@ restart: } } - if (rthi) - rt->u.dst.rt_next = rthi->u.dst.rt_next; - else - rt->u.dst.rt_next = rt_hash_table[hash].chain; + rt->u.dst.rt_next = rt_hash_table[hash].chain; #if RT_CACHE_DEBUG >= 2 if (rt->u.dst.rt_next) { @@ -1225,10 +1207,7 @@ restart: * previous writes to rt are comitted to memory * before making rt visible to other CPUS. */ - if (rthi) - rcu_assign_pointer(rthi->u.dst.rt_next, rt); - else - rcu_assign_pointer(rt_hash_table[hash].chain, rt); + rcu_assign_pointer(rt_hash_table[hash].chain, rt); spin_unlock_bh(rt_hash_lock_addr(hash)); *rp = rt; -- cgit v1.2.3 From 4f72427998b105392e60bae7a6798a0c96fe4f0a Mon Sep 17 00:00:00 2001 From: Jean-Mickael Guerin Date: Wed, 20 May 2009 17:38:59 -0700 Subject: IPv6: set RTPROT_KERNEL to initial route The use of unspecified protocol in IPv6 initial route prevents quagga to install IPv6 default route: # show ipv6 route S ::/0 [1/0] via fe80::1, eth1_0 K>* ::/0 is directly connected, lo, rej C>* ::1/128 is directly connected, lo C>* fe80::/64 is directly connected, eth1_0 # ip -6 route fe80::/64 dev eth1_0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit -1 ff00::/8 dev eth1_0 metric 256 mtu 1500 advmss 1440 hoplimit -1 unreachable default dev lo proto none metric -1 error -101 hoplimit 255 The attached patch ensures RTPROT_KERNEL to the default initial route and fixes the problem for quagga. This is similar to "ipv6: protocol for address routes" f410a1fba7afa79d2992620e874a343fdba28332. # show ipv6 route S>* ::/0 [1/0] via fe80::1, eth1_0 C>* ::1/128 is directly connected, lo C>* fe80::/64 is directly connected, eth1_0 # ip -6 route fe80::/64 dev eth1_0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit -1 fe80::/64 dev eth1_0 proto kernel metric 256 mtu 1500 advmss 1440 hoplimit -1 ff00::/8 dev eth1_0 metric 256 mtu 1500 advmss 1440 hoplimit -1 default via fe80::1 dev eth1_0 proto zebra metric 1024 mtu 1500 advmss 1440 hoplimit -1 unreachable default dev lo proto kernel metric -1 error -101 hoplimit 255 Signed-off-by: Jean-Mickael Guerin Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- net/ipv6/route.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1394ddb6e35..032a5ec391c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -137,6 +137,7 @@ static struct rt6_info ip6_null_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -159,6 +160,7 @@ static struct rt6_info ip6_prohibit_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; @@ -176,6 +178,7 @@ static struct rt6_info ip6_blk_hole_entry_template = { } }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, .rt6i_metric = ~(u32) 0, .rt6i_ref = ATOMIC_INIT(1), }; -- cgit v1.2.3 From 5b5f792a6a9a2f9ae812d151ed621f72e99b1725 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 May 2009 15:07:12 -0700 Subject: pktgen: do not access flows[] beyond its length typo -- pkt_dev->nflows is for stats only, the number of concurrent flows is stored in cflows. Reported-By: Vladimir Ivashchenko Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/core/pktgen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 3779c1438c1..0666a827bc6 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2447,7 +2447,7 @@ static inline void free_SAs(struct pktgen_dev *pkt_dev) if (pkt_dev->cflows) { /* let go of the SAs if we have them */ int i = 0; - for (; i < pkt_dev->nflows; i++){ + for (; i < pkt_dev->cflows; i++) { struct xfrm_state *x = pkt_dev->flows[i].x; if (x) { xfrm_state_put(x); -- cgit v1.2.3 From 3ed18d76d959e5cbfa5d70c8f7ba95476582a556 Mon Sep 17 00:00:00 2001 From: Robert Olsson Date: Thu, 21 May 2009 15:20:59 -0700 Subject: ipv4: Fix oops with FIB_TRIE It seems we can fix this by disabling preemption while we re-balance the trie. This is with the CONFIG_CLASSIC_RCU. It's been stress-tested at high loads continuesly taking a full BGP table up/down via iproute -batch. Note. fib_trie is not updated for CONFIG_PREEMPT_RCU Reported-by: Andrei Popa Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ec0ae490f0b..33c7c85dfe4 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -986,9 +986,12 @@ fib_find_node(struct trie *t, u32 key) static struct node *trie_rebalance(struct trie *t, struct tnode *tn) { int wasfull; - t_key cindex, key = tn->key; + t_key cindex, key; struct tnode *tp; + preempt_disable(); + key = tn->key; + while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) { cindex = tkey_extract_bits(key, tp->pos, tp->bits); wasfull = tnode_full(tp, tnode_get_child(tp, cindex)); @@ -1007,6 +1010,7 @@ static struct node *trie_rebalance(struct trie *t, struct tnode *tn) if (IS_TNODE(tn)) tn = (struct tnode *)resize(t, (struct tnode *)tn); + preempt_enable(); return (struct node *)tn; } -- cgit v1.2.3 From 0975ecba3b670df7c488a5e0e6fe9f1f370a8ad8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 May 2009 15:22:02 -0700 Subject: RxRPC: Error handling for rxrpc_alloc_connection() rxrpc_alloc_connection() doesn't return an error code on failure, it just returns NULL. IS_ERR(NULL) is false. 
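Why IS_ERR(NULL) is false, shown as a simplified sketch of the kernel's err.h helpers (the unlikely() annotation is dropped here): only pointer values in the topmost MAX_ERRNO range are treated as encoded errnos, so a NULL return from a failed allocation sails straight past an IS_ERR() test and must be caught with an explicit NULL check instead.

	#define MAX_ERRNO	4095

	/* simplified from include/linux/err.h */
	#define IS_ERR_VALUE(x)	((x) >= (unsigned long)-MAX_ERRNO)

	static inline long IS_ERR(const void *ptr)
	{
		return IS_ERR_VALUE((unsigned long)ptr);
	}

	/*
	 * IS_ERR(NULL) == 0: the value 0 is far below (unsigned long)-4095,
	 * so "if (IS_ERR(conn))" never detects an allocator that returns NULL.
	 */
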
Signed-off-by: Dan Carpenter Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/ar-connection.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 0f1218b8d28..67e38a05624 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -343,9 +343,9 @@ static int rxrpc_connect_exclusive(struct rxrpc_sock *rx, /* not yet present - create a candidate for a new connection * and then redo the check */ conn = rxrpc_alloc_connection(gfp); - if (IS_ERR(conn)) { - _leave(" = %ld", PTR_ERR(conn)); - return PTR_ERR(conn); + if (!conn) { + _leave(" = -ENOMEM"); + return -ENOMEM; } conn->trans = trans; @@ -508,9 +508,9 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, /* not yet present - create a candidate for a new connection and then * redo the check */ candidate = rxrpc_alloc_connection(gfp); - if (IS_ERR(candidate)) { - _leave(" = %ld", PTR_ERR(candidate)); - return PTR_ERR(candidate); + if (!candidate) { + _leave(" = -ENOMEM"); + return -ENOMEM; } candidate->trans = trans; -- cgit v1.2.3 From bfcaa50270e18f35220a11d46e98fc6232c24606 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Mon, 25 May 2009 17:23:15 +0200 Subject: netfilter: nf_ct_tcp: fix accepting invalid RST segments Robert L Mathews discovered that some clients send evil TCP RST segments, which are accepted by netfilter conntrack but discarded by the destination. Thus the conntrack entry is destroyed but the destination retransmits data until timeout. The same technique, i.e. sending properly crafted RST segments, can easily be used to bypass connlimit/connbytes based restrictions (the sample script written by Robert can be found in the netfilter mailing list archives). The patch below adds a new flag and new field to struct ip_ct_tcp_state so that checking RST segments can be made more strict and thus TCP conntrack can catch the invalid ones: the RST segment is accepted only if its sequence number higher than or equal to the highest ack we seen from the other direction. (The last_ack field cannot be reused because it is used to catch resent packets.) Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto_tcp.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index b5ccf2b4b2e..97a6e93d742 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -634,6 +634,14 @@ static bool tcp_in_window(const struct nf_conn *ct, sender->td_end = end; sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; } + if (tcph->ack) { + if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { + sender->td_maxack = ack; + sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; + } else if (after(ack, sender->td_maxack)) + sender->td_maxack = ack; + } + /* * Update receiver data. 
*/ @@ -918,6 +926,16 @@ static int tcp_packet(struct nf_conn *ct, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_CLOSE: + if (index == TCP_RST_SET + && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) + && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { + /* Invalid RST */ + write_unlock_bh(&tcp_lock); + if (LOG_INVALID(net, IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, NULL, + "nf_ct_tcp: invalid RST "); + return -NF_ACCEPT; + } if (index == TCP_RST_SET && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status) && ct->proto.tcp.last_index == TCP_SYN_SET) -- cgit v1.2.3 From b38b1f616867c832301f24eaf259889494d495b3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 25 May 2009 17:29:43 +0200 Subject: netfilter: nf_ct_dccp: add missing DCCP protocol changes in event cache This patch adds the missing protocol state-change event reporting for DCCP. $ sudo conntrack -E [NEW] dccp 33 240 src=192.168.0.2 dst=192.168.1.2 sport=57040 dport=5001 [UNREPLIED] src=192.168.1.2 dst=192.168.1.100 sport=5001 dport=57040 With this patch: $ sudo conntrack -E [NEW] dccp 33 240 REQUEST src=192.168.0.2 dst=192.168.1.2 sport=57040 dport=5001 [UNREPLIED] src=192.168.1.2 dst=192.168.1.100 sport=5001 dport=57040 Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto_dccp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 8e757dd5339..aee0d6bea30 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -22,6 +22,7 @@ #include #include #include +#include #include static DEFINE_RWLOCK(dccp_lock); @@ -553,6 +554,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.state = new_state; write_unlock_bh(&dccp_lock); + if (new_state != old_state) + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); + dn = dccp_pernet(net); nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]); -- cgit v1.2.3 From c80a5cdfc5ca6533cb893154f546370da1fdb8f0 Mon Sep 17 00:00:00 2001 From: Doug Leith Date: Mon, 25 May 2009 22:44:59 -0700 Subject: tcp: tcp_vegas ssthresh bugfix This patch fixes ssthresh accounting issues in tcp_vegas when cwnd decreases Signed-off-by: Doug Leith Signed-off-by: David S. Miller --- net/ipv4/tcp_vegas.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index a453aac91bd..c6743eec9b7 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -158,6 +158,11 @@ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) } EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); +static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) +{ + return min(tp->snd_ssthresh, tp->snd_cwnd-1); +} + static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) { struct tcp_sock *tp = tcp_sk(sk); @@ -221,11 +226,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) */ diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; - if (diff > gamma && tp->snd_ssthresh > 2 ) { + if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) { /* Going too fast. Time to slow down * and switch to congestion avoidance. */ - tp->snd_ssthresh = 2; /* Set cwnd to match the actual rate * exactly: @@ -235,6 +239,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * utilization. 
*/ tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); + tp->snd_ssthresh = tcp_vegas_ssthresh(tp); } else if (tp->snd_cwnd <= tp->snd_ssthresh) { /* Slow start. */ @@ -250,6 +255,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) * we slow down. */ tp->snd_cwnd--; + tp->snd_ssthresh + = tcp_vegas_ssthresh(tp); } else if (diff < alpha) { /* We don't have enough extra packets * in the network, so speed up. -- cgit v1.2.3 From 68743082b560067e3e93eab8b2568f238e486865 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Tue, 26 May 2009 14:51:00 -0400 Subject: XPRTRDMA: fix client rpcrdma FRMR registration on mlx4 devices mlx4/connectX FRMR requires local write enable together with remote rdma write enable. This fixes NFS/RDMA operation over the ConnectX Infiniband HCA in the default memreg mode. Signed-off-by: Vu Pham Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/verbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3b21e0cc5e6..465aafc2007 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1495,7 +1495,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; frmr_wr.wr.fast_reg.access_flags = (writing ? - IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ); + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : + IB_ACCESS_REMOTE_READ); frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); -- cgit v1.2.3 From e65fcfd63a9a62baa5708484ff8edbe56eb3e7ec Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Tue, 26 May 2009 20:47:02 -0700 Subject: cls_cgroup: read classid atomically in classifier Avoid reading the unsynchronized value cs->classid multiple times, since it could change concurrently from non-zero to zero; this would result in the classifier returning a positive result with a bogus (zero) classid. Signed-off-by: Paul Menage Reviewed-by: Li Zefan Signed-off-by: David S. 
Miller --- net/sched/cls_cgroup.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 91a3db4a76f..cc29b44b150 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -104,8 +104,7 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) { struct cls_cgroup_head *head = tp->root; - struct cgroup_cls_state *cs; - int ret = 0; + u32 classid; /* * Due to the nature of the classifier it is required to ignore all @@ -121,17 +120,18 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, return -1; rcu_read_lock(); - cs = task_cls_state(current); - if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) { - res->classid = cs->classid; - res->class = 0; - ret = tcf_exts_exec(skb, &head->exts, res); - } else - ret = -1; - + classid = task_cls_state(current)->classid; rcu_read_unlock(); - return ret; + if (!classid) + return -1; + + if (!tcf_em_tree_match(skb, &head->ematches, NULL)) + return -1; + + res->classid = classid; + res->class = 0; + return tcf_exts_exec(skb, &head->exts, res); } static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle) -- cgit v1.2.3 From 4c713189485dbea875aecd1990daed74908e181d Mon Sep 17 00:00:00 2001 From: Dave Young Date: Wed, 27 May 2009 09:10:28 +0200 Subject: Bluetooth: Remove useless flush_work() causing lockdep warnings The calls to flush_work() are pointless in a single thread workqueue and they are actually causing a lockdep warning. ============================================= [ INFO: possible recursive locking detected ] 2.6.30-rc6-02911-gbb803cf #16 --------------------------------------------- bluetooth/2518 is trying to acquire lock: (bluetooth){+.+.+.}, at: [] flush_work+0x28/0xb0 but task is already holding lock: (bluetooth){+.+.+.}, at: [] worker_thread+0x149/0x25e other info that might help us debug this: 2 locks held by bluetooth/2518: #0: (bluetooth){+.+.+.}, at: [] worker_thread+0x149/0x25e #1: (&conn->work_del){+.+...}, at: [] worker_thread+0x149/0x25e stack backtrace: Pid: 2518, comm: bluetooth Not tainted 2.6.30-rc6-02911-gbb803cf #16 Call Trace: [] ? printk+0xf/0x11 [] __lock_acquire+0x7ce/0xb1b [] lock_acquire+0x90/0xad [] ? flush_work+0x28/0xb0 [] flush_work+0x42/0xb0 [] ? flush_work+0x28/0xb0 [] del_conn+0x1c/0x84 [bluetooth] [] worker_thread+0x18e/0x25e [] ? worker_thread+0x149/0x25e [] ? del_conn+0x0/0x84 [bluetooth] [] ? autoremove_wake_function+0x0/0x33 [] ? worker_thread+0x0/0x25e [] kthread+0x45/0x6b [] ? 
kthread+0x0/0x6b [] kernel_thread_helper+0x7/0x10 Based on a report by Oliver Hartkopp Signed-off-by: Dave Young Tested-by: Oliver Hartkopp Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_sysfs.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 4cc3624bd22..95f7a7a544b 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -90,9 +90,6 @@ static void add_conn(struct work_struct *work) struct hci_conn *conn = container_of(work, struct hci_conn, work_add); struct hci_dev *hdev = conn->hdev; - /* ensure previous del is complete */ - flush_work(&conn->work_del); - dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); if (device_add(&conn->dev) < 0) { @@ -118,9 +115,6 @@ static void del_conn(struct work_struct *work) struct hci_conn *conn = container_of(work, struct hci_conn, work_del); struct hci_dev *hdev = conn->hdev; - /* ensure previous add is complete */ - flush_work(&conn->work_add); - if (!device_is_registered(&conn->dev)) return; -- cgit v1.2.3 From 683a04cebc63819a36b1db19843bd17771f05b55 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 27 May 2009 15:45:34 +0200 Subject: netfilter: xt_hashlimit does a wrong SEQ_SKIP The function dl_seq_show() returns 1 (equal to SEQ_SKIP) in case a seq_printf() call return -1. It should return -1. This SEQ_SKIP behavior brakes processing the proc file e.g. via a pipe or just through less. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Patrick McHardy --- net/netfilter/xt_hashlimit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index a5b5369c30f..219dcdbe388 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -926,7 +926,7 @@ static int dl_seq_show(struct seq_file *s, void *v) if (!hlist_empty(&htable->hash[*bucket])) { hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node) if (dl_seq_real_show(ent, htable->family, s)) - return 1; + return -1; } return 0; } -- cgit v1.2.3 From eeff9beec3d2563c42cca41e66d4169592bb5475 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 27 May 2009 15:49:11 +0200 Subject: netfilter: nfnetlink_log: fix wrong skbuff size calculation This problem was introduced in 72961ecf84d67d6359a1b30f9b2a8427f13e1e71 since no space was reserved for the new attributes NFULA_HWTYPE, NFULA_HWLEN and NFULA_HWHEADER. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nfnetlink_log.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index fd326ac27ec..66a6dd5c519 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -581,6 +581,12 @@ nfulnl_log_packet(u_int8_t pf, + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)); + if (in && skb_mac_header_was_set(skb)) { + size += nla_total_size(skb->dev->hard_header_len) + + nla_total_size(sizeof(u_int16_t)) /* hwtype */ + + nla_total_size(sizeof(u_int16_t)); /* hwlen */ + } + spin_lock_bh(&inst->lock); if (inst->flags & NFULNL_CFG_F_SEQ) -- cgit v1.2.3 From 7f4218354fe312b327af06c3d8c95ed5f214c8ca Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Wed, 27 May 2009 18:51:06 -0400 Subject: nfsd: Revert "svcrpc: take advantage of tcp autotuning" This reverts commit 47a14ef1af48c696b214ac168f056ddc79793d0e "svcrpc: take advantage of tcp autotuning", which uncovered some further problems in the server rpc code, causing significant performance regressions in common cases. We will likely reinstate this patch after releasing 2.6.30 and applying some work on the underlying fixes to the problem (developed by Trond). Reported-by: Jeff Moyer Cc: Olga Kornievskaia Cc: Jim Rees Cc: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index af3198814c1..9d504234af4 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -345,6 +345,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, lock_sock(sock->sk); sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; + sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; release_sock(sock->sk); #endif } @@ -796,6 +797,23 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); + if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) + /* sndbuf needs to have room for one request + * per thread, otherwise we can stall even when the + * network isn't a bottleneck. + * + * We count all threads rather than threads in a + * particular pool, which provides an upper bound + * on the number of threads which will access the socket. + * + * rcvbuf just needs to be able to hold a few requests. + * Normally they will be removed from the queue + * as soon a a complete request arrives. + */ + svc_sock_setbufsize(svsk->sk_sock, + (serv->sv_nrthreads+3) * serv->sv_max_mesg, + 3 * serv->sv_max_mesg); + clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* Receive data. If we haven't got the record length yet, get @@ -1043,6 +1061,15 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; + /* initialise setting must have enough space to + * receive and respond to one request. + * svc_tcp_recvfrom will re-adjust if necessary + */ + svc_sock_setbufsize(svsk->sk_sock, + 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, + 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); + + set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (sk->sk_state != TCP_ESTABLISHED) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1112,14 +1139,8 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Initialize the socket */ if (sock->type == SOCK_DGRAM) svc_udp_init(svsk, serv); - else { - /* initialise setting must have enough space to - * receive and respond to one request. - */ - svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg, - 4 * serv->sv_max_mesg); + else svc_tcp_init(svsk, serv); - } dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); -- cgit v1.2.3 From 98779be861a05c4cb75bed916df72ec0cba8b53d Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 14 May 2009 16:34:28 -0500 Subject: svcrdma: dma unmap the correct length for the RPCRDMA header page. The svcrdma module was incorrectly unmapping the RPCRDMA header page. On IBM pserver systems this causes a resource leak that results in running out of bus address space (10 cthon iterations will reproduce it). 
The code was mapping the full page but only unmapping the actual header length. The fix is to only map the header length. I also cleaned up the use of ib_dma_map_page() calls since the unmap logic always uses ib_dma_unmap_single(). I made these symmetrical. Signed-off-by: Steve Wise Signed-off-by: Tom Tucker Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 12 ++++++------ net/sunrpc/xprtrdma/svc_rdma_transport.c | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 8b510c5e877..f11be72a1a8 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -128,7 +128,8 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt, page_bytes -= sge_bytes; frmr->page_list->page_list[page_no] = - ib_dma_map_page(xprt->sc_cm_id->device, page, 0, + ib_dma_map_single(xprt->sc_cm_id->device, + page_address(page), PAGE_SIZE, DMA_TO_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, frmr->page_list->page_list[page_no])) @@ -532,18 +533,17 @@ static int send_reply(struct svcxprt_rdma *rdma, clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); /* Prepare the SGE for the RPCRDMA Header */ + ctxt->sge[0].lkey = rdma->sc_dma_lkey; + ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); ctxt->sge[0].addr = - ib_dma_map_page(rdma->sc_cm_id->device, - page, 0, PAGE_SIZE, DMA_TO_DEVICE); + ib_dma_map_single(rdma->sc_cm_id->device, page_address(page), + ctxt->sge[0].length, DMA_TO_DEVICE); if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) goto err; atomic_inc(&rdma->sc_dma_used); ctxt->direction = DMA_TO_DEVICE; - ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); - ctxt->sge[0].lkey = rdma->sc_dma_lkey; - /* Determine how many of our SGE are to be transmitted */ for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 4b0c2fa15e0..5151f9f6c57 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -500,8 +500,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) BUG_ON(sge_no >= xprt->sc_max_sge); page = svc_rdma_get_page(); ctxt->pages[sge_no] = page; - pa = ib_dma_map_page(xprt->sc_cm_id->device, - page, 0, PAGE_SIZE, + pa = ib_dma_map_single(xprt->sc_cm_id->device, + page_address(page), PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) goto err_put_ctxt; @@ -1315,8 +1315,8 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); /* Prepare SGE for local address */ - sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, - p, 0, PAGE_SIZE, DMA_FROM_DEVICE); + sge.addr = ib_dma_map_single(xprt->sc_cm_id->device, + page_address(p), PAGE_SIZE, DMA_FROM_DEVICE); if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { put_page(p); return; @@ -1343,7 +1343,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, if (ret) { dprintk("svcrdma: Error %d posting send for protocol error\n", ret); - ib_dma_unmap_page(xprt->sc_cm_id->device, + ib_dma_unmap_single(xprt->sc_cm_id->device, sge.addr, PAGE_SIZE, DMA_FROM_DEVICE); svc_rdma_put_context(ctxt, 1); -- cgit v1.2.3 From 12186be7d2e1106cede1cc728526e3d7998cbe94 Mon Sep 17 00:00:00 
2001 From: Minoru Usui Date: Tue, 2 Jun 2009 02:17:34 -0700 Subject: net_cls: fix unconfigured struct tcf_proto keeps chaining and avoid kernel panic when we use cls_cgroup This patch fixes a bug where an unconfigured struct tcf_proto is left chained by tc_ctl_tfilter(), and avoids the resulting kernel panic in cls_cgroup_classify() when cls_cgroup is used. When we execute 'tc filter add', a tcf_proto is allocated, initialized by the classifier's init(), and chained. After it is chained, tc_ctl_tfilter() calls the classifier's change(). When change() fails, tc_ctl_tfilter() does not free the tcf_proto and leaves it on the chain. In addition, cls_cgroup is initialized in change(), not in init(). It then accesses the unconfigured struct tcf_proto that was chained before change() and hits an Oops. Signed-off-by: Minoru Usui Signed-off-by: Jarek Poplawski Signed-off-by: Jamal Hadi Salim Tested-by: Minoru Usui Signed-off-by: David S. Miller --- net/sched/cls_api.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 0759f32e9dc..09cdcdfe7e9 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -135,6 +135,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) unsigned long cl; unsigned long fh; int err; + int tp_created = 0; if (net != &init_net) return -EINVAL; @@ -266,10 +267,7 @@ replay: goto errout; } - spin_lock_bh(root_lock); - tp->next = *back; - *back = tp; - spin_unlock_bh(root_lock); + tp_created = 1; } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) goto errout; @@ -296,8 +294,11 @@ replay: switch (n->nlmsg_type) { case RTM_NEWTFILTER: err = -EEXIST; - if (n->nlmsg_flags & NLM_F_EXCL) + if (n->nlmsg_flags & NLM_F_EXCL) { + if (tp_created) + tcf_destroy(tp); goto errout; + } break; case RTM_DELTFILTER: err = tp->ops->delete(tp, fh); @@ -314,8 +315,18 @@ replay: } err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh); - if (err == 0) + if (err == 0) { + if (tp_created) { + spin_lock_bh(root_lock); + tp->next = *back; + *back = tp; + spin_unlock_bh(root_lock); + } tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); + } else { + if (tp_created) + tcf_destroy(tp); + } errout: if (cl) -- cgit v1.2.3 From 52ea3a56a3268bc2a5a7c75e98c81463004e38ef Mon Sep 17 00:00:00 2001 From: Minoru Usui Date: Tue, 9 Jun 2009 04:03:09 -0700 Subject: cls_cgroup: Fix oops when user send improperly 'tc filter add' request I found a bug in cls_cgroup_change() in cls_cgroup.c. cls_cgroup_change() expects tca[TCA_OPTIONS] to be set properly from user space, but the tc in iproute2-2.6.29-1 (which I used) didn't set it. The current source code of tc in git does set tca[TCA_OPTIONS]: git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2.git With the newest iproute2 from git this oops probably does not occur, but the kernel shouldn't panic regardless of how the user-space program behaves. Signed-off-by: Minoru Usui Signed-off-by: David S. Miller --- net/sched/cls_cgroup.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index cc29b44b150..e5becb92b3e 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -167,6 +167,9 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base, struct tcf_exts e; int err; + if (!tca[TCA_OPTIONS]) + return -EINVAL; + if (head == NULL) { if (!handle) return -EINVAL; -- cgit v1.2.3
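The two cls_api.c/cls_cgroup.c fixes above share one pattern: validate user-supplied attributes before using them, and never publish a half-initialized object onto a shared chain. The following stand-alone user-space C sketch illustrates that create-configure-then-publish ordering; the names (filter_add, filter_configure, chain) are illustrative, not the kernel API, and the locking is omitted since a single-threaded demo does not need it.

/*
 * Minimal user-space sketch (not kernel code) of the ordering the
 * tc_ctl_tfilter() fix enforces: a newly allocated object is linked
 * into the shared chain only after its configure step succeeds; on
 * failure it is destroyed instead of being left half-initialized.
 */
#include <stdio.h>
#include <stdlib.h>

struct filter {
	struct filter *next;
	int configured;
	char kind[16];
};

static struct filter *chain;	/* readers must only ever see configured entries */

static int filter_configure(struct filter *f, const char *opts)
{
	if (opts == NULL)	/* mirrors the !tca[TCA_OPTIONS] check */
		return -1;
	f->configured = 1;
	return 0;
}

static int filter_add(const char *kind, const char *opts)
{
	struct filter *f = calloc(1, sizeof(*f));

	if (f == NULL)
		return -1;
	snprintf(f->kind, sizeof(f->kind), "%s", kind);

	if (filter_configure(f, opts) != 0) {
		free(f);	/* never published, so safe to destroy */
		return -1;
	}

	/* publish only after the object is fully configured */
	f->next = chain;
	chain = f;
	return 0;
}

int main(void)
{
	/* a malformed request must not leave an entry on the chain */
	if (filter_add("cgroup", NULL) != 0)
		printf("rejected unconfigured filter, chain is %s\n",
		       chain == NULL ? "empty" : "corrupt");

	if (filter_add("cgroup", "classid 1:1") == 0)
		printf("added filter '%s'\n", chain->kind);
	return 0;
}

Compiled and run, the first call is rejected and leaves the chain empty, mirroring how the patched tc_ctl_tfilter() destroys a tcf_proto that was created but never successfully configured, and how the patched cls_cgroup_change() rejects a request that lacks TCA_OPTIONS instead of oopsing.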